In [1]:
# Importing libraries we need for NLP
import nltk
nltk.download('punkt')
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()

import numpy as np
import random
from tensorflow import keras
from keras.models import load_model

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
# Load the chat-bot intents definition from 'intents.json' into `intents`.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default encoding (which is locale-dependent and not always UTF-8).
import json
with open('intents.json', encoding='utf-8') as json_data:
    intents = json.load(json_data)

In [3]:
# Containers filled from the intents file:
#   words     - every token seen in the patterns (later reduced to unique stems)
#   classes   - the intent tags, in first-seen order (later sorted)
#   documents - (token list, tag) pairs, one per pattern
words = []
classes = []
documents = []
ignore_words = ['?']

# Visit every example pattern of every intent
for intent in intents['intents']:
    for pattern in intent['patterns']:
        # split the pattern into tokens
        tokens = nltk.word_tokenize(pattern)
        words += tokens
        # remember which tag this tokenized pattern belongs to
        label = intent['tag']
        documents.append((tokens, label))
        # record each tag only once
        if label not in classes:
            classes.append(label)

# Lower-case and stem every token that is not ignored, keeping one copy of each stem,
# then fix the order by sorting
stems = {stemmer.stem(token.lower()) for token in words if token not in ignore_words}
words = sorted(stems)

# Tags are de-duplicated and sorted the same way
classes = sorted(set(classes))

print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique stemmed words", words)

40 documents
10 classes ['cognitive-behavioural-therapy', 'doctor', 'goodbye', 'greeting', 'heart-disease', 'heartrates', 'heartrates-reading', 'noanswer', 'options', 'thanks']
85 unique stemmed words ["'s", ',', '.', 'abl', 'acc', 'alway', 'am', 'and', 'anxy', 'anyon', 'ar', 'be', 'beat', 'becaus', 'book', 'bye', 'calc', 'can', 'check', 'chronic', 'common', 'condit', 'could', 'day', 'depress', 'die', 'diseas', 'do', 'doct', 'feel', 'for', 'get', 'good', 'goodby', 'hav', 'heart', 'hello', 'help', 'hi', 'how', 'hum', 'i', 'is', 'just', 'kil', 'know', 'lat', 'lik', 'me', 'meas', 'might', 'most', 'my', 'nee', 'no', 'of', 'off', 'on', 'proc', 'provid', 'rat', 'recommend', 'see', 'sick', 'simpl', 'situ', 'soon', 'step', 'support', 'surv', 'symptom', 'tak', 'tel', 'thank', 'that', 'the', 'ther', 'thi', 'tir', 'to', 'want', 'what', 'wil', 'yo', 'you']


In [4]:
# Load the saved Keras model from 'model_ChatBot.h5'; classify() below calls
# model.predict on bag-of-words rows built from `words`.
# NOTE(review): presumably this model was trained with the same `words` / `classes`
# ordering computed in the previous cell — confirm before changing the intents file.
model = load_model('model_ChatBot.h5')

In [5]:
def clean_up_sentence(sentence):
    """Tokenize `sentence` and return its tokens lower-cased and stemmed.

    Tokenization uses `nltk.word_tokenize`; each token is lower-cased and then
    passed through the module-level `stemmer`. Token order is preserved.
    """
    return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]

def bow(sentence, words, show_details=False):
    """Return a bag-of-words array for `sentence` over the vocabulary `words`.

    The sentence is tokenized and stemmed with `clean_up_sentence`. The result
    has one entry per element of `words`: 1 if that vocabulary entry occurs in
    the cleaned sentence, 0 otherwise.

    Args:
        sentence: raw text to encode.
        words: sequence of vocabulary entries (stems); its order defines the
            order of the returned array.
        show_details: when True, print a line for every vocabulary hit.

    Returns:
        A numpy array of 0/1 values with length len(words).
    """
    sentence_words = clean_up_sentence(sentence)

    # Index the vocabulary once so each token costs a dict lookup instead of a
    # full scan of `words`. A list of positions per entry keeps the result
    # identical even if `words` contains duplicates.
    positions = {}
    for i, w in enumerate(words):
        positions.setdefault(w, []).append(i)

    bag = [0] * len(words)
    for s in sentence_words:
        for i in positions.get(s, ()):
            bag[i] = 1
            if show_details:
                print ("found in bag: %s" % words[i])

    return np.array(bag)

In [6]:
# create a data structure to hold user context
# Maps a userID to the 'context_set' value of the last matched intent that
# defined one; response() writes it and compares it against 'context_filter'.
context = {}

# A predicted class is reported by classify() only if its score is strictly
# greater than this value.
ERROR_THRESHOLD = 0.65
def classify(sentence):
    """Return the intent classes predicted for `sentence`, strongest first.

    The sentence is encoded with `bow` over the module-level vocabulary `words`
    and scored by the module-level `model`. Only classes whose score is
    strictly greater than ERROR_THRESHOLD are kept.

    Returns:
        A list of (class name, score) tuples sorted by descending score; the
        list is empty when no class clears the threshold.
    """
    encoded = bow(sentence, words)

    # The encoded sentence is stacked on top of len(documents) - 2 all-zero rows
    # and only the first row of the prediction is used.
    # NOTE(review): presumably the padding matches a batch size the saved model
    # expects — confirm against how the model was built.
    padding = np.zeros([len(documents) - 2, len(encoded)])
    batch = np.vstack((encoded, padding))
    scores = model.predict(batch)[0]

    # keep (class index, score) pairs above the threshold, best score first
    ranked = [(index, score) for index, score in enumerate(scores) if score > ERROR_THRESHOLD]
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    # translate class indices into class names
    return [(classes[index], score) for index, score in ranked]

def response(sentence, userID, show_details=False):
    """Choose a reply for `sentence` on behalf of the user `userID`.

    The sentence is classified with `classify`, and the classification result
    is always printed. Candidates are tried from strongest to weakest; for each
    one, the intents carrying that tag are examined in file order:

    * an intent with 'context_set' stores that value in `context[userID]`;
    * an intent with 'context_filter' is only eligible when the user's stored
      context equals that filter;
    * the first eligible intent supplies the reply, picked at random from its
      'responses'.

    NOTE(review): the context is stored before the filter is checked, so an
    intent rejected by its own 'context_filter' still updates the user's
    context — confirm this is intended.

    Args:
        sentence: raw user text.
        userID: key under which this user's context is tracked.
        show_details: when True, print the context being set and the chosen tag.

    Returns:
        One of the matching intent's responses, or None when no candidate
        yields an eligible intent.
    """
    results = classify(sentence)
    print('Result:',results)

    # walk the candidates in ranked order; an empty list falls straight through
    for candidate in results:
        for intent in intents['intents']:
            # only intents carrying the candidate's tag are of interest
            if intent['tag'] != candidate[0]:
                continue

            # remember the conversational context this intent establishes
            if 'context_set' in intent:
                if show_details:
                    print ('context:', intent['context_set'])
                context[userID] = intent['context_set']

            # a contextual intent applies only if the user's context matches its filter
            if 'context_filter' in intent:
                matches = userID in context and intent['context_filter'] == context[userID]
                if not matches:
                    continue

            if show_details:
                print ('tag:', intent['tag'])
            # a random response from the intent
            return random.choice(intent['responses'])

    return None

In [7]:
response('the symptoms of heart disease?', '123', show_details=True)

Result: [('heart-disease', 1.0)]
context: symptoms
tag: heart-disease


'Pain, numbness, weakness or coldness in your legs or arms if the blood vessels in those parts of your body are narrowed'

In [8]:
context

{'123': 'symptoms'}

In [9]:
response('hello there', '123', show_details=True)

Result: [('greeting', 0.99999964)]
context: 
tag: greeting


'Hello, I am here to help you'

In [10]:
response('calculate heart rates', '123', show_details=True)

Result: [('heartrates', 0.9920324)]
context: calculateheartrates
tag: heartrates


"1. At the wrist, lightly press the index and middle fingers of one hand on the opposite wrist, just below the base of the thumb, 2. At the neck, lightly press the side of the neck, just below your jawbone, 3. Count the number of beats in 15 seconds, and multiply by four. That's your heart rate."

In [11]:
context

{'123': 'calculateheartrates'}