In [None]:
import nltk
# Fetch the 'punkt' resource through NLTK's downloader.
# NOTE(review): presumably needed by the nltk.word_tokenize calls in later cells — confirm.
nltk.download('punkt')

In [70]:
# what we need for NLP
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()

# things we need for TensorFlow
import numpy as np
import tflearn 
import tensorflow as tf
import random

import json
# Load the intent definitions. The encoding is passed explicitly so the JSON
# text is decoded as UTF-8 regardless of the platform's default locale.
with open('cs61-chatbot-intents.json', encoding='utf-8') as json_data:
    intents = json.load(json_data)

words = []       # every token from every pattern, duplicates included
classes = []     # intent tags, in first-seen order
documents = []   # one (token list, tag) pair per pattern
ignore_words = ['?', '!', '.', ',', ':', ';']

# Tokenize each pattern of each intent and file it under the intent's tag.
for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        tokens = nltk.word_tokenize(pattern)
        words.extend(tokens)
        documents.append((tokens, tag))
        # A tag is registered once, and only when it has at least one pattern.
        if tag not in classes:
            classes.append(tag)

In [71]:
# Stem and lower-case every token, drop the punctuation listed in
# `ignore_words`, and keep the sorted unique results.
# The vocabulary is derived from `documents` instead of from the previous
# value of `words`, so re-running this cell cannot stem already-stemmed
# entries a second time; a first run gives the same list either way.
words = sorted({
    stemmer.stem(token.lower())
    for tokens, _tag in documents
    for token in tokens
    if token not in ignore_words
})

# unique intent tags, sorted
classes = sorted({tag for _tokens, tag in documents})

print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique stemmed words", words)

84 documents
8 classes ['commit', 'commit-yes', 'goodbye', 'greeting', 'late days', 'late-yes', 'no', 'thanks']
328 unique stemmed words ['!', "'d", "'m", "'s", "'ve", '(', ')', ',', '.', '1', '10', '115', '144', '145', '15', '2', '214', '24', '3', '4', '48', '5', '5c5e364', '6', '94', 'a', 'abl', 'about', 'access', 'accid', 'act', 'ad', 'addit', 'address', 'ahead', 'al', 'allot', 'almost', 'also', 'am', 'amount', 'an', 'and', 'anew', 'anoth', 'anxy', 'any', 'anyon', 'anyth', 'appear', 'apprecy', 'ar', 'as', 'ask', 'assign', 'at', 'attend', 'back', 'battery', 'be', 'becaus', 'been', 'believ', 'below', 'benef', 'bit', 'brok', 'brush', 'busy', 'but', 'by', 'bye', 'ca', 'can', 'champ', 'chang', 'chant', 'check', 'circumst', 'cod', 'com', 'commit', 'complet', 'comput', 'concern', 'conf', 'consist', 'correct', 'could', 'countless', 'coupl', 'credit', 'cur', 'dat', 'day', 'deadlin', 'deadline/with', 'debug', 'detery', 'difficult', 'dilig', 'display', 'do', 'don', 'dorm', 'due', 'ed', 'eddy',

In [72]:
# make the input compatible with tensorflow - converting docs of words to tensors of numbers
# create our training data: one [bag-of-words, one-hot tag] pair per document
training = []
# template for the one-hot output row (one slot per class)
output_empty = [0] * len(classes)

for pattern_tokens, tag in documents:
    # stem the pattern's tokens; a set keeps the membership tests below cheap
    pattern_stems = {stemmer.stem(token.lower()) for token in pattern_tokens}
    # 1 where the vocabulary word occurs in the pattern, 0 elsewhere
    bag = [1 if vocab_word in pattern_stems else 0 for vocab_word in words]

    # output is a '0' for each tag and '1' for current tag
    output_row = list(output_empty)
    output_row[classes.index(tag)] = 1

    training.append([bag, output_row])

# shuffle the pairs so each bag stays attached to its label
random.shuffle(training)

# Split the pairs into feature and label lists directly. The earlier version
# went through np.array(training), but each pair holds two lists of different
# lengths (vocabulary size vs. class count) and NumPy 1.24+ raises ValueError
# for such ragged input. `training` therefore stays a plain Python list.
train_x = [features for features, _label in training]
train_y = [label for _features, label in training]


# build the model
# clear the default TensorFlow graph before defining the network
tf.compat.v1.reset_default_graph()

# layer widths come from the training data: one input per vocabulary word,
# one output per class
n_inputs = len(train_x[0])
n_outputs = len(train_y[0])

# network: input -> two 8-unit fully connected layers -> softmax output
net = tflearn.input_data(shape=[None, n_inputs])
for _ in range(2):
    net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, n_outputs, activation='softmax')
net = tflearn.regression(net)

# wrap the network in a DNN model that logs to ./tflearn_logs
model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')
# train, then write the result to disk
model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True)
model.save('model.tflearn')

Training Step: 10999  | total loss: [1m[32m0.16928[0m[0m | time: 0.024s
| Adam | epoch: 1000 | loss: 0.16928 - acc: 0.9053 -- iter: 80/84
Training Step: 11000  | total loss: [1m[32m0.17062[0m[0m | time: 0.026s
| Adam | epoch: 1000 | loss: 0.17062 - acc: 0.9147 -- iter: 84/84
--
INFO:tensorflow:/Users/eliasabunuwara/Desktop/personal projects/cs61-chatbot/model.tflearn is not in all_model_checkpoint_paths. Manually adding it.


In [73]:
import pickle
# Persist the vocabulary, class list and training lists. The context manager
# closes (and flushes) the file as soon as the dump finishes, instead of
# leaving the handle open until garbage collection.
with open("training_data", "wb") as training_file:
    pickle.dump({'words':words, 'classes':classes, 'train_x':train_x, 'train_y':train_y}, training_file)

In [74]:
# restore all of our data structures
# NOTE: pickle.load can execute arbitrary code, so only load a "training_data"
# file that this notebook wrote itself. The context manager closes the handle.
with open("training_data", "rb") as training_file:
    data = pickle.load(training_file)
words = data['words']
classes = data['classes']
train_x = data['train_x']
train_y = data['train_y']
# dictionary to store user context: userID -> last 'context_set' value applied by response()
context = {}
# store user conversation: userID -> list of sentences passed to response()
conversations = {}
# classify() keeps only predictions whose probability is above this value
ERROR_THRESHOLD = 0.25

# import our chat-bot intents file (decoded explicitly as UTF-8)
import json
with open('cs61-chatbot-intents.json', encoding='utf-8') as json_data:
    intents = json.load(json_data)

# load our saved model; `model` must already exist from the training cell
model.load('./model.tflearn')

# tokenize user input
def clean_up_sentence(sentence):
    """Split `sentence` into tokens and return the lower-cased, stemmed tokens.

    Tokenization uses nltk.word_tokenize and stemming uses the module-level
    `stemmer`; the returned list keeps the token order of the input.
    """
    return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]

# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence
def bow(sentence, words, show_details=False):
    """Encode `sentence` as a 0/1 vector aligned with the vocabulary `words`.

    Position i of the result is 1 when `words[i]` equals one of the stems
    produced by clean_up_sentence(sentence), otherwise 0. With
    `show_details=True` every match is printed as it is found.
    Returns a NumPy array with one entry per vocabulary word.
    """
    stems = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for stem in stems:
        for position, vocab_word in enumerate(words):
            if vocab_word != stem:
                continue
            bag[position] = 1
            if show_details:
                print ("found in bag: %s" % vocab_word)

    return np.array(bag)


def classify(sentence):
    """Return the intents predicted for `sentence`, strongest first.

    The sentence is encoded with bow() against the module-level vocabulary and
    passed to `model.predict`. Classes whose probability is not above
    ERROR_THRESHOLD are dropped. The result is a list of
    (class name, probability) tuples sorted by descending probability.
    """
    probabilities = model.predict([bow(sentence, words)])[0]
    ranked = sorted(
        ([index, p] for index, p in enumerate(probabilities) if p > ERROR_THRESHOLD),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [(classes[index], p) for index, p in ranked]

def response(sentence, userID='123', show_details=False):
    """Print a reply to `sentence` and record the sentence for `userID`.

    The sentence is classified with classify(); predictions are tried from
    most to least probable. For the first prediction whose intent may answer
    (it has no 'context_filter', or its filter equals the context stored for
    this user), one of the intent's responses is chosen at random and printed
    together with the prediction's probability.

    Args:
        sentence: the user's message.
        userID: key under which conversation history and context are kept.
        show_details: when True, also print the context being set and the
            tag of the answering intent.

    Returns:
        None. Nothing is printed when no prediction yields an answer.
    """
    results = classify(sentence)

    # Record the message in this user's history. The earlier code assigned to
    # the list's `append` attribute instead of calling it, which raised
    # AttributeError on every message after a user's first one.
    conversations.setdefault(userID, []).append(sentence)

    # treating the results list as a stack: try the first prediction and pop
    # it when no intent can answer for it
    while results:
        best_tag, best_probability = results[0]
        for i in intents['intents']:
            # only intents tagged like the current best prediction are considered
            if i['tag'] != best_tag:
                continue

            # set context for this intent if necessary; this happens before
            # (and regardless of) the context-filter check below
            if 'context_set' in i:
                if show_details:
                    print ('context:', i['context_set'])
                context[userID] = i['context_set']

            # reply if the intent is not contextual, or if it is contextual
            # and matches the user's current context
            if 'context_filter' not in i or \
                    (userID in context and i['context_filter'] == context[userID]):
                if show_details:
                    print ('tag:', i['tag'])
                # a random response from the intent, followed by its probability
                print(random.choice(i['responses']), best_probability)
                return None

        results.pop(0)
    return None
            

INFO:tensorflow:Restoring parameters from /Users/eliasabunuwara/Desktop/personal projects/cs61-chatbot/model.tflearn


In [77]:
# Sample query with the default userID; response() prints the reply followed by the predicted probability.
response("I have a problem with my commit pset4 it seems like when i pushed to pset6 ot accidentally pushed to pset4")

If I'm correct, you have an issue with a pset commit? 0.9546086


In [80]:
# Short follow-up for the same default userID; intents with a 'context_filter' answer only if it matches the context stored for that user.
response("no")

Sorry if I misundertood you. Could you try again? 0.9568679


In [82]:
# Another sample query with the default userID.
response("i need more late hours because i was partying all week long")

Correct me if I'm wrong, you're asking for an extention? 0.8525875


In [83]:
# Short confirmation for the default userID; the reply chosen depends on the context stored for that user by earlier calls.
response("yes")

We are happy to grant you the late days that you've requested. 0.4875152
