# Creating a chatbot using TensorFlow and TFLearn

## Transform conversational intent definitions into a TensorFlow model

In [1]:
# things we need for NLP
import nltk
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()

# things we need for Tensorflow
import numpy as np
import tflearn
import tensorflow as tf
import random

Instructions for updating:
non-resource variables are not supported in the long term
curses is not supported on this machine (please install/reinstall curses for an optimal experience)


### Importing intents file

In [2]:
import json

# Read the intent definitions. The encoding is given explicitly because the
# platform default (e.g. cp1252 on Windows) would mis-decode any non-ASCII
# characters in the patterns or responses; JSON files are normally UTF-8.
with open('intents.json', encoding='utf-8') as intents_data:
    intents = json.load(intents_data)



### Organizing words, documents and classification classes

In [3]:
# Fetch the 'punkt' package via NLTK's downloader; the captured output shows it
# is a no-op ("already up-to-date") once the package is installed.
# NOTE(review): presumably required by nltk.word_tokenize used later — confirm.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\91846\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [4]:
words = []
classes = []
documents = []
ignore_words = ['?']

# walk every example pattern of every intent
for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        # split the pattern into individual tokens
        tokens = nltk.word_tokenize(pattern)

        # grow the raw vocabulary and remember (tokens, tag) as one document
        words += tokens
        documents.append((tokens, tag))

        # record the tag the first time one of its patterns is seen
        if tag not in classes:
            classes.append(tag)

# vocabulary: lowercase + stem every kept token, de-duplicate, sort
words = sorted({stemmer.stem(token.lower())
                for token in words
                if token not in ignore_words})

# classes: de-duplicate and sort
classes = sorted(set(classes))

print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique stemmed words", words)

79 documents
19 classes ['Advantages', 'Charging cable', 'EV Brand', 'Price', 'Range', 'Types of EV', 'VOVO Electric', 'VOVO Offer', 'city', 'ev charging station', 'full Charge', 'goodbye', 'greeting', 'help', 'installation', 'opentoday', 'payments', 'safety', 'thanks']
128 unique stemmed words ['&', "'s", 'a', 'about', 'acceiv', 'adv', 'an', 'any', 'anyon', 'approxim', 'ar', 'at', 'be', 'benefit', 'brand', 'by', 'bye', 'cabl', 'can', 'card', 'cas', 'cash', 'charg', 'city', 'class', 'cost', 'cov', 'credit', 'day', 'diff', 'dist', 'do', 'doe', 'doo', 'driv', 'elect', 'emerg', 'ev', 'every', 'far', 'find', 'for', 'ful', 'gar', 'go', 'good', 'goodby', 'hav', 'hello', 'help', 'hi', 'hom', 'hour', 'how', 'i', 'if', 'in', 'instal', 'is', 'it', 'kind', 'know', 'lat', 'lev', 'list', 'long', 'maint', 'many', 'mastercard', 'mean', 'meet', 'mil', 'much', 'my', 'nee', 'of', 'off', 'on', 'onlin', 'ont', 'op', 'our', 'own', 'pay', 'per', 'person', 'plug-in', 'poss', 'pric', 'problem', 'provid', 'qui

### Stemming

The stem `tak` will match `take`, `taking`, `takers`, etc. We could clean the words list and remove useless entries but this will suffice for now.


This data structure won’t work with TensorFlow, so we need to transform it further: *from documents of words into tensors of numbers.*

In [5]:
# create our training data: one [bag_of_words, one_hot_tag] pair per document
training = []
output = []

# all-zero template for the one-hot output rows (one slot per class)
output_array = [0] * len(classes)

# training set, bag of words for each sentence
for document in documents:
    # list of tokenized words for the pattern, lowercased and stemmed
    pattern_words = [stemmer.stem(word.lower()) for word in document[0]]

    # set view of the stems so each vocabulary lookup is O(1)
    pattern_stems = set(pattern_words)

    # bag of words: 1 where the vocabulary word occurs in the pattern, else 0
    bag = [1 if word in pattern_stems else 0 for word in words]

    # output is 0 for each tag and 1 for current tag
    output_row = list(output_array)
    output_row[classes.index(document[1])] = 1

    training.append([bag, output_row])

# shuffle the (features, label) pairs together so they stay aligned
random.shuffle(training)

# split into feature rows and label rows straight from the Python list.
# The bag (len(words)) and the label (len(classes)) have different lengths, so
# slicing a plain np.array(training) relies on ragged-array creation, which is
# deprecated and raises ValueError on NumPy >= 1.24.
train_x = [features for features, _label in training]
train_y = [label for _features, label in training]

# keep `training` available as an array, as before; dtype=object is required
# for the ragged rows
training = np.array(training, dtype=object)

  training = (np.array(training))


In [6]:
# `ops` comes from TensorFlow's internal framework package; it is used here
# (and again in the next cell) only for reset_default_graph().
from tensorflow.python.framework import ops
# Reset the default graph before any layers are defined.
ops.reset_default_graph()

In [7]:
# start from a clean default graph before defining any layers
ops.reset_default_graph()

# layer widths are taken from the training data itself
input_width = len(train_x[0])    # length of one bag-of-words row
output_width = len(train_y[0])   # length of one one-hot label row

# network: input -> 8 units -> 8 units -> softmax over the label slots
net = tflearn.input_data(shape=[None, input_width])
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, output_width, activation='softmax')
net = tflearn.regression(net)

# wrap the network in a DNN model; tensorboard logs go to 'tflearn_logs'
model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')

# train for 1000 epochs in batches of 8, then persist the trained model
model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True)
model.save('model.tflearn')




Training Step: 9999  | total loss: [1m[32m0.08789[0m[0m | time: 0.026s
| Adam | epoch: 1000 | loss: 0.08789 - acc: 0.9827 -- iter: 72/79
Training Step: 10000  | total loss: [1m[32m0.09256[0m[0m | time: 0.029s
| Adam | epoch: 1000 | loss: 0.09256 - acc: 0.9844 -- iter: 79/79
--
INFO:tensorflow:C:\Users\91846\chatbot-master\model.tflearn is not in all_model_checkpoint_paths. Manually adding it.


### Saving data structures using `Pickle`

In [8]:
# save all of our data structures
import pickle

# Use a context manager so the file is flushed and closed even if dumping
# fails; the bare open(...) handle was never closed.
with open("training_data", "wb") as training_data_file:
    pickle.dump({'words': words, 'classes': classes, 'train_x': train_x,
                 'train_y': train_y}, training_data_file)



### Doing some testing on the model

In [9]:
def clean_up_sentence(sentence):
    """Tokenize ``sentence`` and return its tokens lowercased and stemmed.

    Tokenization uses ``nltk.word_tokenize`` and stemming uses the
    module-level ``stemmer``; the result is a list with one entry per token.
    """
    return [stemmer.stem(token.lower())
            for token in nltk.word_tokenize(sentence)]

# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence


def bow(sentence, words, show_details=False):
    """Encode ``sentence`` as a bag-of-words vector over ``words``.

    Args:
        sentence: raw text; it is tokenized and stemmed by ``clean_up_sentence``.
        words: vocabulary list; position ``i`` of the result corresponds to
            ``words[i]``.
        show_details: when true, print a line for every vocabulary match.

    Returns:
        A NumPy array with one entry per vocabulary word: 1 if that word
        occurs among the sentence's stems, otherwise 0.
    """
    stems = clean_up_sentence(sentence)

    # start with "absent" for every vocabulary slot
    bag = [0 for _ in words]
    for stem in stems:
        for position, vocab_word in enumerate(words):
            if vocab_word != stem:
                continue
            bag[position] = 1
            if show_details:
                print("found in bag: %s" % vocab_word)

    return np.array(bag)

In [10]:
# Disabled example: the statements below are wrapped in a string literal, so
# they are not executed; the cell merely evaluates to (and echoes) the string.
'''p = bow("is your shop open today?", words)
print (p)
print (classes)'''

'p = bow("is your shop open today?", words)\nprint (p)\nprint (classes)'

In [11]:
# encode a sample question against the vocabulary, then show the vector
# followed by the class list
p1 = bow("what is EV charging station", words)
print(p1)
print(classes)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0]
['Advantages', 'Charging cable', 'EV Brand', 'Price', 'Range', 'Types of EV', 'VOVO Electric', 'VOVO Offer', 'city', 'ev charging station', 'full Charge', 'goodbye', 'greeting', 'help', 'installation', 'opentoday', 'payments', 'safety', 'thanks']


In [12]:
# Pass the single bag-of-words vector as a batch of one and print the model's
# raw output; the captured output shows one row of 19 scores, matching the
# 19 entries in `classes`.
print(model.predict([p1]))

[[4.61090938e-04 2.75078528e-05 5.73994312e-03 7.56842922e-11
  2.07661543e-07 4.03976145e-07 1.79802086e-02 6.10280829e-03
  1.22323260e-03 9.45590317e-01 1.07265386e-07 1.20416367e-02
  1.81821164e-07 5.90544225e-10 8.95970588e-05 1.05601794e-03
  5.09002064e-07 9.68610868e-03 2.07476738e-08]]


Intent that is closest to our sentence:

In [13]:
def get_predicted_intent(predictions):
    """Return the entry of ``classes`` at the arg-max position of ``predictions``.

    ``np.argmax`` is applied to ``predictions`` as a whole (flattened), and the
    resulting index is looked up in the module-level ``classes`` list.
    """
    best_index = np.argmax(predictions)
    return classes[best_index]

print(get_predicted_intent(model.predict([p1])))

ev charging station
