In [1]:
import nltk
from nltk.stem.lancaster import LancasterStemmer

In [2]:
import numpy as np
import tflearn
import tensorflow as tf
import random

In [3]:
# import our chat-bot intents file
import json
with open('intents.json') as json_data:
    intents = json.load(json_data)

# Stop words

In [4]:
from nltk.corpus import stopwords

In [5]:
stop_words=stopwords.words('french')

In [6]:
english=stopwords.words('english')

In [7]:
stop_words.extend(english)

In [8]:
words = [] #Design the Vocabulary (unique words)
classes = []
documents = []
stop_words=stopwords.words('french')
english=stopwords.words('english')
stop_words.extend(english)
stop_words.extend(["?","!",".",","])
# loop through each sentence in our intents patterns
for intent in intents['intents']:
    for pattern in intent['patterns']:
        # tokenize each word in the sentence
        w = nltk.word_tokenize(pattern)
        # add to our words list
        words.extend(w)
        # add to documents in our corpus
        documents.append((w, intent['tag']))
        # add to our classes list
        if intent['tag'] not in classes:
            classes.append(intent['tag'])  

In [9]:
documents[0:10]

[(['Hi'], 'greeting'),
 (['How', 'are', 'you'], 'greeting'),
 (['Is', 'anyone', 'there', '?'], 'greeting'),
 (['Hello'], 'greeting'),
 (['Good', 'day'], 'greeting'),
 (['who', 'is', 'the', 'manager', 'of'], 'askManager'),
 (['manager', 'of'], 'askManager'),
 (['tell', 'me', 'who', 'is', 'the', 'manager', 'of'], 'askManager'),
 (['who', 'manages'], 'askManager'),
 (['Bye'], 'goodbye')]

In [10]:
stemmer = LancasterStemmer()

In [11]:
# stem and lower each word and remove duplicates
words = [stemmer.stem(w.lower()) for w in words if w not in stop_words]
words = sorted(list(set(words)))

In [12]:
# remove duplicates
classes = sorted(list(set(classes)))

print (len(documents), "documents")
print (len(classes), "classes", classes)
print (len(words), "unique stemmed words", words)


43 documents
12 classes ['askManager', 'goodbye', 'greeting', 'hours', 'mopeds', 'opentoday', 'payments', 'rental', 'thanks', 'today', 'translation', 'weather']
51 unique stemmed words ["'d", "'s", 'acceiv', 'anyon', 'ar', 'bye', 'can', 'card', 'cash', 'clim', 'credit', 'day', 'do', 'good', 'goodby', 'hello', 'help', 'hi', 'hour', 'how', 'i', 'is', 'kind', 'konw', 'lat', 'lik', 'man', 'mastercard', 'mop', 'na', 'op', 'outsid', 'rent', 'see', 'tak', 'tel', 'thank', 'that', 'thing', 'today', 'transl', 'u', 'veux', 'wan', 'want', 'weath', 'what', 'wheath', 'when', 'which', 'work']


In [13]:
# create our training data
training = []
output = []

# create an empty array for our output
output_empty = [0] * len(classes)

# training set, bag of words for each sentence
for doc in documents:
    # initialize our bag of words
    bag = []
    # list of tokenized words for the pattern (pattern = what user says)
    pattern_words = doc[0] 
    # stem each word
    pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]
    # print(pattern_words)
    # create our bag of words array
    # mark the presence of words as a boolean value, 0 for absent, 1 for present.
    for w in words:
        bag.append(1) if w in pattern_words else bag.append(0)

    # output is a '0' for each tag and '1' for current tag
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])

In [14]:
# shuffle our features and turn into np.array
random.shuffle(training)
training = np.array(training)

# create train and test lists
train_x = list(training[:,0])
train_y = list(training[:,1])

In [15]:
len(train_y)

43

In [16]:
# reset underlying graph data
tf.reset_default_graph()
# Build neural network
net = tflearn.input_data(shape=[None, len(train_x[0])])
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')
net = tflearn.regression(net)

# Define model and setup tensorboard
model = tflearn.DNN(net, tensorboard_dir='tflearn_logs', tensorboard_verbose=3)
# Start training (apply gradient descent algorithm)
model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True)
model.save('model.tflearn')
# save all of our data structures
import pickle
pickle.dump( {'words':words, 'classes':classes, 'train_x':train_x, 'train_y':train_y}, open( "training_data", "wb" ) )

Training Step: 5999  | total loss: [1m[32m0.16191[0m[0m | time: 0.016s
| Adam | epoch: 1000 | loss: 0.16191 - acc: 0.9951 -- iter: 40/43
Training Step: 6000  | total loss: [1m[32m0.15422[0m[0m | time: 0.020s
| Adam | epoch: 1000 | loss: 0.15422 - acc: 0.9956 -- iter: 43/43
--
INFO:tensorflow:/home/helmi/Jupyter/Chatbot1/model.tflearn is not in all_model_checkpoint_paths. Manually adding it.
