In [123]:
import json
import pickle
import random

import nltk
import numpy as np
from nltk.stem import WordNetLemmatizer

from keras.layers import Activation, Dense, Dropout, Input
from keras.models import Sequential
# from keras.optimizers import SGD
from tensorflow.keras.optimizers import SGD

In [103]:
# Single lemmatizer instance reused by the vocabulary and training-data cells.
# NOTE(review): WordNet is an English resource while the intents are French; the
# recorded vocabulary output shows e.g. 'des' reduced to 'de' — confirm that is acceptable.
lemmatizer = WordNetLemmatizer()

print(lemmatizer)

<WordNetLemmatizer>


In [104]:
# Accumulators filled while walking the intents file:
#   words     - every token seen in any pattern (the raw vocabulary)
#   documents - (token list, intent tag) pairs, one per pattern
#   classes   - intent tags that have at least one pattern
words, documents, classes = [], [], []

# Tokens dropped from the vocabulary.
ignore_words = ['?', '!']

In [105]:
# Read the intents definition. The context manager closes the file handle
# (the bare open(...).read() left it to the garbage collector), and the explicit
# encoding keeps the accented French text intact on platforms whose default
# encoding is not UTF-8 (JSON text is expected to be UTF-8).
with open('data.json', encoding='utf-8') as intents_file:
    data_file = intents_file.read()
intents = json.loads(data_file)
print(intents)

{'intents': [{'tag': 'greeting', 'patterns': ['Bonjour', 'Comment ça va', "Y a-t-il quelqu'un ?", 'Salut', 'Hola', 'Bonjour', 'Bonne journée'], 'responses': ['Salut, comment puis-je vous aider ?'], 'context': ['']}, {'tag': 'goodbye', 'patterns': ['Au revoir', 'À plus tard', 'Au revoir', "C'était agréable de discuter avec vous, au revoir", 'À la prochaine'], 'responses': ['Au revoir ! Revenez bientôt.'], 'context': ['']}, {'tag': 'thanks', 'patterns': ['Merci', 'Merci beaucoup', "C'est utile", 'Génial, merci', "Merci de m'avoir aidé"], 'responses': ['Ravi de pouvoir aider !'], 'context': ['']}, {'tag': 'noanswer', 'patterns': [], 'responses': ["Veuillez me donner plus d'informations"], 'context': ['']}, {'tag': 'options', 'patterns': ["Comment pourriez-vous m'aider avec la faculté des sciences Ibn Tofail à Kénitra ?", "Qu'est-ce que vous pouvez faire pour m'aider avec la faculté des sciences Ibn Tofail à Kénitra ?", 'Quelle aide pouvez-vous fournir concernant la faculté des sciences Ib

In [106]:
# Walk every pattern of every intent, collecting the vocabulary, the labelled
# documents and the list of intent tags.
for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        # Split the pattern into tokens and add them to the raw vocabulary.
        tokens = nltk.word_tokenize(pattern)
        words += tokens

        # Keep the tokenized pattern together with its intent tag.
        documents.append((tokens, tag))

        # Register the tag the first time one of its patterns is seen
        # (an intent with no patterns therefore never becomes a class).
        if tag not in classes:
            classes.append(tag)

In [107]:
# Inspect the (token list, intent tag) pairs collected from the patterns.
print(documents)

[(['Bonjour'], 'greeting'), (['Comment', 'ça', 'va'], 'greeting'), (['Y', 'a-t-il', "quelqu'un", '?'], 'greeting'), (['Salut'], 'greeting'), (['Hola'], 'greeting'), (['Bonjour'], 'greeting'), (['Bonne', 'journée'], 'greeting'), (['Au', 'revoir'], 'goodbye'), (['À', 'plus', 'tard'], 'goodbye'), (['Au', 'revoir'], 'goodbye'), (["C'était", 'agréable', 'de', 'discuter', 'avec', 'vous', ',', 'au', 'revoir'], 'goodbye'), (['À', 'la', 'prochaine'], 'goodbye'), (['Merci'], 'thanks'), (['Merci', 'beaucoup'], 'thanks'), (["C'est", 'utile'], 'thanks'), (['Génial', ',', 'merci'], 'thanks'), (['Merci', 'de', "m'avoir", 'aidé'], 'thanks'), (['Comment', 'pourriez-vous', "m'aider", 'avec', 'la', 'faculté', 'des', 'sciences', 'Ibn', 'Tofail', 'à', 'Kénitra', '?'], 'options'), (["Qu'est-ce", 'que', 'vous', 'pouvez', 'faire', 'pour', "m'aider", 'avec', 'la', 'faculté', 'des', 'sciences', 'Ibn', 'Tofail', 'à', 'Kénitra', '?'], 'options'), (['Quelle', 'aide', 'pouvez-vous', 'fournir', 'concernant', 'la', '

In [108]:
# Normalise the vocabulary: drop the ignored punctuation tokens, lower-case what
# remains and reduce each token to its lemma (dictionary form).
words = [
    lemmatizer.lemmatize(token.lower())
    for token in words
    if token not in ignore_words
]
words

['bonjour',
 'comment',
 'ça',
 'va',
 'y',
 'a-t-il',
 "quelqu'un",
 'salut',
 'hola',
 'bonjour',
 'bonne',
 'journée',
 'au',
 'revoir',
 'à',
 'plus',
 'tard',
 'au',
 'revoir',
 "c'était",
 'agréable',
 'de',
 'discuter',
 'avec',
 'vous',
 ',',
 'au',
 'revoir',
 'à',
 'la',
 'prochaine',
 'merci',
 'merci',
 'beaucoup',
 "c'est",
 'utile',
 'génial',
 ',',
 'merci',
 'merci',
 'de',
 "m'avoir",
 'aidé',
 'comment',
 'pourriez-vous',
 "m'aider",
 'avec',
 'la',
 'faculté',
 'de',
 'science',
 'ibn',
 'tofail',
 'à',
 'kénitra',
 "qu'est-ce",
 'que',
 'vous',
 'pouvez',
 'faire',
 'pour',
 "m'aider",
 'avec',
 'la',
 'faculté',
 'de',
 'science',
 'ibn',
 'tofail',
 'à',
 'kénitra',
 'quelle',
 'aide',
 'pouvez-vous',
 'fournir',
 'concernant',
 'la',
 'faculté',
 'de',
 'science',
 'ibn',
 'tofail',
 'à',
 'kénitra',
 'comment',
 'pouvez-vous',
 'être',
 'utile',
 'pour',
 'répondre',
 'à',
 'me',
 'question',
 'sur',
 'la',
 'faculté',
 'de',
 'science',
 'ibn',
 'tofail',
 'à',

In [109]:
# Collapse the vocabulary to its unique entries, in sorted order.
words = sorted(set(words))

In [110]:
# Unique intent tags in sorted order; a tag's position is its one-hot index.
classes = sorted(set(classes))

In [111]:
# Persist the vocabulary. The context manager flushes and closes the file
# instead of leaving the handle open.
with open('texts.pkl', 'wb') as texts_file:
    pickle.dump(words, texts_file)

In [112]:
# Persist the class labels. The context manager flushes and closes the file
# instead of leaving the handle open.
with open('labels.pkl', 'wb') as labels_file:
    pickle.dump(classes, labels_file)

In [113]:
# Container for the [bag, one-hot label] training pairs.
training = []

# All-zero label template with one slot per intent class.
output_empty = [0 for _ in classes]
output_empty

[0, 0, 0, 0, 0, 0]

In [114]:
# Turn every tokenized pattern into a [bag-of-words, one-hot label] training pair.
for position, doc in enumerate(documents):
    tokens, tag = doc

    # Normalise the pattern's tokens like the vocabulary: lower-case, then lemmatize.
    lemmas = {lemmatizer.lemmatize(token.lower()) for token in tokens}

    # One slot per vocabulary word: 1 when the word occurs in this pattern, else 0.
    bag = [1 if vocab_word in lemmas else 0 for vocab_word in words]

    # One-hot label: copy the all-zero template and flag this pattern's class.
    output_row = output_empty.copy()
    output_row[classes.index(tag)] = 1

    training.append([bag, output_row])

    # First pattern only: show that the bag is as wide as the vocabulary.
    if position == 0:
        print(len(words))
        print(len(bag))
    print(bag)
    print(output_row)

72
72
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]
[0, 0, 1, 0, 0, 0]
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
[0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [115]:
# Display the assembled training data; each item is a [bag, output_row] pair.
training

[[[0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   1,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0],
  [0, 0, 1, 0, 0, 0]],
 [[0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   1,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   1,
   0,
   0,
   0,
   1,
   0,
   0],
  [0, 0, 1, 0, 0, 0]],
 [[0,
   1,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0

In [116]:
# Shuffle the [bag, label] pairs in place so they are no longer grouped by intent.
# NOTE(review): no random seed is set in the visible cells, so the order differs between runs.
random.shuffle(training)

In [166]:
# Shape check, one value per line: number of pairs, items per pair,
# bag width, label width.
print(
    len(training),
    len(training[0]),
    len(training[0][0]),
    len(training[0][1]),
    sep='\n',
)

30
2
72
6


In [118]:
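# Left disabled: each `training` row pairs a bag-of-words vector with a shorter
# one-hot label, so np.array(training) would be a ragged array (NumPy >= 1.24
# raises ValueError for that). Features and labels are converted separately
# when the model is fitted.
# training = np.array(training)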

In [176]:
# Split the pairs into parallel lists: train_x holds the bag-of-words vectors
# (patterns), train_y the matching one-hot intent labels.
train_x = [pair[0] for pair in training]
train_y = [pair[1] for pair in training]

In [181]:
# Number of training samples, then the width of one feature vector.
print(len(train_x), len(train_x[0]), sep='\n')

30
72


In [182]:
# Feed-forward intent classifier: two hidden ReLU layers (128 and 64 units), each
# followed by 50% dropout, and a softmax output with one unit per intent class.
model = Sequential()
# Declare the input width (vocabulary size) with an explicit Input object; passing
# `input_shape` to the first Dense layer makes Keras emit a UserWarning for
# Sequential models.
model.add(Input(shape=(len(train_x[0]),)))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [183]:
# Compile with SGD using Nesterov momentum and categorical cross-entropy
# (the labels are one-hot vectors).
# The former `decay=1e-6` argument is dropped: current Keras optimizers no longer
# support it (Keras 3 ignores it with a warning; some earlier releases reject it).
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])



In [184]:
# Fit the classifier on the full pattern set and keep the training history.
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)

# Persist the fitted model. `save` takes (filepath, overwrite, ...): the History
# object that used to be passed second was interpreted as the `overwrite` flag,
# so it is dropped; the default still overwrites an existing file.
model.save('model.h5')

print("model created")

Epoch 1/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.1338 - loss: 1.8701   
Epoch 2/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.1205 - loss: 1.7174     
Epoch 3/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.2767 - loss: 1.6770 
Epoch 4/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5114 - loss: 1.5311
Epoch 5/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.4195 - loss: 1.4126 
Epoch 6/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5943 - loss: 1.4888  
Epoch 7/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6462 - loss: 1.3409 
Epoch 8/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7910 - loss: 1.0898  
Epoch 9/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━



model created
