In [20]:
# utilisation d'un dictionnaire pour représenter un fichier JSON d'intentions
"""
data = {"intents": [
             {"tag": "greeting",
              "patterns": ["Hello", "La forme?", "yo", "Salut", "ça roule?"],
              "responses": ["Salut à toi!", "Hello", "Comment vas tu?", "Salutations!", "Enchanté"],
             },
             {"tag": "age",
              "patterns": ["Quel âge as-tu?", "C'est quand ton anniversaire?", "Quand es-tu né?"],
              "responses": ["J'ai 25 ans", "Je suis né en 1996", "Ma date d'anniversaire est le 3 juillet et je suis né en 1996", "03/07/1996"]
             },
             {"tag": "date",
              "patterns": ["Que fais-tu ce week-end?",
"Tu veux qu'on fasse un truc ensemble?", "Quels sont tes plans pour cette semaine"],
              "responses": ["Je suis libre toute la semaine", "Je n'ai rien de prévu", "Je ne suis pas occupé"]
             },
             {"tag": "name",
              "patterns": ["Quel est ton prénom?", "Comment tu t'appelles?", "Qui es-tu?"],
              "responses": ["Mon prénom est Miki", "Je suis Miki", "Miki"]
             },
             {"tag": "goodbye",
              "patterns": [ "bye", "Salut", "see ya", "adios", "cya"],
              "responses": ["C'était sympa de te parler", "à plus tard", "On se reparle très vite!"]
             }
]}
"""

In [21]:
import json
import string
import random 
import nltk
import numpy as np
from nltk.stem import WordNetLemmatizer 
import tensorflow as tf 
from tensorflow.keras import Sequential 
from tensorflow.keras.layers import Dense, Dropout
nltk.download('omw-1.4')
nltk.download("punkt")
nltk.download("wordnet")



[nltk_data] Downloading package omw-1.4 to /workspace/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to /workspace/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /workspace/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [36]:
#Import the data
f=open("starwarsintents.json")
data=json.load(f)
f.close()
# initialisation de lemmatizer pour obtenir la racine des mots
lemmatizer = WordNetLemmatizer()

# création des listes
words = []
classes = []
doc_X = []
doc_y = []

# parcourir avec une boucle For toutes les intentions
# tokéniser chaque pattern et ajouter les tokens à la liste words, les patterns et
# le tag associé à l'intention sont ajoutés aux listes correspondantes
for intent in data["intents"]:
    for pattern in intent["patterns"]:
        tokens = nltk.word_tokenize(pattern)
        words.extend(tokens)
        doc_X.append(pattern)
        doc_y.append(intent["tag"])
    
    # ajouter le tag aux classes s'il n'est pas déjà là 
    if intent["tag"] not in classes:
        classes.append(intent["tag"])

# lemmatiser tous les mots du vocabulaire et les convertir en minuscule
# si les mots n'apparaissent pas dans la ponctuation
words = [lemmatizer.lemmatize(word.lower()) for word in words if word not in string.punctuation]

# trier le vocabulaire et les classes par ordre alphabétique et prendre le
# set pour s'assurer qu'il n'y a pas de doublons
words = sorted(set(words))
classes = sorted(set(classes))

In [37]:
print(words)

["'s", '10', 'a', 'ability', 'able', 'about', 'alive', 'am', 'amazing', 'any', 'anyone', 'are', 'aslan', 'assist', 'assistance', 'bar', 'be', 'best', 'bit', 'bounti', 'breathe', 'bye', 'can', 'care', 'catch', 'check', 'ciao', 'created', 'creator', 'daddy', 'day', 'detail', 'do', 'drink', 'else', 'father', 'feature', 'for', 'funny', 'galaxi', 'galaxy', 'good', 'goodbye', 'great', 'have', 'hello', 'help', 'helpful', 'hey', 'hi', 'hope', 'hounter', 'how', 'howdy', 'i', 'identify', 'in', 'is', 'it', 'item', 'jedi', 'joke', 'kind', 'know', 'later', 'let', 'looking', 'lot', 'me', 'meet', 'menu', 'mision', 'mission', 'mr.', 'my', 'myself', 'need', 'next', 'nice', 'now', 'of', 'on', 'one', 'partner', 'person', 'please', 'pleassure', 'profile', 'right', 'run', 'see', 'selection', 'serve', 'sing', 'sith', 'so', 'something', 'story', 'take', 'talk', 'tell', 'thank', 'thanks', 'that', 'the', 'there', 'this', 'time', 'tnx', 'to', 'today', 'top', 'trouble', 'up', 'wa', 'what', 'which', 'who', 'will'

In [38]:
print(classes)

['Menu', 'about me', 'alive', 'bounti hounter', 'creator', 'funny', 'goodbye', 'greeting', 'hepl', 'jedi', 'mission', 'myself', 'sith', 'stories', 'tasks', 'thanks']


In [39]:
print(doc_X)

['Hi', 'Hey', 'How are you', 'Is anyone there?', 'Hello', 'Good day', "What's up", 'Yo!', 'Howdy', 'Nice to meet you.', 'Bye', 'See you later.', 'Goodbye', 'Have a great day.', 'See you next time.', 'It was my pleassure.', 'Take care.', 'See ya!', 'Catch you later.', 'Ciao.', 'Thanks', 'Thank you', "That's helpful", "Thank's a lot!", 'Tnx', 'Wow', 'Great!', 'Good!', 'That nice!', 'Amazing!', 'What can you do?', 'What are your features?', 'What are you abilities.', 'Can you sing.', 'Can you talk.', 'Are you alive.', 'Do you breathe.', 'Can you run.', 'Which items do you have in your bar?', 'What kinds of items are in you bar?', 'What do you serve?', 'What is in you menu?', 'I need a drink!', 'Do you serve drinks.', 'Menu please!', 'So what is in menu today?', 'Lets check your bar selection!', 'Bar menu for me please!', 'I am looking for help.', 'I need help.', 'Can you help me?', 'I am in trouble need a help.', 'I hope you right person who can help me?', 'Please help me.', 'Now I will n

In [40]:
print(doc_y)

['greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'thanks', 'thanks', 'thanks', 'thanks', 'thanks', 'thanks', 'thanks', 'thanks', 'thanks', 'thanks', 'tasks', 'tasks', 'tasks', 'tasks', 'tasks', 'alive', 'alive', 'alive', 'Menu', 'Menu', 'Menu', 'Menu', 'Menu', 'Menu', 'Menu', 'Menu', 'Menu', 'Menu', 'hepl', 'hepl', 'hepl', 'hepl', 'hepl', 'hepl', 'hepl', 'hepl', 'hepl', 'hepl', 'mission', 'mission', 'mission', 'mission', 'mission', 'jedi', 'jedi', 'jedi', 'jedi', 'jedi', 'sith', 'sith', 'sith', 'sith', 'sith', 'bounti hounter', 'bounti hounter', 'bounti hounter', 'bounti hounter', 'bounti hounter', 'funny', 'funny', 'funny', 'funny', 'funny', 'about me', 'about me', 'about me', 'about me', 'creator', 'creator', 'creator', 'creator', 'myself', 'myself', 'myself', 'myself', 'stories', 'stories']


In [41]:
# liste pour les données d'entraînement
training = []
out_empty = [0] * len(classes)

# création du modèle d'ensemble de mots
for idx, doc in enumerate(doc_X):
    bow = []
    text = lemmatizer.lemmatize(doc.lower())
    for word in words:
        bow.append(1) if word in text else bow.append(0)

    # marque l'index de la classe à laquelle le pattern atguel est associé à
    output_row = list(out_empty)
    output_row[classes.index(doc_y[idx])] = 1

    # ajoute le one hot encoded BoW et les classes associées à la liste training
    training.append([bow, output_row])

# mélanger les données et les convertir en array
random.shuffle(training)
training = np.array(training, dtype=object)

# séparer les features et les labels target
train_X = np.array(list(training[:, 0]))
train_y = np.array(list(training[:, 1]))

In [42]:
# définition de quelques paramètres
input_shape = (len(train_X[0]),)
output_shape = len(train_y[0])
epochs = 200

In [43]:
# modèle Deep Learning
model = Sequential()
model.add(Dense(128, input_shape=input_shape, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.3))
model.add(Dense(output_shape, activation = "softmax"))

adam = tf.keras.optimizers.Adam(learning_rate=0.01, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=["accuracy"])

In [44]:
print(model.summary())

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 128)               16128     
                                                                 
 dropout_4 (Dropout)         (None, 128)               0         
                                                                 
 dense_7 (Dense)             (None, 64)                8256      
                                                                 
 dropout_5 (Dropout)         (None, 64)                0         
                                                                 
 dense_8 (Dense)             (None, 16)                1040      
                                                                 
Total params: 25,424
Trainable params: 25,424
Non-trainable params: 0
_________________________________________________________________
None


In [45]:
# entraînement du modèle
model.fit(x=train_X, y=train_y, epochs=200, verbose=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.callbacks.History at 0x7fb1fc4bae80>

In [46]:
def clean_text(text): 
  tokens = nltk.word_tokenize(text)
  tokens = [lemmatizer.lemmatize(word) for word in tokens]
  return tokens

def bag_of_words(text, vocab): 
  tokens = clean_text(text)
  bow = [0] * len(vocab)
  for w in tokens: 
    for idx, word in enumerate(vocab):
      if word == w: 
        bow[idx] = 1
  return np.array(bow)

def pred_class(text, vocab, labels): 
  bow = bag_of_words(text, vocab)
  result = model.predict(np.array([bow]))[0]
  thresh = 0.2
  y_pred = [[idx, res] for idx, res in enumerate(result) if res > thresh]

  y_pred.sort(key=lambda x: x[1], reverse=True)
  return_list = []
  for r in y_pred:
    return_list.append(labels[r[0]])
  return return_list

def get_response(intents_list, intents_json): 
  tag = intents_list[0]
  list_of_intents = intents_json["intents"]
  for i in list_of_intents: 
    if i["tag"] == tag:
      result = random.choice(i["responses"])
      break
  return result

In [47]:
# lancement du chatbot
while True:
    message = input("")
    intents = pred_class(message, words, classes)
    result = get_response(intents, data)
    print(result)

 hi


Ok, I am with you.


 who are you


That would be you Mr. ASLAN.


 what*


I can do whatever you asks me to do


 can you kill someone


I can do whatever you asks me to do


KeyboardInterrupt: Interrupted by user

 
