In [3]:
import json 
import numpy as np 
import keras
from keras.models import Sequential
from keras.layers import Dense, Embedding, GlobalAveragePooling1D
from keras.preprocessing.text import Tokenizer
from keras_preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

In [4]:
with open('Intent.json') as file:
    data = json.load(file)
training_sentences = []
training_labels = []
labels = []
responses = []
for intent in data['intents']:
    for text in intent['text']:
        training_sentences.append(text)
        training_labels.append(intent['intent'])
    responses.append(intent['responses'])   
    if intent['intent'] not in labels:
        labels.append(intent['intent'])      
num_classes = len(labels)

In [5]:
print(num_classes)

22


In [6]:
lbl_encoder = LabelEncoder()
lbl_encoder.fit(training_labels)
training_labels = lbl_encoder.transform(training_labels)

In [7]:
vocab_size = 1000
embedding_dim = 16
max_len = 20
oov_token = "<OOV>"

tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_token)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index
print(word_index)
sequences = tokenizer.texts_to_sequences(training_sentences)
print(sequences)
padded_sequences = pad_sequences(sequences, truncating='post', maxlen=max_len)

{'<OOV>': 1, 'you': 2, 'is': 3, 'me': 4, 'are': 5, 'i': 6, 'what': 7, 'the': 8, 'thanks': 9, 'do': 10, 'can': 11, 'name': 12, 'am': 13, 'please': 14, 'your': 15, 'tell': 16, 'a': 17, 'pod': 18, 'bay': 19, 'door': 20, 'adam': 21, 'bella': 22, 'open': 23, 'to': 24, 'real': 25, 'time': 26, 'not': 27, 'prove': 28, 'my': 29, 'it': 30, 'how': 31, 'talking': 32, 'gossip': 33, 'why': 34, 'good': 35, 'call': 36, 'know': 37, 'see': 38, 'hello': 39, 'user': 40, 'this': 41, 'great': 42, 'who': 43, 'thank': 44, "wasn't": 45, 'bye': 46, 'goodbye': 47, 'very': 48, 'self': 49, 'aware': 50, 'hi': 51, 'there': 52, 'doing': 53, 'well': 54, 'ok': 55, 'for': 56, 'quiet': 57, 'clever': 58, 'girl': 59, 'any': 60, 'some': 61, 'joke': 62, 'will': 63, 'have': 64, 'conscious': 65, 'hola': 66, 'hya': 67, 'hope': 68, 'think': 69, "what's": 70, 'understand': 71, 'saying': 72, 'be': 73, 'up': 74, 'fuck': 75, 'anyone': 76, 'in': 77, 'camera': 78, 'intelligent': 79, 'genious': 80, 'by': 81, 'could': 82, 'friends': 83,

In [8]:
model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=max_len))
model.add(GlobalAveragePooling1D())
model.add(Dense(16, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', 
              optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 20, 16)            16000     
                                                                 
 global_average_pooling1d (G  (None, 16)               0         
 lobalAveragePooling1D)                                          
                                                                 
 dense (Dense)               (None, 16)                272       
                                                                 
 dense_1 (Dense)             (None, 16)                272       
                                                                 
 dense_2 (Dense)             (None, 22)                374       
                                                                 
Total params: 16,918
Trainable params: 16,918
Non-trainable params: 0
____________________________________________________

In [9]:
epochs = 500
history = model.fit(padded_sequences, np.array(training_labels), epochs=epochs)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [10]:
# to save the trained model
model.save("chat_model")

import pickle

# to save the fitted tokenizer
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# to save the fitted label encoder
with open('label_encoder.pickle', 'wb') as ecn_file:
    pickle.dump(lbl_encoder, ecn_file, protocol=pickle.HIGHEST_PROTOCOL)

INFO:tensorflow:Assets written to: chat_model\assets


In [12]:
import colorama 
colorama.init()
from colorama import Fore, Style, Back

import random
import pickle

with open("Intent.json") as file:
    data = json.load(file)


def chat():
    # load trained model
    model = keras.models.load_model('chat_model')

    # load tokenizer object
    with open('tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)

    # load label encoder object
    with open('label_encoder.pickle', 'rb') as enc:
        lbl_encoder = pickle.load(enc)

    # parameters
    max_len = 20
    
    while True:
        print(Fore.LIGHTBLUE_EX + "User: " + Style.RESET_ALL, end="")
        inp = input()
        if inp.lower() == "quit":
            break

        result = model.predict(pad_sequences(tokenizer.texts_to_sequences([inp]),
                                             truncating='post', maxlen=max_len))
        tag = lbl_encoder.inverse_transform([np.argmax(result)])

        for i in data['intents']:
            if i['intent'] == tag:
                print(Fore.GREEN + "ChatBot:" + Style.RESET_ALL , np.random.choice(i['responses']))

        # print(Fore.GREEN + "ChatBot:" + Style.RESET_ALL,random.choice(responses))

print(Fore.YELLOW + "Start messaging with the bot (type quit to stop)!" + Style.RESET_ALL)
chat()

Start messaging with the bot (type quit to stop)!
ChatBot: Hola human, please tell me your GeniSys user
ChatBot: OK! Hola <HUMAN>, how can I help you?
ChatBot: My pleasure
ChatBot: I am sorry to disturb you
User: 