# NN Chatbot Training 

### Trains the conversational attributes of our chatbot
<hr>

<br>

# Imports
<hr>

In [40]:
# Import

import json 
import numpy as np 

import pickle

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

# Var Declaration

In [41]:
# 
# Containers populated by the data-ingestion cell
training_sentences, training_labels = [], []  # one pattern / one tag per training example
labels, responses = [], []                    # distinct tags / response lists per intent

# Tokenizer and model hyperparameters
vocab_size, embedding_dim, max_len = 1000, 16, 20
oov_token = '<00V>'  # NOTE(review): written with zeros, not the letters "OO" - confirm intended

<br>

# Data Ingestion
<hr>

In [42]:
"""
    This code block opens the json file and enters the data into arrays (lists),
    so that it can be fed into the pipeline.

    The lists are rebuilt from scratch on every run, so re-executing this cell
    does not duplicate the training data, and responses[i] always holds the
    responses that belong to labels[i].
"""

# Start from empty containers: appending to lists created in another cell
# would duplicate every example each time this cell is re-run.
training_sentences = []
training_labels = []
labels = []
responses = []

# Open json file with an explicit encoding instead of the platform default
with open('../data/intents.json', encoding='utf-8') as file:
    data = json.load(file)

# loop to load data
for intent in data['intents']:
    tag = intent['tag']

    # every pattern becomes one training example labelled with its intent tag
    for pattern in intent['patterns']:
        training_sentences.append(pattern)
        training_labels.append(tag)

    if tag not in labels:
        # first occurrence of this tag: register the tag and its responses
        # together so both lists stay index-aligned
        labels.append(tag)
        responses.append(list(intent['responses']))
    else:
        # repeated tag: merge into the existing entry; appending a second list
        # would shift responses out of step with labels
        responses[labels.index(tag)].extend(intent['responses'])


num_classes = len(labels)

In [43]:
# Peek at the first few entries of each list built during ingestion
print(
    f'Training Sentences: {training_sentences[:7]}',
    f'Training Labels:    {training_labels[:6]}',
    f'Labels:             {labels[:7]}',
    f'Responses:          {responses[:3]}',
    sep='\n',
)

Training Sentences: ['Hi', 'Hey', 'Is anyone there?', 'Hello', 'Hay', 'Bye', 'See you later']
Training Labels:    ['greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'goodbye']
Labels:             ['greeting', 'goodbye', 'thanks', 'about', 'name', 'help', 'bookaroom']
Responses:          [['Hello', 'Hi', 'Hi there'], ['See you later', 'Have a nice day', 'Bye! Come back again'], ['Happy to help!', 'Any time!', 'My pleasure', "You're most welcome!"]]


<br> 

# Preprocessing
<hr>

##### Encode Labels w/LabelEncoder 

In [44]:

# Map each string tag to an integer class id.
# fit_transform learns the set of tags and encodes the labels in a single call.
lbl_encoder = LabelEncoder()
training_labels = lbl_encoder.fit_transform(training_labels)



In [45]:
# View the integer-encoded labels produced by the LabelEncoder

training_labels

array([4, 4, 4, 4, 4, 3, 3, 3, 7, 7, 7, 7, 0, 0, 0, 6, 6, 6, 5, 5, 5, 5,
       5, 5, 5, 1, 1, 1, 1, 1, 2, 2, 2])


##### Tokenize and Pad Training Sentences

In [46]:
"""
 Turn the training sentences into equal-length integer sequences.

 The tokenizer is created with num_words = vocab_size and uses oov_token
 as its out-of-vocabulary token; every sequence is then padded or
 truncated to max_len entries.
"""

# build the tokenizer and learn the vocabulary from the training sentences
tokenizer = Tokenizer(oov_token=oov_token, num_words=vocab_size)
tokenizer.fit_on_texts(training_sentences)

# word -> integer index dictionary learned by the tokenizer
word_index = tokenizer.word_index

# encode each sentence, then bring every sequence to max_len entries
# (sequences longer than max_len are cut at the end)
sequences = tokenizer.texts_to_sequences(training_sentences)
padded_sequences = pad_sequences(sequences, maxlen=max_len, truncating='post')


In [47]:
# Peek at the vocabulary size, the first encoded sentences and the padded array
print(
    '',
    f'Len of word_index: {len(word_index)}',
    f'Sequences:         {sequences[:7]} ',
    '',
    'padded Sequences: ',
    sep='\n',
)
padded_sequences[:3]


Len of word_index: 54
Sequences:         [[27], [28], [9, 29, 16], [30], [31], [32], [33, 2, 34]] 

padded Sequences: 


array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0, 27],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0, 28],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  9, 29, 16]], dtype=int32)

In [48]:
# Intent classifier: embedding -> average over tokens -> two hidden layers -> softmax over intents
model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_len),
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    Dense(16, activation='relu'),
    Dense(num_classes, activation='softmax'),
])


In [49]:
# Configure training: sparse categorical cross-entropy loss, Adam optimizer, accuracy metric
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


In [50]:
# Model Summary: print each layer's output shape and parameter count

model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 20, 16)            16000     
                                                                 
 global_average_pooling1d_3   (None, 16)               0         
 (GlobalAveragePooling1D)                                        
                                                                 
 dense_9 (Dense)             (None, 16)                272       
                                                                 
 dense_10 (Dense)            (None, 16)                272       
                                                                 
 dense_11 (Dense)            (None, 8)                 136       
                                                                 
Total params: 16,680
Trainable params: 16,680
Non-trainable params: 0
__________________________________________________

In [51]:
# Fit the classifier on the padded sequences and their encoded labels
epochs = 555
history = model.fit(
    x=padded_sequences,
    y=np.array(training_labels),
    epochs=epochs,
)

Epoch 1/555
Epoch 2/555
Epoch 3/555
Epoch 4/555
Epoch 5/555
Epoch 6/555
Epoch 7/555
Epoch 8/555
Epoch 9/555
Epoch 10/555
Epoch 11/555
Epoch 12/555
Epoch 13/555
Epoch 14/555
Epoch 15/555
Epoch 16/555
Epoch 17/555
Epoch 18/555
Epoch 19/555
Epoch 20/555
Epoch 21/555
Epoch 22/555
Epoch 23/555
Epoch 24/555
Epoch 25/555
Epoch 26/555
Epoch 27/555
Epoch 28/555
Epoch 29/555
Epoch 30/555
Epoch 31/555
Epoch 32/555
Epoch 33/555
Epoch 34/555
Epoch 35/555
Epoch 36/555
Epoch 37/555
Epoch 38/555
Epoch 39/555
Epoch 40/555
Epoch 41/555
Epoch 42/555
Epoch 43/555
Epoch 44/555
Epoch 45/555
Epoch 46/555
Epoch 47/555
Epoch 48/555
Epoch 49/555
Epoch 50/555
Epoch 51/555
Epoch 52/555
Epoch 53/555
Epoch 54/555
Epoch 55/555
Epoch 56/555
Epoch 57/555
Epoch 58/555
Epoch 59/555
Epoch 60/555
Epoch 61/555
Epoch 62/555
Epoch 63/555
Epoch 64/555
Epoch 65/555
Epoch 66/555
Epoch 67/555
Epoch 68/555
Epoch 69/555
Epoch 70/555
Epoch 71/555
Epoch 72/555
Epoch 73/555
Epoch 74/555
Epoch 75/555
Epoch 76/555
Epoch 77/555
Epoch 78

# Making a Pickle
<hr>

In [52]:
# Persist the trained model together with the fitted tokenizer and label encoder
model.save('../assets/chat_model')

# fitted tokenizer
with open('../assets/tokenizer.pickle', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file, protocol=pickle.HIGHEST_PROTOCOL)

# fitted label encoder
with open('../assets/label_encoder.pickle', 'wb') as encoder_file:
    pickle.dump(lbl_encoder, encoder_file, protocol=pickle.HIGHEST_PROTOCOL)


    

INFO:tensorflow:Assets written to: ../assets/chat_model/assets
