# TRAINING MODEL (EMBEDDING + GLOBAL AVERAGE POOLING + DENSE)

# IMPORTING REQUIRED LIBRARIES AND MODULES

In [1]:
import json
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense
from keras.utils.vis_utils import plot_model
import pickle

# CONNECTING WITH GOOGLE DRIVE


In [2]:
from google.colab import drive

# Mount Google Drive inside the Colab runtime; the dataset and the saved
# artifacts used by the following cells live under this mount point.
gdrive_mount_point = '/content/gdrive'
drive.mount(gdrive_mount_point)

Mounted at /content/gdrive


# IMPORTING INTENT DATASET IN JSON FORMAT

In [3]:
# Load the intents dataset from Drive into `data`.
# The encoding is given explicitly because open() otherwise falls back to the
# platform's locale encoding, which can mis-decode non-ASCII text in the file.
with open('/content/gdrive/MyDrive/SoftComputing/intents.json', encoding='utf-8') as file:
    data = json.load(file)

# FEATURE ENGINEERING

In [4]:
# Empty accumulators that are filled while walking the intents dataset.
training_sentences, training_labels = [], []   # one entry per pattern
labels, responses = [], []                     # unique tags / responses per intent

In [5]:
# Flatten the dataset: every pattern becomes one training sentence labelled
# with its intent's tag; responses are collected once per intent and each tag
# is recorded once in `labels`.
for entry in data['intents']:
    patterns = entry['patterns']
    tag = entry['tag']

    training_sentences.extend(patterns)
    training_labels.extend([tag] * len(patterns))
    responses.append(entry['responses'])

    if tag not in labels:
        labels.append(tag)


In [6]:
# Number of distinct intent tags; used as the width of the softmax output layer.
num_classes = len(labels)

# Learn the tag -> integer mapping and encode the labels in a single step.
# NOTE: `training_labels` is overwritten with the encoded values, so re-running
# this cell without re-running the cells above would refit the encoder on
# already-encoded integers.
lbl_encoder = LabelEncoder()
training_labels = lbl_encoder.fit_transform(training_labels)


In [7]:
# Text-preprocessing and embedding hyperparameters.
oov_token = "<OOV>"   # token handed to the Tokenizer for out-of-vocabulary words
max_len = 20          # length every sequence is padded/truncated to
embedding_dim = 16    # size of each embedding vector
vocab_size = 1000     # Tokenizer num_words and Embedding input dimension


In [8]:
# Build the tokenizer; oov_token is the stand-in for out-of-vocabulary words.
tokenizer = Tokenizer(oov_token=oov_token, num_words=vocab_size)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Convert each sentence to integer ids, then bring every row to max_len
# entries; over-long rows lose tokens from the end (truncating='post').
sequences = tokenizer.texts_to_sequences(training_sentences)
padded_sequences = pad_sequences(sequences, maxlen=max_len, truncating='post')


# Model Building (Embedding + Global Average Pooling + Dense layers)

In [9]:
# Sequential classifier: token embedding -> average over the sequence axis ->
# two 16-unit ReLU layers -> softmax over the intent classes.
# (The stack contains no convolutional or recurrent layers.)
model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_len),
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    Dense(16, activation='relu'),
    Dense(num_classes, activation='softmax'),
])


In [10]:
# Configure training: sparse categorical cross-entropy (labels are integer
# class ids), the Adam optimizer, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [11]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 20, 16)            16000     
                                                                 
 global_average_pooling1d (G  (None, 16)               0         
 lobalAveragePooling1D)                                          
                                                                 
 dense (Dense)               (None, 16)                272       
                                                                 
 dense_1 (Dense)             (None, 16)                272       
                                                                 
 dense_2 (Dense)             (None, 32)                544       
                                                                 
Total params: 17,088
Trainable params: 17,088
Non-trainable params: 0
____________________________________________________

In [12]:
# Train for a fixed number of epochs on the padded sequences and the integer
# class labels; `history` keeps the per-epoch training record returned by fit().
epochs = 550
history = model.fit(padded_sequences, np.array(training_labels), epochs=epochs)
# Do not re-compile after fit(): compile() would replace the loss and metrics
# and create a fresh optimizer, and that configuration is what model.save()
# persists. An 'mse' loss in particular does not match integer class labels
# paired with a softmax output.

Epoch 1/550
Epoch 2/550
Epoch 3/550
Epoch 4/550
Epoch 5/550
Epoch 6/550
Epoch 7/550
Epoch 8/550
Epoch 9/550
Epoch 10/550
Epoch 11/550
Epoch 12/550
Epoch 13/550
Epoch 14/550
Epoch 15/550
Epoch 16/550
Epoch 17/550
Epoch 18/550
Epoch 19/550
Epoch 20/550
Epoch 21/550
Epoch 22/550
Epoch 23/550
Epoch 24/550
Epoch 25/550
Epoch 26/550
Epoch 27/550
Epoch 28/550
Epoch 29/550
Epoch 30/550
Epoch 31/550
Epoch 32/550
Epoch 33/550
Epoch 34/550
Epoch 35/550
Epoch 36/550
Epoch 37/550
Epoch 38/550
Epoch 39/550
Epoch 40/550
Epoch 41/550
Epoch 42/550
Epoch 43/550
Epoch 44/550
Epoch 45/550
Epoch 46/550
Epoch 47/550
Epoch 48/550
Epoch 49/550
Epoch 50/550
Epoch 51/550
Epoch 52/550
Epoch 53/550
Epoch 54/550
Epoch 55/550
Epoch 56/550
Epoch 57/550
Epoch 58/550
Epoch 59/550
Epoch 60/550
Epoch 61/550
Epoch 62/550
Epoch 63/550
Epoch 64/550
Epoch 65/550
Epoch 66/550
Epoch 67/550
Epoch 68/550
Epoch 69/550
Epoch 70/550
Epoch 71/550
Epoch 72/550
Epoch 73/550
Epoch 74/550
Epoch 75/550
Epoch 76/550
Epoch 77/550
Epoch 78

In [13]:
# Persist the trained model to Google Drive.
model_dir = '/content/gdrive/MyDrive/SoftComputing/chat_model'
model.save(model_dir)

INFO:tensorflow:Assets written to: /content/gdrive/MyDrive/SoftComputing/chat_model/assets


In [14]:
# Pickle the fitted tokenizer and label encoder next to the saved model so the
# same preprocessing and label mapping can be restored later.
for artifact, target_path in (
    (tokenizer, '/content/gdrive/MyDrive/SoftComputing/tokenizer.pickle'),
    (lbl_encoder, '/content/gdrive/MyDrive/SoftComputing/label_encoder.pickle'),
):
    with open(target_path, 'wb') as sink:
        pickle.dump(artifact, sink, protocol=pickle.HIGHEST_PROTOCOL)