In [10]:
import json
import pickle

import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import layers
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [11]:
# Load the intents definition file.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# rather than left to the platform default (which is not UTF-8 everywhere and
# would garble or reject non-ASCII patterns/responses).
with open("intents.json", encoding="utf-8") as json_file:
    data = json.load(json_file)
    

In [12]:
# Accumulators filled from the intents file:
#   training_sentences - every example pattern, in file order
#   training_labels    - the tag belonging to each pattern (parallel list)
#   labels             - each distinct tag once, in first-seen order
#   responses          - the response list of every intent, in file order
training_sentences, training_labels = [], []
labels, responses = [], []

for intent in data['intents']:
    tag = intent['tag']
    patterns = intent['patterns']

    # One (sentence, tag) pair per pattern of this intent.
    training_sentences.extend(patterns)
    training_labels.extend([tag] * len(patterns))

    # Keep the candidate replies for this intent.
    responses.append(intent['responses'])

    # Record the tag the first time it is encountered.
    if tag not in labels:
        labels.append(tag)

# Number of distinct intents; used as the width of the model's output layer.
num_classes = len(labels)

In [13]:
# Encode the string tags as integer class ids.
#
# The encoder is fitted on the de-duplicated `labels` list instead of on
# `training_labels`:
#   * its classes then always match `num_classes` (the model's output width),
#     even when an intent contributes a tag but no patterns;
#   * fitting no longer reads `training_labels`, so re-running this cell after
#     that name has been overwritten with integer ids cannot silently refit
#     the encoder on those ids and lose the tag names (the transform below is
#     expected to reject them as unseen labels instead).
label_encoder = LabelEncoder()
label_encoder.fit(labels)
training_labels = label_encoder.transform(training_labels)


In [14]:
# Text vectorization settings (also consumed by the model definition):
vocabulary_size = 1000      # passed to the tokenizer as num_words and to the embedding
embedding_dimension = 16    # width of each learned word vector
max_length = 20             # every sequence is padded / cut to this many tokens
oov_token = "<OOV>"         # placeholder token for out-of-vocabulary words

# Fit the tokenizer on the training sentences and expose its word -> id map.
tokenizer = Tokenizer(
    num_words=vocabulary_size,
    oov_token=oov_token,
)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Convert sentences to id sequences of uniform length: over-long sequences
# lose tokens from the end, short ones are zero-padded at the front
# ('pre' is the Keras default, spelled out here for clarity).
sequences = tokenizer.texts_to_sequences(training_sentences)
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_length,
    padding='pre',
    truncating='post',
)

In [15]:
# Assemble the intent classifier layer by layer:
#   embedding -> average over the sequence -> two hidden dense layers
#   -> softmax with one unit per intent class.
model = keras.Sequential()
model.add(
    layers.Embedding(vocabulary_size,
                     embedding_dimension,
                     input_length=max_length)
)
model.add(layers.GlobalAveragePooling1D())
model.add(layers.Dense(units=64, activation='relu'))
model.add(layers.Dense(units=64, activation='relu'))
model.add(layers.Dense(units=num_classes, activation='softmax'))

# Labels are integer class ids, hence the sparse categorical loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [16]:
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 20, 16)            16000     
_________________________________________________________________
global_average_pooling1d_1 ( (None, 16)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 64)                1088      
_________________________________________________________________
dense_4 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_5 (Dense)              (None, 15)                975       
Total params: 22,223
Trainable params: 22,223
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Train the classifier on the padded sequences and their integer class ids.
#
# The former `workers=4` argument was dropped: it only applies to generator /
# keras.utils.Sequence inputs, so it had no effect on these in-memory NumPy
# arrays, and newer Keras releases no longer accept it in Model.fit.
epochs = 400
history = model.fit(padded_sequences,
                    np.array(training_labels),
                    epochs=epochs)

Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78

In [18]:
# Persist the trained network to the "chat_model" path.
model.save("chat_model")

# Persist the fitted preprocessing objects alongside the model so that
# inference can reproduce the exact tokenization and label mapping.
for pickle_path, fitted_object in (
    ("tokenizer.pickle", tokenizer),
    ("label_encoder.pickle", label_encoder),
):
    with open(pickle_path, "wb") as handle:
        pickle.dump(fitted_object, handle,
                    protocol=pickle.HIGHEST_PROTOCOL)

INFO:tensorflow:Assets written to: chat_model\assets
