In [1]:
import json
import pickle

import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [2]:
# Load the intents definition file into `data`.
# The encoding is given explicitly: without it, open() decodes with the
# platform's locale encoding, which is not UTF-8 on every system and would
# garble any non-ASCII characters in the file.
with open("intents.json", encoding="utf-8") as json_file:
    data = json.load(json_file)
    

In [3]:
# Containers filled from the intents file:
#   training_sentences / training_labels -- parallel lists, one entry per pattern
#   labels    -- each distinct tag, in order of first appearance
#   responses -- one entry per intent, holding that intent's responses
training_sentences, training_labels = [], []
labels, responses = [], []

for intent in data['intents']:
    patterns = intent['patterns']
    tag = intent['tag']

    # Every pattern of this intent becomes one training example with its tag.
    training_sentences.extend(patterns)
    training_labels.extend([tag] * len(patterns))

    responses.append(intent['responses'])

    # Record the tag only the first time it is seen.
    if tag not in labels:
        labels.append(tag)

# Number of output classes for the classifier.
num_classes = len(labels)

In [4]:
# Turn the string tags into integer class ids.
# The fitted encoder is kept in `label_encoder`; it is pickled at the end of
# the notebook.
label_encoder = LabelEncoder()
training_labels = label_encoder.fit_transform(training_labels)


In [5]:
# Text vectorisation settings (the model definition reuses the first three):
vocabulary_size = 1000      # passed to the Tokenizer as `num_words`
embedding_dimension = 16    # size of each embedding vector
max_length = 20             # every sequence is padded/truncated to this length
oov_token = "<OOV>"         # token the Tokenizer uses for out-of-vocabulary words

# Fit the tokenizer on the training sentences and keep its word -> index map.
tokenizer = Tokenizer(num_words=vocabulary_size, oov_token=oov_token)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Convert each sentence to a list of word ids, then bring all of them to
# `max_length`. Over-long sentences are truncated at the end
# (truncating='post'); the padding side is left at the pad_sequences default.
sequences = tokenizer.texts_to_sequences(training_sentences)
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_length,
    truncating='post',
)

In [6]:
# Network architecture: embedding -> average over the sequence -> two hidden
# dense layers -> softmax over the intent classes.
model = Sequential([
    Embedding(vocabulary_size,
              embedding_dimension,
              input_length=max_length),
    GlobalAveragePooling1D(),
    Dense(32, activation='relu'),
    Dense(32, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

# The labels are integer class ids (not one-hot vectors), hence the sparse
# variant of categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [7]:
# Print the layer-by-layer summary (output shapes and parameter counts):
model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 20, 16)            16000     
_________________________________________________________________
global_average_pooling1d (Gl (None, 16)                0         
_________________________________________________________________
dense (Dense)                (None, 32)                544       
_________________________________________________________________
dense_1 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_2 (Dense)              (None, 15)                495       
Total params: 18,095
Trainable params: 18,095
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Train the classifier on the padded sequences and their integer labels.
#
# `workers` is intentionally not passed: Keras only uses it for generator /
# Sequence inputs, so it has no effect on in-memory arrays, and newer Keras
# releases no longer accept it in `fit`.
#
# NOTE(review): the output recorded under this cell reads "Epoch N/500", so it
# was produced with a different `epochs` value than the one set here; re-run
# the cell to refresh it.
epochs = 400
history = model.fit(padded_sequences,
                    np.asarray(training_labels),  # no copy if already an array
                    epochs=epochs)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [9]:
# Persist the trained network:
model.save("chat_model")

# Pickle the fitted tokenizer and the fitted label encoder, each to its own
# file, using the highest pickle protocol available:
for pickle_path, fitted_object in (("tokenizer.pickle", tokenizer),
                                   ("label_encoder.pickle", label_encoder)):
    with open(pickle_path, "wb") as out_file:
        pickle.dump(fitted_object, out_file,
                    protocol=pickle.HIGHEST_PROTOCOL)

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: chat_model\assets
