In [5]:
import json
import random
import re
import warnings

import numpy as np
import tensorflow as tf

import spacy
# Load spaCy's small English pipeline; the `en_core_web_sm` model package must be installed.
nlp = spacy.load('en_core_web_sm')

# Suppress all Python warnings from this point on.
warnings.filterwarnings('ignore')

In [2]:
!pip install spacy

Defaulting to user installation because normal site-packages is not writeable
Collecting spacy
  Downloading spacy-3.3.1-cp310-cp310-win_amd64.whl (11.7 MB)
     ---------------------------------------- 11.7/11.7 MB 4.8 MB/s eta 0:00:00
Collecting blis<0.8.0,>=0.4.0
  Downloading blis-0.7.7-cp310-cp310-win_amd64.whl (6.6 MB)
     ---------------------------------------- 6.6/6.6 MB 2.5 MB/s eta 0:00:00
Collecting preshed<3.1.0,>=3.0.2
  Using cached preshed-3.0.6-cp310-cp310-win_amd64.whl (112 kB)
Collecting typer<0.5.0,>=0.3.0
  Downloading typer-0.4.1-py3-none-any.whl (27 kB)
Collecting murmurhash<1.1.0,>=0.28.0
  Downloading murmurhash-1.0.7-cp310-cp310-win_amd64.whl (18 kB)
Collecting srsly<3.0.0,>=2.4.3
  Downloading srsly-2.4.3-cp310-cp310-win_amd64.whl (448 kB)
     -------------------------------------- 448.3/448.3 KB 2.0 MB/s eta 0:00:00
Collecting pydantic!=1.8,!=1.8.1,<1.9.0,>=1.7.4
  Downloading pydantic-1.8.2-py3-none-any.whl (126 kB)
     ----------------------------------

You should consider upgrading via the 'C:\Program Files\Python310\python.exe -m pip install --upgrade pip' command.



Installing collected packages: wasabi, murmurhash, cymem, spacy-loggers, spacy-legacy, smart-open, pydantic, preshed, langcodes, catalogue, blis, typer, srsly, thinc, pathy, spacy
Successfully installed blis-0.7.7 catalogue-2.0.7 cymem-2.0.6 langcodes-3.3.0 murmurhash-1.0.7 pathy-0.6.1 preshed-3.0.6 pydantic-1.8.2 smart-open-5.2.1 spacy-3.3.1 spacy-legacy-3.0.9 spacy-loggers-1.0.2 srsly-2.4.3 thinc-8.0.17 typer-0.4.1 wasabi-0.9.1


In [6]:
# Load the intents dataset: read the raw bytes and let json decode them.
with open('Intent.json', 'rb') as file:
    data = json.loads(file.read())

In [7]:
''' preprocessing '''
def pre_processing(line):
    """Normalize a raw utterance before tokenization.

    Every character other than an ASCII letter, ``.``, ``?``, ``!`` or an
    apostrophe is replaced by a space, then runs of spaces are collapsed
    into a single space. Leading/trailing spaces are not stripped.

    Args:
        line: The raw text to clean.

    Returns:
        The cleaned string.
    """
    # The letter class must be `A-Z`: the former `A-z` range also covered the
    # ASCII characters that sit between 'Z' and 'a' ([ \ ] ^ _ `), so those
    # symbols slipped through the filter.
    line = re.sub(r'[^a-zA-Z.?!\']', ' ', line)
    line = re.sub(r'[ ]+', ' ', line)
    return line

In [9]:
# Collect training utterances, their intent labels and the candidate replies
# from the JSON data.
#   inputs     - cleaned utterances (one entry per example)
#   targets    - intent name for each entry of `inputs`
#   cls        - distinct intent names in first-seen order
#   intent_doc - intent name -> list of responses for that intent
inputs, targets = [], []
cls = []
intent_doc = {}

# Loop variables are deliberately NOT called `response`: that global name
# belongs to the response() function, and re-running this cell would
# otherwise overwrite the function with a string.
for intent_entry in data['intents']:
    intent_name = intent_entry['intent']

    # `cls` and `intent_doc` are always updated together, so one membership
    # test is enough to register a new intent in both.
    if intent_name not in intent_doc:
        cls.append(intent_name)
        intent_doc[intent_name] = []

    for utterance in intent_entry['text']:
        inputs.append(pre_processing(utterance))
        targets.append(intent_name)

    intent_doc[intent_name].extend(intent_entry['responses'])

In [10]:
''' tokenize data '''
def token_data(inp_list):
    """Fit a Keras tokenizer on ``inp_list`` and encode the texts as padded id sequences.

    The tokenizer is built with ``filters=''`` (no characters are filtered out)
    and ``'<unk>'`` as its out-of-vocabulary token.

    Args:
        inp_list: The texts to fit on and encode.

    Returns:
        A ``(tokenizer, inp_seq)`` pair: the fitted tokenizer and the encoded
        texts after ``pad_sequences(..., padding='pre')``.
    """
    keras_pre = tf.keras.preprocessing

    tokenizer = keras_pre.text.Tokenizer(oov_token='<unk>', filters='')
    tokenizer.fit_on_texts(inp_list)

    # Encode the texts, then pad at the front of each sequence.
    encoded = tokenizer.texts_to_sequences(inp_list)
    padded = keras_pre.sequence.pad_sequences(encoded, padding='pre')

    return tokenizer, padded

'''preprocess input data'''
# Fit the tokenizer on the cleaned utterances; `inp_tensor` holds their padded id sequences.
tokenizer, inp_tensor = token_data(inputs)

In [11]:
def cr_cat_target(targets):
    """One-hot encode a sequence of labels.

    Each distinct label gets an integer id in order of first appearance
    (0, 1, 2, ...); the ids are then expanded with ``to_categorical``.

    Args:
        targets: Iterable of hashable labels, one per example.

    Returns:
        A ``(cat_tensor, idx_to_label)`` pair: the ``int32`` one-hot array and
        a dict mapping each integer id back to its label.
    """
    label_to_idx = {}
    encoded = []

    for label in targets:
        # A new label receives the next free id, i.e. the current dict size.
        encoded.append(label_to_idx.setdefault(label, len(label_to_idx)))

    one_hot = tf.keras.utils.to_categorical(
        encoded, num_classes=len(label_to_idx), dtype='int32'
    )
    idx_to_label = {idx: label for label, idx in label_to_idx.items()}
    return one_hot, idx_to_label

'''preprocess output data'''
# One-hot encode the intent labels; `target_idx_word` maps each class index back to its intent name.
target_tensor, target_idx_word = cr_cat_target(targets)

In [12]:
# Sanity check: both tensors are built from lists filled in lockstep, so their first dimensions should match.
print('input shape: {} and output shape: {}'.format(inp_tensor.shape, target_tensor.shape))

input shape: (143, 9) and output shape: (143, 22)


In [13]:
# ---- Build model ----
# Hyperparameters
epochs = 50
embed_dim = 512
units = 128
# Tokenizer word ids start at 1, so add one slot for id 0 (the padding value).
vocab_size = len(tokenizer.word_index) + 1
# Number of output classes = width of the one-hot target tensor.
target_len = target_tensor.shape[1]

# Architecture: embedding -> bidirectional LSTM -> dense (ReLU) -> dropout -> softmax
model = tf.keras.models.Sequential()
# Embedding layer
model.add(tf.keras.layers.Embedding(vocab_size, embed_dim))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units, dropout=0.2)))
# Hidden layer
model.add(tf.keras.layers.Dense(units, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
# Classification layer
model.add(tf.keras.layers.Dense(target_len, activation='softmax'))

In [14]:
# Compile the model.
# `learning_rate` is the supported keyword for the optimizer; the old `lr`
# alias is deprecated and rejected by newer Keras releases.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 512)         66048     
                                                                 
 bidirectional (Bidirectiona  (None, 256)              656384    
 l)                                                              
                                                                 
 dense (Dense)               (None, 128)               32896     
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 22)                2838      
                                                                 
Total params: 758,166
Trainable params: 758,166
Non-trainable params: 0
__________________________________________________

In [10]:
# Early stopping: monitor the training loss with a patience of 4 epochs.
early_stop = tf.keras.callbacks.EarlyStopping(patience=4, monitor='loss')

# Training run; the History object returned by fit() is the cell's displayed value.
model.fit(
    x=inp_tensor,
    y=target_tensor,
    callbacks=[early_stop],
    epochs=epochs,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50


<tensorflow.python.keras.callbacks.History at 0x7fb0107ee110>

In [None]:
def response(sentence):
    """Predict the intent of ``sentence`` and pick a reply for it.

    The sentence goes through the same pipeline as the training data:
    ``pre_processing`` for cleaning, then the fitted ``tokenizer`` for
    lower-casing, whitespace splitting and id lookup (unknown words become
    the tokenizer's ``'<unk>'`` id). Encoding the text any other way yields
    tokens that do not line up with the training vocabulary.

    Args:
        sentence: Raw user input.

    Returns:
        A ``(reply, intent)`` tuple: a randomly chosen response registered
        for the predicted intent in ``intent_doc``, and the intent name.
    """
    sent_seq = tokenizer.texts_to_sequences([pre_processing(sentence)])[0]

    # Input with no usable tokens (empty string, digits only, ...) would give a
    # zero-length sequence; feed a single unknown token instead.
    if not sent_seq:
        sent_seq = [tokenizer.word_index['<unk>']]

    # Add the batch dimension expected by the model: (1, sequence_length).
    sent_seq = tf.expand_dims(sent_seq, 0)

    # Predict the class probabilities and take the most likely class.
    pred = model(sent_seq)
    pred_class = int(np.argmax(pred.numpy(), axis=1)[0])

    # `target_idx_word` is the index -> intent-name mapping returned by cr_cat_target().
    intent = target_idx_word[pred_class]

    # Choose a random response for the predicted intent.
    return random.choice(intent_doc[intent]), intent

# Chat with the bot: read a line, answer it, repeat until the user types 'quit'.
print("Note: Enter 'quit' to break the loop.")
while True:
    try:
        input_ = input('You: ')
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or a kernel interrupt: leave the loop quietly instead of
        # ending the cell with a traceback.
        break

    # Ignore surrounding whitespace so that e.g. 'quit ' still ends the session.
    if input_.strip().lower() == 'quit':
        break

    res, typ = response(input_)
    print('Bot: {} -- TYPE: {}'.format(res, typ))
    print()

Note: Enter 'quit' to break the loop.
