# Installing libs

In [37]:
%pip install -r ./requirements.txt

Note: you may need to restart the kernel to use updated packages.


# Importing libs

In [38]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import random
import json
 
import warnings
warnings.filterwarnings('ignore')

# Loading training data

In [39]:
with open('./data/simple_intent.json', 'r') as f:
    data = json.load(f)

# print(data.keys())
# print(type(data['intents']))
# print(len(data['intents']))

training_intents = data['intents']

for key, value in training_intents[0].items():
    print(f"{key}: {value}")

intent: Greeting
text: ['Olá', 'Opa', 'Bom dia', 'Boa tarde', 'Boa noite', 'Olá, está aí?', 'Oi', 'Tudo bem?', 'dale']
responses: ['Olá! Como posso te ajudar?']
extension: {'function': '', 'entities': False, 'responses': []}
context: {'in': '', 'out': '', 'clear': False}
entityType: NA
entities: []


# Data cleaning

In [40]:
def clean(line):
    cleaned_line = ''
    for char in line:
        if char.isalpha():
            cleaned_line += char
        else:
            cleaned_line += ' '
    cleaned_line = ' '.join(cleaned_line.split())
    return cleaned_line

# Data pre-processing

In [41]:
   
#list of intents
intents = []                                            
unique_intents = []
#all text data to create a corpus
text_input= []    
#dictionary mapping intent with appropriate response
response_for_intent = {}                                
for intent in training_intents:
    intent_name = intent['intent']
    intent_texts = intent['text']
    #list of unique intents
    
    if intent_name not in unique_intents:            
        unique_intents.append(intent_name)  
    for text in intent_texts:
        #cleaning is done before adding text to corpus
        text_input.append(clean(text))                    
        intents.append(intent_name)
    if intent_name not in response_for_intent:
        response_for_intent[intent_name] = [] 
    for response in intent['responses']:
        response_for_intent[intent_name].append(response)

# Tokenization and Embedding
Now, our data is ready to be tokenized, with the help of inbuilt TensorFlow tokenizer, We can make both tokenization and embedding.

In [42]:
tokenizer = Tokenizer(filters='',oov_token='<unk>')
tokenizer.fit_on_texts(text_input)
sequences = tokenizer.texts_to_sequences(text_input)
padded_sequences = pad_sequences(sequences, padding='pre')
print('Shape of Input Sequence:',padded_sequences.shape)
padded_sequences[:5]

Shape of Input Sequence: (29, 3)


array([[ 0,  0,  2],
       [ 0,  0,  8],
       [ 0,  9, 10],
       [ 0,  3, 11],
       [ 0,  3, 12]], dtype=int32)

# Feature Extraction
Neural network cannot process sentences, so numerical representation of sentences have to be provided to it, this is done by doing Feature Extraction, for that we map all words with their indexes and create a matrix mapping it to its category (intent).

In [43]:
   
intent_to_index = {}
categorical_target = []
index = 0
 
for intent in intents:
    if intent not in intent_to_index:
        intent_to_index[intent] = index
        index += 1
    categorical_target.append(intent_to_index[intent])
 
num_classes = len(intent_to_index)
print('Number of Intents :',num_classes)
 
# Convert intent_to_index to index_to_intent
index_to_intent = {index: intent for intent, index in intent_to_index.items()}
index_to_intent

Number of Intents : 3


{0: 'Greeting', 1: 'Thanks', 2: 'Cancel'}

In [44]:
categorical_vec = tf.keras.utils.to_categorical(
    categorical_target, 
    num_classes=num_classes,
    dtype='int32'
)
 
print('Shape of Ca',categorical_vec.shape)
categorical_vec[:5]

Shape of Ca (29, 3)


array([[1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0]], dtype=int32)

# Model Building
First step of building a model is defining hyperparameters, in simple words they are settings which we predefine before training our model. They include parameters like no. of epochs, embedding dimensions, size of vocabulary, and target length. They can be adjusted accordingly, to increase performance of model.

In [45]:
# These hyper parameters could be the target of an optmization study   
epochs=100
embed_dim=300
lstm_num=50

output_dim=categorical_vec.shape[1]
input_dim=len(unique_intents)
print("Input Dimension :{},\nOutput Dimension :{}".format(input_dim,output_dim))

Input Dimension :3,
Output Dimension :3


As both input dimension and output dimension are same, we can proceed with building our model.

Now, we can define the architecture of our neural network using TensorFlow. A common model for intent recognition is the recurrent neural network (RNN) or its variant, the long short-term memory (LSTM) network. These networks can handle sequential data, such as sentences, effectively. We can also use pre-trained models like BERT or GPT to achieve better performance.

Here we are using a RNN, by using ‘Sequential’ model from TensorFlow’s Keras API. It consists of an embedding layer, an LSTM layer for sequence processing, and two dense layers for classification. You can see model summary below.

In [46]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(len(tokenizer.word_index) + 1, embed_dim),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(lstm_num, dropout=0.1)),  
    tf.keras.layers.Dense(lstm_num, activation='relu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(output_dim, activation='softmax')
])
 
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)  
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, None, 300)         10500     
                                                                 
 bidirectional_2 (Bidirecti  (None, 100)               140400    
 onal)                                                           
                                                                 
 dense_4 (Dense)             (None, 50)                5050      
                                                                 
 dropout_2 (Dropout)         (None, 50)                0         
                                                                 
 dense_5 (Dense)             (None, 3)                 153       
                                                                 
Total params: 156103 (609.78 KB)
Trainable params: 156103 (609.78 KB)
Non-trainable params: 0 (0.00 Byte)
______________

# Traning

In [47]:
model.fit(padded_sequences, categorical_vec, epochs=epochs, verbose=0)

<keras.src.callbacks.History at 0x7fb5007d0f90>

# Evaluation

In [51]:
tests = [
    ("Oi", "Greeting"),
    ("Obrigado", "Thanks"),
    ("Grato", "Thanks"),
    ("Valeu", "Thanks"),
    ("vlw", "Thanks"),
    ("Opa", "Greeting"),
    ("Dale", "Greeting"),
    ("oioi", "Greeting"),
    ("ei", "Greeting"),
    ("Grata!", "Thanks"),
    ("Muito obrigada, de verdade", "Thanks"),
    ("Cancelar", "Cancel"),
    ("Deixa para outro dia", "Cancel"),
    ("encerrar", "Cancel"),
    ("cancela", "Cancel"),
    ("Esquece", "Cancel"),
    ("encerra", "Cancel")
]

test_text_inputs, test_intents = [], []
for test in tests:
    test_text_inputs.append(test[0])
    test_intents.append(test[1])
 
test_sequences = tokenizer.texts_to_sequences(test_text_inputs)
test_padded_sequences = pad_sequences(test_sequences,  padding='pre')
test_labels = np.array([unique_intents.index(intent) for intent in test_intents])
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=num_classes)
loss, accuracy = model.evaluate(test_padded_sequences, test_labels)



# Prediction

In [52]:
def response(sentence):
    sent_tokens = []
    # Split the input sentence into words
    words = sentence.split()
    # Convert words to their corresponding word indices
    for word in words:                                           
        if word in tokenizer.word_index:
            sent_tokens.append(tokenizer.word_index[word])
        else:
            # Handle unknown words
            sent_tokens.append(tokenizer.word_index['<unk>'])
    sent_tokens = tf.expand_dims(sent_tokens, 0)
    #predict numerical category
    pred = model(sent_tokens)
    print(pred)
    #category to intent
    pred_class = np.argmax(pred.numpy(), axis=1)    
    print(pred_class)            
    # random response to that intent
    return random.choice(
        response_for_intent[index_to_intent[pred_class[0]]]), index_to_intent[pred_class[0]]

   
query = "Obrigado"
bot_response, typ = response(query)
print(f"Response: {bot_response}")
print(f"Intent: {typ}")

tf.Tensor([[0.43986425 0.2672869  0.29284883]], shape=(1, 3), dtype=float32)
[0]
Response: Olá! Como posso te ajudar?
Intent: Greeting
