# Building a Chatbot with NLP, a GRU Model, and an Attention Mechanism
### Importing the necessary libraries

In [1]:
import tensorflow as tf
physical_devices = tf.config.experimental.list_physical_devices('GPU')
# Turn on on-demand memory growth for every detected GPU. Looping (instead of
# indexing element 0) keeps this cell from raising IndexError on a CPU-only
# machine, where the list is empty, and gives every GPU the same setting when
# more than one is present.
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable = True)
import numpy as np
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, GRU, LSTM, Masking
import json
import time
import re

### Importing preprocessed data

In [2]:
# Rebuild the question tokenizer from its saved JSON description.
# The `with` statement closes the file on exit, so no explicit close() is needed.
with open('./preprocessed_data/questions.json', 'r') as f:
    json_data = json.load(f)
    question_corpus = tokenizer_from_json(json_data)

# Rebuild the answer tokenizer the same way.
with open('./preprocessed_data/answers.json', 'r') as f:
    json_data = json.load(f)
    answer_corpus = tokenizer_from_json(json_data)

# Archive of preprocessed arrays; 'arr_0' and 'arr_1' are read further down
# to obtain the question and answer sequence lengths.
npzfile = np.load('./preprocessed_data/data.npz')

#### For some unknown reason, the loaded tokenizers contain all words and labels from the raw data rather than a vocabulary of limited size, so we have to build the dictionaries manually

In [3]:
# Largest word index that is kept. The models further down are built with
# vocab_size = 5001, i.e. embedding rows and softmax classes 0..5000, so an
# index of 5001 would fall outside the embedding table. Words above the
# cut-off are left out here and therefore handled as 'unk' at inference time.
max_word_index = 5000

# word -> index and index -> word lookups for the questions
q_word2ind = {word: index for word, index in question_corpus.word_index.items() if index <= max_word_index}
q_ind2word = {index: word for word, index in q_word2ind.items()}

# word -> index and index -> word lookups for the answers
a_word2ind = {word: index for word, index in answer_corpus.word_index.items() if index <= max_word_index}
a_ind2word = {index: word for word, index in a_word2ind.items()}

### Creating the Encoder

In [4]:
def Encoder(inputdim, embeddingsize, inputlen, n_units):
    """Build the encoder as a Keras functional ``Model``.

    Args:
        inputdim: First argument of the ``Embedding`` layer (vocabulary size).
        embeddingsize: Second argument of the ``Embedding`` layer (vector width).
        inputlen: Length of the index sequence the model takes as input.
        n_units: Number of units of the GRU layer.

    Returns:
        A ``Model`` that maps an index sequence to a two-element list:
        the GRU output for every time step and the final GRU state.
    """
    token_ids = Input((inputlen,))
    embedded_tokens = Embedding(inputdim, embeddingsize)(token_ids)
    # Masking layer with default arguments, applied to the embedded sequence.
    masked_tokens = Masking()(embedded_tokens)

    # return_sequences gives the per-step outputs (consumed by the attention
    # layer in the decoder); return_state additionally gives the last state.
    recurrent_layer = GRU(n_units, return_sequences = True, return_state = True)
    sequence_output, final_state = recurrent_layer(masked_tokens)

    return Model(token_ids, [sequence_output, final_state])

### Creating the Bahdanau Attention

In [5]:
class BahdanauAttention(tf.keras.layers.Layer):
    """Additive attention: score = V(tanh(W1(values) + W2(query)))."""

    def __init__(self, units):
        """Create the three Dense projections; ``units`` is the width of W1 and W2."""
        super().__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        """Compute the context vector and the attention weights.

        As used by ``Decoder`` in this notebook, ``query`` is a state of shape
        (batch, units) and ``values`` the encoder outputs of shape
        (batch, inputlen, units).

        Returns:
            ``(context_vector, attention_weights)``: the weighted sum of
            ``values`` over axis 1, and the softmax weights over axis 1.
        """
        # Insert an axis at position 1 so the query broadcasts against `values`.
        query_with_time_axis = tf.expand_dims(query, 1)

        projected_values = self.W1(values)
        projected_query = self.W2(query_with_time_axis)
        score = self.V(tf.nn.tanh(projected_values + projected_query))

        # Normalise the scores along axis 1, then collapse that axis.
        attention_weights = tf.nn.softmax(score, axis = 1)
        context_vector = tf.reduce_sum(attention_weights * values, axis = 1)

        return context_vector, attention_weights

### Creating the Decoder

In [6]:
def Decoder(inputdim, embeddingsize, inputlen, units):
    """Build the single-step attention decoder as a Keras functional ``Model``.

    Args:
        inputdim: Vocabulary size; used for the ``Embedding`` layer and as the
            width of the softmax output layer.
        embeddingsize: Width of the embedding vectors.
        inputlen: Number of time steps of the encoder output fed to attention.
        units: Number of GRU units; also the width of the attention projections.

    Returns:
        A ``Model`` taking ``[encoder_output, hidden_state, decoder_token]`` and
        returning ``[softmax over the vocabulary, new GRU state]``.

    Note:
        ``hidden`` is only used as the attention query; the GRU itself is
        called without an explicit initial state.
    """
    # The input length is 1 because the answer is predicted step by step,
    # one word per call.
    dec_input = Input((1,))

    # Vectorizing the input answer token
    dec_embed = Embedding(inputdim, embeddingsize)(dec_input)

    # Encoder outputs (attention values) and the state used as attention query
    enc_output = Input((inputlen, units, ))
    hidden = Input((units, ))
    context_vector, _ = BahdanauAttention(units)(hidden, enc_output)

    # Give the context vector a length-1 time axis and join it with the
    # embedded token along the feature axis.
    full_context = tf.concat([tf.expand_dims(context_vector, 1), dec_embed], axis = -1)

    output, state = GRU(units, return_sequences = True, return_state = True)(full_context)

    flat_output = tf.keras.layers.Flatten()(output)

    # Softmax over the vocabulary: probability of each word being the next one
    decoder_output = Dense(inputdim, activation = 'softmax')(flat_output)

    return Model([enc_output, hidden, dec_input], [decoder_output, state])

### Defining Hyperparameters

In [7]:
# Vocabulary size handed to Encoder/Decoder (Embedding size and decoder softmax width)
vocab_size = 5001
# Width of the embedding vectors
embedding_size = 128
# Number of GRU units (also the width of the attention projections in the decoder)
n_unit = 256
# NOTE(review): batch_size is not referenced anywhere else in the visible notebook — presumably a leftover from training; confirm
batch_size = 64
# Second dimension of the stored arrays, used as the question / answer sequence lengths
question_len = npzfile['arr_0'].shape[1]
answer_len = npzfile['arr_1'].shape[1]

In [8]:
# Build the encoder for question sequences of length question_len
encoder = Encoder(vocab_size, embedding_size, question_len, n_unit)

In [9]:
# Print the encoder's layers, output shapes and parameter counts
encoder.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 21)]              0         
_________________________________________________________________
embedding (Embedding)        (None, 21, 128)           640128    
_________________________________________________________________
masking (Masking)            (None, 21, 128)           0         
_________________________________________________________________
gru (GRU)                    [(None, 21, 256), (None,  296448    
Total params: 936,576
Trainable params: 936,576
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Build the decoder; question_len is passed as inputlen because the attention layer consumes the encoder outputs
decoder = Decoder(vocab_size, embedding_size, question_len, n_unit)

In [11]:
# Print the decoder's layers, output shapes and parameter counts
decoder.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 256)]        0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 21, 256)]    0                                            
__________________________________________________________________________________________________
bahdanau_attention (BahdanauAtt ((None, 256), (None, 131841      input_4[0][0]                    
                                                                 input_3[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 1)]          0                                      

In [12]:
# Load the trained weights from the HDF5 files into the models built above
encoder.load_weights('./trained_model/attention_encoder_test5000.h5')
decoder.load_weights('./trained_model/attention_decoder_test5000.h5')

In [13]:
def clean_text(text):
    """Lower-case a sentence, expand contractions and strip punctuation.

    The substitutions are applied in the listed order, which matters:
    specific forms such as "won't" and "can't" must be rewritten before the
    generic "n't" rule, and "n't" before the "n'" rule.

    Args:
        text: Raw sentence.

    Returns:
        The cleaned sentence. Whitespace is collapsed before punctuation is
        removed, so the result can still contain trailing or doubled spaces.
    """
    # Ordered (pattern, replacement) pairs.
    # NOTE(review): presumably this has to mirror the cleaning applied to the
    # training data, so the rules themselves are kept unchanged — confirm.
    substitutions = (
        (r"i'm", "i am"),
        (r"he's", "he is"),
        (r"she's", "she is"),
        (r"it's", "it is"),
        (r"that's", "that is"),
        (r"what's", "what is"),
        (r"where's", "where is"),
        (r"there's", "there is"),
        (r"how's", "how is"),
        (r"\'ll", " will"),
        (r"\'ve", " have"),
        (r"\'re", " are"),
        (r"\'d", " would"),
        (r"won't", "will not"),
        (r"can't", "cannot"),
        (r"n't", " not"),
        (r"n'", "ng"),
        (r"'bout", "about"),
        (r"'til", "until"),
        # The character class matches runs of spaces AND double quotes.
        (r'[" "]+', " "),
        # Remove the remaining punctuation characters.
        (r"[-()\"#/@;:<>{}`+=~|.!?,]", ""),
    )

    text = text.lower().strip()
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    return text

### Evaluating the chat

In [14]:
def evaluate(sentence):
    """Generate a reply to ``sentence`` by greedy step-by-step decoding.

    The sentence is cleaned, converted to vocabulary indices (words missing
    from the question vocabulary become 'unk'), padded to ``question_len`` and
    encoded. Starting from 'bos', the decoder is then called up to
    ``answer_len`` times, each time feeding back the most probable word and
    the new state.

    Args:
        sentence: Raw user input.

    Returns:
        The predicted words, each followed by one space (a non-empty reply
        therefore ends with a space). Decoding stops at 'eos', at an index
        with no vocabulary entry, or after ``answer_len`` steps.
    """
    # Cleaning the input text
    sentence = clean_text(sentence)

    # Converting the input text to an index sequence; 'unk' replaces words
    # that are not in the vocabulary
    encoder_inputs = [
        q_word2ind[word] if word in q_word2ind else q_word2ind['unk']
        for word in sentence.split()
    ]

    encoder_inputs = tf.keras.preprocessing.sequence.pad_sequences([encoder_inputs], maxlen = question_len, padding = 'post')
    encoder_inputs = tf.convert_to_tensor(encoder_inputs)
    encoder_output, encoder_state = encoder(encoder_inputs)

    hidden_state = encoder_state
    decoder_input = tf.expand_dims([a_word2ind['bos']], 0)

    result = ''

    for _ in range(answer_len):
        pred, state = decoder([encoder_output, hidden_state, decoder_input])
        pred = np.squeeze(pred)
        pred_ind = tf.math.argmax(pred).numpy()

        # An index without a vocabulary entry (index 0 is never assigned to a
        # word) ends the reply instead of raising KeyError.
        predicted_word = a_ind2word.get(pred_ind)
        if predicted_word is None or predicted_word == 'eos':
            return result

        result += predicted_word + ' '

        # Passing the predicted index and state vector to the next step
        decoder_input = tf.expand_dims([pred_ind], 0)
        hidden_state = state

    return result

In [15]:
# Interactive chat loop: read a line, print the bot's reply, repeat until 'quit'.
while True:
    try:
        inputs = input('User :> ')
    except (EOFError, KeyboardInterrupt):
        # End of input or a kernel interrupt at the prompt ends the chat
        # cleanly instead of leaving a traceback in the notebook.
        break

    # Accept 'quit' regardless of case or surrounding whitespace
    if inputs.strip().lower() == 'quit':
        break

    result = evaluate(inputs)

    print('Bot :> ' + result)

User :>  hello


Bot :> hello unk 


User :>  hi


Bot :> hi 


User :>  how are you


Bot :> fine i am fine 


User :>  really?


Bot :> i am sorry 


User :>  where are you from


Bot :> unk 


User :>  where are you


Bot :> i am here 


User :>  the news is great


Bot :> what is the unk 


User :>  that girl is beautiful


Bot :> unk 


User :>  sweet


Bot :> i will be unk 


User :>  nice


Bot :> thanks 


User :>  lets see other


Bot :> and what about 


User :>  movies


Bot :> thank you 


User :>  you are welcome


Bot :> i am going to be unk 


User :>  quit
