## 1. Importing Dataset

In [3]:
import numpy as np
import pandas as pd 
import re
import os
import random
human_txt_path = '/kaggle/input/rdany-conversations/human_text.txt'
robot_txt_path = '/kaggle/input/rdany-conversations/robot_text.txt'

# Storing txt content as lists
with open(human_txt_path, 'r', encoding = 'utf-8') as f:
    human_lines = f.read().split('\n')

with open(robot_txt_path, 'r', encoding = 'utf-8') as f:
    robot_lines = f.read().split('\n')

In [4]:
len(human_lines)

2363

In [5]:
len(robot_lines)

2363

## 2. Cleaning the data
1. Removing the non-speaking words like [start], [silent] from the texts. Removing emotes and symbols characters from the texts. <br>
2. Creating the human-bot response pairs.

In [6]:
human_lines = [re.sub(r'\[\w+\]','hi',line) for line in human_lines]
human_lines = [' '.join(re.findall(r'\w+', line)) for line in human_lines]

robot_lines = [re.sub(r'\[\w+\]','',line) for line in robot_lines]
robot_lines = [' '.join(re.findall(r'\w+', line)) for line in robot_lines]

pairs = list(zip(human_lines, robot_lines))

## 3. Feature Engineering
Taking 400 pair of human-responses responses and adding <START> and <STOP> tags to the bot responses for the machine to identify the end of string.

In [7]:
input_txt = []
target_txt = []

input_tokens = set()
target_tokens = set()

for line in pairs[:400]:
    input_, target_ = line[0], line[1]
    input_txt.append(input_)
    target_ = ' '.join(re.findall(r"[\w']+|[^\s\w]", target_))
    target_ = '<START> ' + target_ + ' <STOP>'
    target_txt.append(target_)
    
    for token in re.findall(r"[\w']+|[^\s\w]", input_):
        if token not in input_tokens:
            input_tokens.add(token)
    for token in target_.split():
        if token not in target_tokens:
            target_tokens.add(token)

In [8]:
input_tokens = sorted(list(input_tokens))
target_tokens = sorted(list(target_tokens))
num_encoder_tokens = len(input_tokens)
num_decoder_tokens = len(target_tokens)
print(num_decoder_tokens)

3192


In [9]:
# Adding indexes to the vocabulary and storing it in dictonary of input and target respectivly.

input_dict = dict([(token, i) for i, token in enumerate(input_tokens)])
target_dict = dict([(token, i) for i, token in enumerate(target_tokens)])

reverse_input_dict = {token: index for index, token in input_dict.items()} 
reverse_target_dict = {token: index for index, token in target_dict.items()} 


## 4. Train data creation

In [10]:
max_encoder_seq_length = max([len(re.findall(r"[\w']+|[^\s\w]", human_line)) for human_line in input_txt])
max_decoder_seq_length = max([len(re.findall(r"[\w']+|[^\s\w]", robot_line)) for robot_line in target_txt])

encoder_input_data = np.zeros((len(input_txt), max_encoder_seq_length, num_encoder_tokens), dtype = 'float32')
decoder_input_data = np.zeros((len(target_txt), max_decoder_seq_length, num_decoder_tokens), dtype = 'float32')
decoder_target_data = np.zeros((len(target_txt), max_decoder_seq_length, num_decoder_tokens), dtype = 'float32')

for line, (input_, target_) in enumerate(zip(input_txt, target_txt)):
    for timestep, token in enumerate(re.findall(r"[\w']+|[^\s\w]", input_)):
        encoder_input_data[line, timestep, input_dict[token]] = 1
    for timestep, token in enumerate(target_.split()):
        decoder_input_data[line, timestep, target_dict[token]] = 1
        if timestep > 0:
            decoder_target_data[line, timestep - 1 , target_dict[token]] = 1

## 6. Building the Model

In [11]:
from tensorflow import keras
from keras.layers import Input, LSTM, Dense
from keras.models import Model

dimensionality = [1024, 512, 256]

batch_size = 10
epochs = 800

**Encoder Model:**

In [12]:
encoder_inputs = Input(shape = (None, num_encoder_tokens), name ='encoder_inputs')
outputs = encoder_inputs
encoder_states = []
for j in range(len(dimensionality))[::-1]:
    outputs, h, c = LSTM(dimensionality[j], return_state = True,dropout = 0.2, return_sequences = bool(j))(outputs)
    encoder_states += [h,c]

In [13]:
print(encoder_states)

[<tf.Tensor 'lstm/PartitionedCall:2' shape=(None, 256) dtype=float32>, <tf.Tensor 'lstm/PartitionedCall:3' shape=(None, 256) dtype=float32>, <tf.Tensor 'lstm_1/PartitionedCall:2' shape=(None, 512) dtype=float32>, <tf.Tensor 'lstm_1/PartitionedCall:3' shape=(None, 512) dtype=float32>, <tf.Tensor 'lstm_2/PartitionedCall:2' shape=(None, 1024) dtype=float32>, <tf.Tensor 'lstm_2/PartitionedCall:3' shape=(None, 1024) dtype=float32>]


**Decoder Model:**

In [14]:
decoder_inputs = Input(shape=(None, num_decoder_tokens), name = 'decoder_inputs')
outputs = decoder_inputs
output_layers = []

for j in range(len(dimensionality)):
    output_layers.append(
        LSTM(dimensionality[len(dimensionality) - j - 1], return_sequences=True, return_state=True, dropout = 0.2)
    )
    outputs, dh, hc = output_layers[-1](outputs, initial_state=encoder_states[2*j:2*(j+1)])

decoder_dense = Dense(num_decoder_tokens, activation='softmax',  name = 'decoder_dense')
decoder_outputs = decoder_dense(outputs)

In [None]:
#print(decoder)

## 7. Training the Model

In [14]:
#Buidling the Model
seq2seq_model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

#Compiling
seq2seq_model.compile(optimizer='adam', loss ='categorical_crossentropy', metrics = ['accuracy'], sample_weight_mode = 'temporal')

#Training the Model with the created matrices
seq2seq_model.fit([encoder_input_data, decoder_input_data], decoder_target_data, batch_size = batch_size, epochs = epochs, validation_split = 0.2)
seq2seq_model.save('/kaggle/working/bi_3stack_full_dataset.h5')

## Note: To save the h5 file created in /kaggle/working

In [None]:
import os
os.chdir(r'/kaggle/working')
from IPython.display import FileLink
FileLink(r'chat_bot.h5')

## 8. Re-Creating the Model taking trained weights from the model saved above

In [18]:
from keras.models import load_model
#s2s_model = load_model('/kaggle/input/chatbot-output/chat_bot_adam.h5')
s2s_model = load_model('../input/bi-bot/bi_chat_bot_n.h5')

s2s_model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None, 981)]  0                                            
__________________________________________________________________________________________________
bidirectional (Bidirectional)   [(None, 512), (None, 2535424     input_1[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, None, 1003)] 0                                            
__________________________________________________________________________________________________
concatenate (Concatenate)       (None, 512)          0           bidirectional[0][1]              
                                                                 bidirectional[0][3]   

In [None]:
#print()

In [19]:
#print(s2s_model.summary())
# Creating Encoder model from inputs and input_states
encoder_inputs = s2s_model.input[0]
encoder_outputs1, enc_hidden_state1, enc_cell_state1 = s2s_model.layers[2].output
encoder_outputs2, enc_hidden_state2, enc_cell_state2 = s2s_model.layers[4].output
encoder_outputs3, enc_hidden_state3, enc_cell_state3 = s2s_model.layers[6].output
encoder_states = [enc_hidden_state1, enc_cell_state1, enc_hidden_state2, enc_cell_state2,  enc_hidden_state3, enc_cell_state3]
encoder_model = Model(encoder_inputs, encoder_states)
print(encoder_states)

TypeError: Cannot iterate over a tensor with unknown first dimension.

In [20]:
# =====
d_outputs = decoder_inputs
decoder_input_states = []
decoder_states = []

for j in range(len(dimensionality))[::-1]:
    current_state_inputs = [Input(shape=(dimensionality[j],)) for _ in range(2)]

    temp = output_layers[len(dimensionality)-j-1](d_outputs, initial_state=current_state_inputs)

    d_outputs, cur_states = temp[0], temp[1:]

    decoder_states += cur_states
    decoder_input_states += current_state_inputs

decoder_outputs = decoder_dense(d_outputs)

# Buidling decoder model from decoder inputs/input_states and output/output_states
decoder_model = Model([decoder_inputs] + decoder_input_states, [decoder_outputs] + decoder_states)

In [21]:
print(encoder_model.summary())

NameError: name 'encoder_model' is not defined

In [22]:
print(decoder_model.summary())

Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
decoder_inputs (InputLayer)     [(None, None, 3192)] 0                                            
__________________________________________________________________________________________________
input_1 (InputLayer)            [(None, 256)]        0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 256)]        0                                            
__________________________________________________________________________________________________
lstm_3 (LSTM)                   [(None, None, 256),  3531776     decoder_inputs[0][0]             
                                                                 input_1[0][0]         

## 9. Tesing the created model

In [None]:
def robot_response(user_input):
    state_values = encoder_model.predict(user_input)
    target_seq = np.zeros((1,1,num_decoder_tokens))
    target_seq[0,0, target_dict['<START>']] = 1
    bot_response = ''
    start_flag = True
    
    while start_flag:
        to_split = decoder_model.predict([target_seq] + state_values)
        output_tokens, state_values = to_split[0], to_split[1:]
        
        # Choosing one with hightest probability
        response_token_index = np.argmax(output_tokens[0, 0])
        response_token = reverse_target_dict[response_token_index]

        bot_response +=  response_token + " "
        
        if(response_token == '<STOP>'):
            start_flag = False
            
        # Update target sequence
        target_seq = np.zeros((1,1,num_decoder_tokens))
        target_seq[0,0, response_token_index] = 1
        
    return bot_response

## 10. Final Chat Bot

In [None]:
class ChatBot:
    neg_responses = ('unhappy','no','sorry')
    exit_commands = ('stop', 'pause', 'exit', 'goodbye', 'bye')
    
    # To initiate chat
    def start_chat(self):
        print("Bot: ", "Hi, I'm a Friendly Bot. Would you like to chat with me?\n")
        user_response = input()
        #print(user_response)
        
        if user_response in self.neg_responses:
            print("Bot: ","Ok, Have a great day!")
            return
        self.chat(user_response)
        
    # To exit chat
    def make_exit(self, reply):
        for exit_command in self.exit_commands:
            if exit_command in reply:
                print('Bot: ','Ok, have a great day!')
                return True
        return False
    
    # To continue chat till exit
    def chat(self, reply):
        while not self.make_exit(reply):
            print("Bot: ",self.generate_response(reply) + '\n')
            reply = input()
            print("User: ",reply)
    
            
    # To generate a response using seq2seq model we built
    def generate_response(self, user_input):
        input_matrix = self.string_to_matrix(user_input)
        chatbot_response = robot_response(input_matrix)
        #Remove <START> and <STOP> tokens from chatbot_response
        chatbot_response = chatbot_response.replace("<START>",'')
        chatbot_response = chatbot_response.replace("<STOP>",'')
        return chatbot_response
    
    #Method to convert user input into a matrix
    def string_to_matrix(self, user_input):
        tokens = re.findall(r"[\w']+|[^\s\w]", user_input)
        #print("tokens in ip: ", tokens)
        # First input indicates one row
        user_input_matrix = np.zeros((1, max_encoder_seq_length, num_encoder_tokens),dtype='float32')
        #print(user_input_matrix.shape)
        for timestep, token in enumerate(tokens):
            if token in input_dict:
                # it can't able to handle other words out of trained human words
                user_input_matrix[0, timestep, input_dict[token]] = 1
                #print(user_input_matrix[0][timestep][input_dict[token]])
        #print(user_input_matrix)
        #print(user_input_matrix.shape)
        return user_input_matrix

chatbot = ChatBot()

In [None]:
chatbot.start_chat()