<a href="https://colab.research.google.com/github/abhishek203/conversational-chatbot/blob/master/Chatbot_3_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import pandas as pd 
import tensorflow as tf
import re
import time
import io
import os
from keras.preprocessing.sequence import pad_sequences
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding, Bidirectional, concatenate, Reshape
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import time
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.model_selection import train_test_split



In [0]:
# Location of the raw conversation corpus read by the next cell.
path_to_file = "/content/Chatbot_data.txt"

In [0]:
# Read the whole corpus and split it into entries on the '- - ' marker.
# The `with` block guarantees the file handle is closed once the text is read;
# undecodable bytes are dropped (errors='ignore').
with io.open(path_to_file, errors='ignore') as corpus_file:
    data = corpus_file.read().strip().split('- - ')

In [0]:
# Build two aligned lists: the first '  - '-separated field of each entry is
# the question, and every following field is one answer to it. The question is
# repeated once per answer so both lists end up the same length.
raw_question = []
raw_ans = []
for entry in data:
    prompt, *replies = entry.split('  - ')
    for reply in replies:
        raw_question.append(prompt)
        raw_ans.append(reply)


In [0]:
def clean_text(text):
    '''Normalize a raw utterance before tokenization.

    Lower-cases the text, expands common English contractions, removes the
    punctuation characters listed in the final pattern, and collapses every
    run of whitespace into a single space.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string.
    '''
    # Applied strictly in this order: the specific rules ("won't", "can't")
    # must run before the generic "n't" rule, and "n't" before the
    # dropped-g rule, otherwise the generic rules would consume them first.
    substitutions = (
        (r"i'm", "i am"),
        (r"he's", "he is"),
        (r"she's", "she is"),
        (r"it's", "it is"),
        (r"that's", "that is"),
        (r"what's", "what is"),
        (r"where's", "where is"),
        (r"how's", "how is"),
        (r"\'ll", " will"),
        (r"\'ve", " have"),
        (r"\'re", " are"),
        (r"\'d", " would"),
        (r"won't", "will not"),
        (r"can't", "cannot"),
        (r"n't", " not"),
        # Dropped-g forms such as "goin'" -> "going". The lookahead keeps the
        # rule from firing inside words like "john's".
        (r"n'(?!\w)", "ng"),
        (r"'bout", "about"),
        (r"'til", "until"),
        (r"[-()\"#/@;:<>{}`+=~|.!?,]", ""),
    )

    text = text.lower()
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    # Collapse any whitespace runs left behind by the removals.
    return " ".join(text.split())

In [0]:
# Run every raw question and answer through clean_text, preserving order so
# the two lists stay aligned index-for-index.
clean_questions = [clean_text(raw) for raw in raw_question]
clean_answers = [clean_text(raw) for raw in raw_ans]

In [0]:
# Wrap each cleaned sentence in explicit start/end markers so the model sees
# sequence boundaries as ordinary tokens.
question = ['<start> ' + text + ' <end>' for text in clean_questions]
ans = ['<start> ' + text + ' <end>' for text in clean_answers]



In [0]:
# One word-level tokenizer shared by questions and answers. filters='' keeps
# the '<start>' / '<end>' markers intact; unknown words map to '<oov>'.
tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='',oov_token='<oov>')

# Fit on the complete corpus BEFORE encoding anything. Each fit_on_texts call
# rebuilds word_index from the accumulated word counts, so encoding the
# questions between two fits would give them ids from a vocabulary that no
# longer matches the final one used for the answers and at inference time.
tokenizer.fit_on_texts(question + ans)

question_seq = tokenizer.texts_to_sequences(question)
question_seq = tf.keras.preprocessing.sequence.pad_sequences(question_seq,padding='post')
ans_seq = tokenizer.texts_to_sequences(ans)
ans_seq = tf.keras.preprocessing.sequence.pad_sequences(ans_seq, padding = 'post')

In [0]:
# Padded lengths of the encoded questions / answers (every row of a padded
# matrix has the same length, so the first row is representative).
len_input_seq = len(question_seq[0])
len_output_seq = len(ans_seq[0])
print(len(ans))

# Training hyperparameters.
BATCH_SIZE = 64
BUFFER_SIZE = len(question_seq)  # shuffle buffer spans the whole dataset
steps_per_epoch = BUFFER_SIZE // BATCH_SIZE
embedding_dim = 256
units = 1024
# Both sides share one tokenizer, hence one vocabulary size.
# The +1 presumably leaves room for the padding id 0 -- TODO confirm.
vocab_inp_size = vocab_tar_size = len(tokenizer.word_index) + 1

# Shuffled, fixed-size batches; the trailing partial batch is dropped so every
# batch holds exactly BATCH_SIZE examples.
dataset = (
    tf.data.Dataset.from_tensor_slices((question_seq, ans_seq))
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)


In [0]:
class Encoder(tf.keras.Model):
  '''Embeds a batch of token ids and runs it through a single LSTM layer.'''

  def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
    '''Create the embedding and LSTM layers.

    Args:
      vocab_size: Number of rows in the embedding table.
      embedding_dim: Size of each embedding vector.
      enc_units: Number of LSTM units.
      batch_sz: Stored on the instance; not used by the layers themselves.
    '''
    super().__init__()
    self.batch_sz = batch_sz
    self.enc_units = enc_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.lstm = tf.keras.layers.LSTM(
        self.enc_units,
        recurrent_initializer='glorot_uniform',
        return_state=True,
        return_sequences=True)

  def call(self, x):
    '''Encode `x` and return (per-step LSTM outputs, final hidden state).

    The LSTM's final cell state is computed but not returned.
    '''
    embedded = self.embedding(x)
    sequence_output, hidden_state, _cell_state = self.lstm(embedded)
    return sequence_output, hidden_state


In [0]:
class BahdanauAttention(tf.keras.layers.Layer):
  '''Additive attention: score = V(tanh(W1(query) + W2(values))).'''

  def __init__(self, units):
    '''Create the two projection layers (width `units`) and the scoring layer.'''
    super().__init__()
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, query, values):
    '''Return (context_vector, attention_weights) for `query` over `values`.

    assumes query is (batch, hidden) and values is (batch, time, hidden)
    -- TODO confirm against callers.
    '''
    # Insert an axis at position 1 so the query broadcasts against `values`.
    expanded_query = tf.expand_dims(query, 1)

    energies = tf.nn.tanh(self.W1(expanded_query) + self.W2(values))

    # Normalize the scores along axis 1.
    attention_weights = tf.nn.softmax(self.V(energies), axis=1)

    # Weighted sum of `values` along axis 1.
    context_vector = tf.reduce_sum(attention_weights * values, axis=1)

    return context_vector, attention_weights

In [0]:
class Decoder(tf.keras.Model):
  '''Single-step decoder: attention -> embedding -> LSTM -> vocabulary logits.'''

  def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
    '''Create the embedding, LSTM, output projection and attention layers.

    Args:
      vocab_size: Embedding table size and width of the output projection.
      embedding_dim: Size of each embedding vector.
      dec_units: Number of LSTM units (also the attention projection width).
      batch_sz: Stored on the instance; not used by the layers themselves.
    '''
    super().__init__()
    self.batch_sz = batch_sz
    self.dec_units = dec_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.lstm = tf.keras.layers.LSTM(
        self.dec_units,
        recurrent_initializer='glorot_uniform',
        return_state=True,
        return_sequences=True)
    self.fc = tf.keras.layers.Dense(vocab_size)

    self.attention = BahdanauAttention(self.dec_units)

  def call(self, x, hidden, enc_output):
    '''Run one decoding step.

    Note that the LSTM is invoked without an initial state: `hidden` affects
    this step only through the attention context.

    Returns:
      (logits, new hidden state, attention weights).
    '''
    context_vector, attention_weights = self.attention(hidden, enc_output)

    embedded = self.embedding(x)

    # Prepend the context vector to the token embedding along the last axis.
    context_step = tf.expand_dims(context_vector, 1)
    lstm_input = tf.concat([context_step, embedded], axis=-1)

    output, state, _cell_state = self.lstm(lstm_input)

    # Flatten the leading axes so the dense layer sees one row per step.
    flat_output = tf.reshape(output, (-1, output.shape[2]))

    logits = self.fc(flat_output)

    return logits, state, attention_weights

In [0]:
# Instantiate both halves of the model with the shared hyperparameters.
encoder = Encoder(vocab_size=vocab_inp_size,
                  embedding_dim=embedding_dim,
                  enc_units=units,
                  batch_sz=BATCH_SIZE)
decoder = Decoder(vocab_size=vocab_tar_size,
                  embedding_dim=embedding_dim,
                  dec_units=units,
                  batch_sz=BATCH_SIZE)

In [0]:
# Adam with its default settings.
optimizer = tf.keras.optimizers.Adam()
# reduction='none' keeps one loss value per example so that padding positions
# can be masked out before averaging; the model emits raw logits.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    reduction='none', from_logits=True)

def loss_function(real, pred):
  '''Cross-entropy between `real` ids and `pred` logits, ignoring id 0.

  Positions where `real` equals 0 contribute zero loss; the mean is still
  taken over every position, masked ones included.
  '''
  per_example = loss_object(real, pred)

  # 1.0 where the target is a real token, 0.0 where it is id 0.
  keep = tf.cast(tf.math.not_equal(real, 0), dtype=per_example.dtype)

  return tf.reduce_mean(per_example * keep)

In [0]:
# Checkpoints are written under ./training_checkpoints with the prefix "ckpt".
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
# Track both models and the optimizer so training can be resumed.
checkpoint = tf.train.Checkpoint(
    encoder=encoder,
    decoder=decoder,
    optimizer=optimizer)

In [0]:
@tf.function
def train_step(inp, targ):
  '''Run one teacher-forced optimisation step on a batch.

  Args:
    inp: Batch of encoded questions.
    targ: Batch of encoded answers; column 0 is skipped as a prediction target.

  Returns:
    The summed per-step loss divided by the target sequence length.
  '''
  start_id = tokenizer.word_index['<start>']
  num_steps = targ.shape[1]
  loss = 0

  with tf.GradientTape() as tape:
    # The encoder's final hidden state seeds the decoder's attention query.
    enc_output, dec_hidden = encoder(inp)

    # First decoder input: one '<start>' id per example in the batch.
    dec_input = tf.expand_dims([start_id] * BATCH_SIZE, 1)

    for t in range(1, num_steps):
      step_target = targ[:, t]
      predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)

      loss += loss_function(step_target, predictions)

      # Teacher forcing: feed the ground-truth token as the next input.
      dec_input = tf.expand_dims(step_target, 1)

  variables = encoder.trainable_variables + decoder.trainable_variables

  # Gradients are taken of the summed loss, not the length-normalized one.
  gradients = tape.gradient(loss, variables)
  optimizer.apply_gradients(zip(gradients, variables))

  return loss / int(num_steps)

In [0]:
EPOCHS = 10

for epoch in range(EPOCHS):
  epoch_number = epoch + 1  # 1-based, used for logging and checkpoint cadence
  started_at = time.time()
  running_loss = 0

  for batch, (inp, targ) in enumerate(dataset.take(steps_per_epoch)):
    batch_loss = train_step(inp, targ)
    running_loss += batch_loss

    # Log progress on every 100th batch.
    if batch % 100 == 0:
      print('Epoch {} Batch {} Loss {:.4f}'.format(epoch_number,
                                                   batch,
                                                   batch_loss.numpy()))

  # Persist a checkpoint after every second epoch.
  if epoch_number % 2 == 0:
    checkpoint.save(file_prefix = checkpoint_prefix)

  print('Epoch {} Loss {:.4f}'.format(epoch_number,
                                      running_loss / steps_per_epoch))
  print('Time taken for 1 epoch {} sec\n'.format(time.time() - started_at))

In [0]:
# Export the encoder in SavedModel format to the 'my_encoder' directory.
tf.saved_model.save(encoder,'my_encoder')

In [0]:
def evaluate(sentence):
  '''Generate a reply to `sentence` by greedy decoding.

  The sentence is cleaned, wrapped in '<start>' / '<end>' markers, encoded
  with the already-fitted tokenizer and padded to the training input length.
  The decoder is then run one token at a time, feeding back its own argmax
  prediction, until it emits '<end>' or `len_output_seq` steps have passed.

  Args:
    sentence: Raw user input.

  Returns:
    (result, sentence): the generated tokens joined by spaces (with a
    trailing space, and including '<end>' if it was produced) and the
    cleaned, marker-wrapped input sentence.
  '''
  sentence = clean_text(sentence)
  sentence = '<start> ' + sentence + ' <end>'

  # Encode with the existing vocabulary. The tokenizer must NOT be re-fitted
  # here: fit_on_texts on a bare string fits on its individual characters and
  # rebuilds word_index, shifting the ids the model was trained on.
  # texts_to_sequences maps unknown words to the tokenizer's '<oov>' id
  # instead of raising KeyError.
  inputs = tokenizer.texts_to_sequences([sentence])
  inputs = tf.keras.preprocessing.sequence.pad_sequences(inputs,
                                                         maxlen=len_input_seq,
                                                         padding='post')
  inputs = tf.convert_to_tensor(inputs)

  result = ''

  enc_out, enc_hidden = encoder(inputs)

  dec_hidden = enc_hidden
  dec_input = tf.expand_dims([tokenizer.word_index['<start>']], 0)

  for t in range(len_output_seq):
    predictions, dec_hidden, attention_weights = decoder(dec_input,
                                                         dec_hidden,
                                                         enc_out)

    predicted_id = int(tf.argmax(predictions[0]).numpy())

    # Id 0 is the padding id and has no entry in index_word; treat it as the
    # end of the reply rather than crashing with KeyError.
    word = tokenizer.index_word.get(predicted_id)
    if word is None:
      break

    result += word + ' '

    if word == '<end>':
      break

    # Feed the prediction back in as the next decoder input.
    dec_input = tf.expand_dims([predicted_id], 0)

  return result, sentence

In [0]:
def answer(sentence):
  '''Print the processed form of `sentence` and the reply from evaluate.'''
  reply, processed = evaluate(sentence)

  print('Input: %s' % (processed))
  print('Answer: {}'.format(reply))


In [0]:
# Load the most recent checkpoint found in checkpoint_dir into the tracked
# encoder, decoder and optimizer.
latest_checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
checkpoint.restore(latest_checkpoint_path)

In [0]:
# Sample query: greeting.
answer('hello')

In [0]:
# Sample query: definitional question.
answer('what is ai')

In [0]:
# Sample query: question about the bot's creator.
answer('who made you')

In [0]:
# Sample query: question about the bot's age.
answer('how old are you')

In [0]:
# Sample query: question about the bot's father.
answer('who is your father')

In [0]:
# Sample query: yes/no question about language ability.
answer('do you know english')

In [0]:
# Sample query: small talk about dinner.
answer('what did you have for dinner')

In [0]:
# Sample query: small talk about the weather.
answer('how is the weather today')

In [0]:
# Sample query: question about the bot's implementation language.
answer('in what language where you written')

In [0]:
# Sample query: farewell.
answer('bye boy')

In [0]:
# Sample query: single-word input.
answer('google')