In [2]:
import tensorflow as tf
import os
import numpy as np 
import re
from tensorflow.python.keras.layers import Layer
from tensorflow.python.keras import backend as K

In [3]:
"""
This class implements from Bahdanau attention in given website: (https://arxiv.org/pdf/1409.0473.pdf).
There are three sets of weights introduced W_a, U_a, and V_a """

class AttentionLayer(Layer):

    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        assert isinstance(input_shape, list)
        # Create a trainable weight variable for this layer.

        self.W_a = self.add_weight(name='W_a',
                                   shape=tf.TensorShape((input_shape[0][2], input_shape[0][2])),
                                   initializer='uniform',trainable=True)
        self.U_a = self.add_weight(name='U_a',
                                   shape=tf.TensorShape((input_shape[1][2], input_shape[0][2])),
                                   initializer='uniform',trainable=True)
        self.V_a = self.add_weight(name='V_a',
                                   shape=tf.TensorShape((input_shape[0][2], 1)),
                                   initializer='uniform',trainable=True)

        super(AttentionLayer, self).build(input_shape)  

    def call(self, inputs, verbose=False):
        """
        inputs: [encoder_output_sequence, decoder_output_sequence]
        """
        assert type(inputs) == list
        encoder_out_seq, decoder_out_seq = inputs
        if verbose:
            print('encoder_out_seq>', encoder_out_seq.shape)
            print('decoder_out_seq>', decoder_out_seq.shape)

        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state
            inputs: (batchsize * 1 * de_in_dim)
            states: (batchsize * 1 * de_latent_dim)
            """

            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            """ Some parameters required for shaping tensors"""
            en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]

            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch size * en_seq_len * latent_dim
            W_a_dot_s = K.dot(encoder_out_seq, self.W_a)

            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h>', U_a_dot_h.shape)

            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)
            if verbose:
                print('Ws+Uh>', Ws_plus_Uh.shape)

            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)

            return e_i, [e_i]

        def context_step(inputs, states):
            """ Step function for computing ci using ei """

            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            # <= batch_size, hidden_size
            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci>', c_i.shape)
            return c_i, [c_i]

        fake_state_c = K.sum(encoder_out_seq, axis=1)
        fake_state_e = K.sum(encoder_out_seq, axis=2)  # <= (batch_size, enc_seq_len, latent_dim

        """ Computing energy outputs """
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        last_out, e_outputs, _ = K.rnn(
            energy_step, decoder_out_seq, [fake_state_e],
        )

        """ Computing context vectors """
        last_out, c_outputs, _ = K.rnn(
            context_step, e_outputs, [fake_state_c],
        )

        return c_outputs, e_outputs

    def compute_output_shape(self, input_shape):
        """ Outputs produced by the layer """
        return [
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[1][2])),
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[0][1]))
        ]

In [4]:
# Importing the dataset
lines = open('data/movie_lines.txt', encoding = 'utf-8', errors = 'ignore').read().split('\n')
conversations = open('data/movie_conversations.txt', encoding = 'utf-8', errors = 'ignore').read().split('\n')

In [5]:
len(lines)

304714

In [6]:
len(conversations)

83098

In [7]:
# Creating a list of all of the conversations
exchn = []
for conver in conversations:
    exchn.append(conver.split(' +++$+++ ')[-1][1:-1].replace("'", " ").replace(",","").split())

In [8]:
# Creating a dictionary that maps each line and its id
diag = {}
for line in lines:
    diag[line.split(' +++$+++ ')[0]] = line.split(' +++$+++ ')[-1]

In [9]:
# Getting separately the questions and the answers
questions = []
answers = []

for conver in exchn:
    for i in range(len(conver) - 1):
        questions.append(diag[conver[i]])
        answers.append(diag[conver[i+1]])

In [10]:
# delete
del(lines, conversations, conver, line, diag, exchn, i)

In [11]:
sorted_ques = []
sorted_ans = []
for i in range(len(questions)):
    if len(questions[i]) < 13:
        sorted_ques.append(questions[i])
        sorted_ans.append(answers[i])


In [12]:
# Doing a first cleaning of the texts
def clean_text(text):
    text = text.lower()
    text = re.sub(r"i'm", "i am", text)
    text = re.sub(r"he's", "he is", text)
    text = re.sub(r"she's", "she is", text)
    text = re.sub(r"that's", "that is", text)
    text = re.sub(r"what's", "what is", text)
    text = re.sub(r"where's", "where is", text)
    text = re.sub(r"how's", "how is", text)
    text = re.sub(r"\'ll", " will", text)
    text = re.sub(r"\'ve", " have", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"\'d", " would", text)
    text = re.sub(r"n't", " not", text)
    text = re.sub(r"won't", "will not", text)
    text = re.sub(r"can't", "cannot", text)
    text = re.sub(r"[^\w\s]", "", text)
    return text

In [13]:
#cleaning the questions and answers
clean_question = []
for line in sorted_ques:
    clean_question.append(clean_text(line))

clean_answer = []        
for line in sorted_ans:
    clean_answer.append(clean_text(line))

In [14]:
for i in range(len(clean_answer)):
    clean_answer[i] = ' '.join(clean_answer[i].split()[:11])

In [15]:
## delete
del(answers, questions, line, sorted_ans, sorted_ques, i)

In [16]:
## trimming 
clean_answer=clean_answer[:30000]
clean_question=clean_question[:30000]

In [17]:
# Creating a dictionary that maps each word to its number of occurrences
word2count = {}

for line in clean_question:
    for word in line.split():
        if word not in word2count:
            word2count[word] = 1
        else:
            word2count[word] += 1
for line in clean_answer:
    for word in line.split():
        if word not in word2count:
            word2count[word] = 1
        else:
            word2count[word] += 1

In [18]:
#delete
del(word,line)

In [19]:
# Creating a dictionary that map the words and assign it to a unique integer that means remove less frequent
thresh = 5

vocab = {}
word_num = 0
for word, count in word2count.items():
    if count >= thresh:
        vocab[word] = word_num
        word_num += 1

In [20]:
## delete
del(word2count, word, count, thresh)       
del(word_num)  

In [21]:
# Adding the last tokens to this dictionary

for i in range(len(clean_answer)):
    clean_answer[i] = '<SOS> ' + clean_answer[i] + ' <EOS>'

tokens = ['<PAD>', '<EOS>', '<OUT>', '<SOS>']
x = len(vocab)
for token in tokens:
    vocab[token] = x
    x += 1

vocab['cameron'] = vocab['<PAD>']
vocab['<PAD>'] = 0    

In [22]:
## delete
del(token, tokens, i) 
del(x)

In [23]:
# Creating the inverse dictionary of the vocab dictionary
inv_vocab = {w:v for v, w in vocab.items()}

In [24]:
# Translating all the questions and the answers into integers 
# Replacing all the words that were filtered out by <OUT> 

encoder_inp = []
for line in clean_question:
    lst = []
    for word in line.split():
        if word not in vocab:
            lst.append(vocab['<OUT>'])
        else:
            lst.append(vocab[word])
        
    encoder_inp.append(lst)

decoder_inp = []
for line in clean_answer:
    lst = []
    for word in line.split():
        if word not in vocab:
            lst.append(vocab['<OUT>'])
        else:
            lst.append(vocab[word])        
    decoder_inp.append(lst)


In [25]:
### delete
del(clean_answer, clean_question, line, lst, word)

In [26]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
encoder_inp = pad_sequences(encoder_inp, 13, padding='post', truncating='post')
decoder_inp = pad_sequences(decoder_inp, 13, padding='post', truncating='post')

In [27]:

decoder_output = []
for i in decoder_inp:
    decoder_output.append(i[1:]) 

decoder_output = pad_sequences(decoder_output, 13, padding='post', truncating='post')


#delete
del(i)

In [28]:
VOCAB_SIZE = len(vocab)
MAX_LEN = 13

print(decoder_output.shape, decoder_inp.shape, encoder_inp.shape, len(vocab), len(inv_vocab), inv_vocab[0])

(30000, 13) (30000, 13) (30000, 13) 3005 3005 <PAD>


In [29]:
inv_vocab[16]

'they'

In [30]:
# to convert a class vector (integers) to binary class matrix.

from tensorflow.keras.utils import to_categorical
decoder_output = to_categorical(decoder_output, len(vocab))


decoder_output.shape

(30000, 13, 3005)

In [31]:
# Glove Embedding
# algorithm for obtaining vector representations for words and is performed on aggregated global word-word co-occurrence statistics.

#   !ls "/content/gdrive/My Drive/Data/glove.6B.50d.txt"

embeddings_index = {}
with open('data/glove.6B.50d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
    f.close()

print("Glove Loaded!")

Glove Loaded!


In [32]:
embedding_dimention = 50
def embedding_matrix_creater(embedding_dimention, word_index):
    embedding_matrix = np.zeros((len(word_index)+1, embedding_dimention))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
          # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector
    return embedding_matrix
embedding_matrix = embedding_matrix_creater(50, word_index=vocab)

In [33]:
#delete
del(embeddings_index)

In [34]:
embedding_matrix.shape
embedding_matrix[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [35]:
import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Embedding, LSTM, Input, Bidirectional, Concatenate, Dropout, Attention

In [36]:
embed = Embedding(VOCAB_SIZE+1, 
                  50,
                  input_length=13,
                  trainable=True)

embed.build((None,))
embed.set_weights([embedding_matrix])

In [37]:
# Building the model

enc_inp = Input(shape=(13, ))
enc_embed = embed(enc_inp)
enc_lstm = Bidirectional(LSTM(400, return_state=True, dropout=0.05, return_sequences = True))

encoder_outputs, forward_h, forward_c, backward_h, backward_c = enc_lstm(enc_embed)
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])
enc_states = [state_h, state_c]


dec_inp = Input(shape=(13, ))
dec_embed = embed(dec_inp)
dec_lstm = LSTM(400*2, return_state=True, return_sequences=True, dropout=0.05)
output, _, _ = dec_lstm(dec_embed, initial_state=enc_states)

# attention
attention = AttentionLayer()
attention_op, attention_state = attention([encoder_outputs, output])
decoder_concat_input = Concatenate(axis=-1)([output, attention_op])


dec_dense = Dense(VOCAB_SIZE, activation='softmax')
output1 = dec_dense(decoder_concat_input)

model = Model([enc_inp, dec_inp], output1)

In [38]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 13)]         0                                            
__________________________________________________________________________________________________
input_1 (InputLayer)            [(None, 13)]         0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 13, 50)       150300      input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
bidirectional (Bidirectional)   [(None, 13, 800), (N 1443200     embedding[0][0]              

In [39]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [40]:
model.fit([encoder_inp, decoder_inp], decoder_output, epochs=40, batch_size=32, validation_split=0.15)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x28bd26bdf08>

In [41]:
# Trained model that contains information about the model and has weights of the neurons
model.save('chatbot.h5')
model.save_weights('chatbot_weights.h5')

In [42]:
# Attention Interface

encoder_model = tf.keras.models.Model(enc_inp, [encoder_outputs, enc_states])

decoder_state_input_h = tf.keras.layers.Input(shape=( 400 * 2,))
decoder_state_input_c = tf.keras.layers.Input(shape=( 400 * 2,))

decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]


decoder_outputs, state_h, state_c = dec_lstm(dec_embed , initial_state=decoder_states_inputs)


decoder_states = [state_h, state_c]

#decoder_output = dec_dense(decoder_outputs)

decoder_model = tf.keras.models.Model([dec_inp, decoder_states_inputs],
                                      [decoder_outputs] + decoder_states)

In [None]:
# Setting up the chat

print("==========================================")
print("#       Launch Chatbot          #")
print("==========================================")


prepro1 = ""
while prepro1 != 'q':
    
    prepro1 = input("you : ")
    prepro = [prepro1]
    
    try:
        txt = []
        for x in prepro:
            lst = []
            for y in x.split():
                lst.append(vocab[y])
            txt.append(lst)
        txt = pad_sequences(txt, 13, padding='post')


        ###
        enc_op, stat = encoder_model.predict( txt )

        empty_target_seq = np.zeros( ( 1 , 1) )
        empty_target_seq[0, 0] = vocab['<SOS>']
        stop_condition = False
        decoded_translation = ''


        while not stop_condition :

            dec_outputs , h , c = decoder_model.predict([ empty_target_seq ] + stat )

            ###
            ###########################
            attention_op, attention_state = attention([enc_op, dec_outputs])
            decoder_concat_input = Concatenate(axis=-1)([dec_outputs, attention_op])
            decoder_concat_input = dec_dense(decoder_concat_input)
            ###########################

            sampled_word_index = np.argmax( decoder_concat_input[0, -1, :] )

            sampled_word = inv_vocab[sampled_word_index] + ' '

            if sampled_word != '<EOS> ':
                decoded_translation += sampled_word           


            if sampled_word == '<EOS> ' or len(decoded_translation.split()) > 13:
                stop_condition = True

            empty_target_seq = np.zeros( ( 1 , 1 ) )  
            empty_target_seq[ 0 , 0 ] = sampled_word_index
            stat = [ h , c ] 
    except:
        pass

    print("chatbot : ", decoded_translation )
    print("::::::::::::::::::::::::::::::::::::::::::::")

#       Launch Chatbot          #
you : hi
chatbot :  hi 
::::::::::::::::::::::::::::::::::::::::::::
you : how are you
chatbot :  fine i am fine how are you 
::::::::::::::::::::::::::::::::::::::::::::
you : i am fine
chatbot :  you sure 
::::::::::::::::::::::::::::::::::::::::::::
you : what are you doing
chatbot :  i am <OUT> <OUT> 
::::::::::::::::::::::::::::::::::::::::::::
you : what is weather today
chatbot :  i am going to take a big bath and order a 
::::::::::::::::::::::::::::::::::::::::::::
you : okay
chatbot :  okay okay okay all right so long <OUT> 
::::::::::::::::::::::::::::::::::::::::::::
you : what is up
chatbot :  i think you should give up <OUT> its dangerous 
::::::::::::::::::::::::::::::::::::::::::::
you : really
chatbot :  yeah 
::::::::::::::::::::::::::::::::::::::::::::
you : I am happy to talk with you
chatbot :  yeah 
::::::::::::::::::::::::::::::::::::::::::::
you : hmm
chatbot :  i have been thinking 
::::::::::::::::::::::::::::::::::::::::::::
