In [1]:
# Colab-only: mounts Google Drive at /content/gdrive and triggers the interactive
# authorization prompt recorded in this cell's output.
# NOTE(review): no later cell visible in this notebook reads from /content/gdrive --
# confirm the mount is still needed.
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [3]:
!git clone https://github.com/totalgood/nlpia.git

Cloning into 'nlpia'...
remote: Enumerating objects: 196, done.[K
remote: Counting objects: 100% (196/196), done.[K
remote: Compressing objects: 100% (118/118), done.[K
remote: Total 5827 (delta 98), reused 138 (delta 57), pack-reused 5631[K
Receiving objects: 100% (5827/5827), 124.02 MiB | 27.05 MiB/s, done.
Resolving deltas: 100% (3674/3674), done.


In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [0]:
# Load the movie-dialog CSV from the nlpia checkout.
# NOTE(review): the absolute path assumes the `git clone` cell ran with /content as the
# working directory -- confirm before running outside Colab.
df = pd.read_csv('/content/nlpia/src/nlpia/data/moviedialog.csv')

In [0]:
# Drop the index column that was saved into the CSV.
# Rebinding (instead of inplace=True) together with errors='ignore' makes this cell safe
# to re-run: the original raised KeyError on the second execution because the column was
# already gone. `axis=1` was redundant with `columns=` and has been removed.
df = df.drop(columns='Unnamed: 0', errors='ignore')

In [8]:
df.head(5)

Unnamed: 0,statement,reply
0,you're asking me out. that's so cute. what's y...,forget it.
1,"no, no, it's my fault we didn't have a proper ...",cameron.
2,"gosh, if only we could find kat a boyfriend...",let me see what i can do.
3,c'esc ma tete. this is my head,right. see? you're ready for the quiz.
4,how is our little find the wench a date plan p...,"well, there's someone i think might be"


In [0]:
import tensorflow as tf
import os
from tensorflow.python.keras.layers import Layer
from tensorflow.python.keras import backend as K


class AttentionLayer(Layer):
    """
    Bahdanau (additive) attention (https://arxiv.org/pdf/1409.0473.pdf).

    The layer owns three trainable weights, W_a, U_a and V_a, and is called on
    ``[encoder_out_seq, decoder_out_seq]``. It returns, for every decoder time
    step, the context vector and the attention weights over the encoder steps.
    """

    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the weights from ``input_shape = [encoder_shape, decoder_shape]``."""
        assert isinstance(input_shape, list)

        # W_a projects encoder features onto themselves.
        self.W_a = self.add_weight(name='W_a',
                                   shape=tf.TensorShape((input_shape[0][2], input_shape[0][2])),
                                   initializer='uniform',
                                   trainable=True)
        # U_a projects decoder features into the encoder feature space.
        self.U_a = self.add_weight(name='U_a',
                                   shape=tf.TensorShape((input_shape[1][2], input_shape[0][2])),
                                   initializer='uniform',
                                   trainable=True)
        # V_a reduces each combined feature vector to a single energy value.
        self.V_a = self.add_weight(name='V_a',
                                   shape=tf.TensorShape((input_shape[0][2], 1)),
                                   initializer='uniform',
                                   trainable=True)

        super(AttentionLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, inputs, verbose=False):
        """
        Compute context vectors and attention weights.

        inputs: [encoder_output_sequence, decoder_output_sequence]
        verbose: when True, print the shape of each intermediate tensor.

        Returns ``(c_outputs, e_outputs)``: the context vectors and the
        attention weights produced by the two ``K.rnn`` passes below.
        """
        # Accept a tuple as well as a list; the pair is only unpacked below.
        assert isinstance(inputs, (list, tuple))
        encoder_out_seq, decoder_out_seq = inputs
        if verbose:
            print('encoder_out_seq>', encoder_out_seq.shape)
            print('decoder_out_seq>', decoder_out_seq.shape)

        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state """

            assert_msg = "States must be a list. However states {} is of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            # Some parameters required for shaping tensors
            en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]

            # Computing S.Wa where S=[s0, s1, ..., si]
            # <= (batch_size*en_seq_len, latent_dim)
            reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
            # <= (batch_size, en_seq_len, latent_dim)
            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a), (-1, en_seq_len, en_hidden))
            if verbose:
                print('wa.s>', W_a_dot_s.shape)

            # Computing hj.Ua
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)  # <= (batch_size, 1, latent_dim)
            if verbose:
                print('Ua.h>', U_a_dot_h.shape)

            # tanh(S.Wa + hj.Ua)
            # <= (batch_size*en_seq_len, latent_dim)
            reshaped_Ws_plus_Uh = K.tanh(K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
            if verbose:
                print('Ws+Uh>', reshaped_Ws_plus_Uh.shape)

            # softmax(va.tanh(S.Wa + hj.Ua))
            # <= (batch_size, en_seq_len)
            e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a), (-1, en_seq_len))
            # <= (batch_size, en_seq_len)
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)

            return e_i, [e_i]

        def context_step(inputs, states):
            """ Step function for computing ci using ei """
            # Weighted sum of the encoder outputs over the encoder time axis.
            # <= (batch_size, encoder hidden size)
            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci>', c_i.shape)
            return c_i, [c_i]

        def create_inital_state(inputs, hidden_size):
            # We are not using initial states, but need to pass something to the K.rnn function
            fake_state = K.zeros_like(inputs)  # <= (batch_size, enc_seq_len, latent_dim)
            fake_state = K.sum(fake_state, axis=[1, 2])  # <= (batch_size)
            fake_state = K.expand_dims(fake_state)  # <= (batch_size, 1)
            fake_state = K.tile(fake_state, [1, hidden_size])  # <= (batch_size, hidden_size)
            return fake_state

        fake_state_c = create_inital_state(encoder_out_seq, encoder_out_seq.shape[-1])  # <= (batch_size, latent_dim)
        fake_state_e = create_inital_state(encoder_out_seq, encoder_out_seq.shape[1])  # <= (batch_size, enc_seq_len)

        # Computing energy outputs
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        last_out, e_outputs, _ = K.rnn(
            energy_step, decoder_out_seq, [fake_state_e],
        )

        # Computing context vectors
        last_out, c_outputs, _ = K.rnn(
            context_step, e_outputs, [fake_state_c],
        )

        return c_outputs, e_outputs

    def compute_output_shape(self, input_shape):
        """ Outputs produced by the layer: [context vectors, attention weights] """
        return [
            # Context vectors are weighted sums of encoder outputs, so their last
            # dimension is the encoder feature size (input_shape[0][2]), not the decoder's.
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[0][2])),
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[0][1]))
        ]

# Preprocessing

In [0]:
# Raw text pairs and the set of characters seen on each side.
input_texts = []
target_texts = []
input_vocabulary, output_vocabulary = set(), set()
# Every target sequence is wrapped in these start / stop markers.
start_token, stop_token = '\t', '\n'
# NOTE(review): no other cell visible in this notebook reads max_training_samples.
max_training_samples = min(len(df) - 1, 25000)

In [0]:
# Collect every statement/reply pair and the characters each side uses.
for input_text, target_text in zip(df.statement, df.reply):
  # Targets carry explicit start/stop markers so the decoder learns where to begin and end.
  target_text = start_token + target_text + stop_token
  input_texts.append(input_text)
  target_texts.append(target_text)
  # set.update adds each character of the string individually.
  input_vocabulary.update(input_text)
  output_vocabulary.update(target_text)

In [0]:
# Freeze both character sets into sorted lists so character indices are deterministic.
input_vocabulary, output_vocabulary = sorted(input_vocabulary), sorted(output_vocabulary)

In [0]:
# Number of distinct characters on the input and output side.
input_vocab_size, output_vocab_size = len(input_vocabulary), len(output_vocabulary)

In [0]:
# Longest statement / longest wrapped reply, in characters.
max_encoder_seq_length = max(map(len, input_texts))
max_decoder_seq_length = max(map(len, target_texts))

In [0]:
# Character -> index lookups, following the order of the sorted vocabularies.
input_char_index = {char: idx for idx, char in enumerate(input_vocabulary)}
target_char_index = {char: idx for idx, char in enumerate(output_vocabulary)}

# Index -> character lookups, obtained by inverting the maps above.
reverse_input_char_index = {idx: char for char, idx in input_char_index.items()}
reverse_target_char_index = {idx: char for char, idx in target_char_index.items()}

In [0]:
import numpy as np

# Zero-filled tensors indexed as (sample, time step, character index).
encoder_input_data = np.zeros(
    shape=(len(input_texts), max_encoder_seq_length, input_vocab_size),
    dtype=np.float32)
decoder_input_data = np.zeros(
    shape=(len(input_texts), max_decoder_seq_length, output_vocab_size),
    dtype=np.float32)
decoder_target_data = np.zeros(
    shape=(len(input_texts), max_decoder_seq_length, output_vocab_size),
    dtype=np.float32)

In [0]:
# Per-sample shape: (max_encoder_seq_length, input_vocab_size).
# The original called ndarray.reshape() with no arguments, which raises TypeError;
# the recorded output of this cell is a shape tuple, so the shape is inspected instead.
encoder_input_data.shape[1:]

(100, 44)

In [0]:
# One-hot encode every statement/reply pair into the pre-allocated tensors.
for sample_idx, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
  for step, char in enumerate(input_text):
    encoder_input_data[sample_idx, step, input_char_index[char]] = 1.0
  for step, char in enumerate(target_text):
    char_idx = target_char_index[char]
    decoder_input_data[sample_idx, step, char_idx] = 1.0
    # The target is the decoder input shifted one step earlier, so the first
    # character (step 0) has no target slot.
    if step > 0:
      decoder_target_data[sample_idx, step - 1, char_idx] = 1.0

In [0]:
import numpy as np  
import pandas as pd 
import re           
from bs4 import BeautifulSoup 
from keras.preprocessing.text import Tokenizer 
from keras.preprocessing.sequence import pad_sequences
from nltk.corpus import stopwords  
import nltk
nltk.download('stopwords') 
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Concatenate, TimeDistributed, Bidirectional, GRU
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
import warnings
pd.set_option("display.max_colwidth", 200)
warnings.filterwarnings("ignore")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [0]:
encoder_inputs.shape

TensorShape([Dimension(None), Dimension(None), Dimension(100), Dimension(44)])

In [0]:
# NOTE(review): no executable code visible in this notebook assigns `enc_emb`; unless it is
# defined in a cell not shown, this probe raises NameError on a fresh kernel.
enc_emb.shape

TensorShape([Dimension(None), Dimension(44), Dimension(512)])

In [0]:
encoder_input_data.shape

(64350, 100, 44)

In [0]:
# NOTE(review): no executable code visible in this notebook assigns `enc_emb`; unless it is
# defined in a cell not shown, this probe raises NameError on a fresh kernel.
enc_emb.shape

TensorShape([Dimension(None), Dimension(44), Dimension(512)])

In [0]:
input_vocab_size

44

In [0]:
max_decoder_seq_length

102

In [0]:
encoder_inputs.shape

TensorShape([Dimension(64), Dimension(100), Dimension(44)])

In [0]:
decoder_inputs.shape

TensorShape([Dimension(64), Dimension(102), Dimension(46)])

In [0]:
encoder_outputs.shape

TensorShape([Dimension(64), Dimension(100), Dimension(512)])

In [0]:
decoder_outputs.shape

TensorShape([Dimension(64), Dimension(102), Dimension(512)])

In [0]:
attn_out.shape

TensorShape([Dimension(64), Dimension(None), Dimension(512)])

In [0]:
attn_out.shape

TensorShape([Dimension(64), Dimension(None), Dimension(512)])

In [0]:
output_vocab_size

46

In [0]:
# Encoder-decoder LSTM with Bahdanau attention, built with a fixed batch dimension.
# from keras.models import Model
# from keras.layers import Input, LSTM, Dense
# Use the tf.keras backend: every layer in this model comes from tensorflow.keras, and
# AttentionLayer resolves the global `K` at call time, so rebinding `K` to the standalone
# `keras` backend would mix two different Keras implementations in one graph.
from tensorflow.keras import backend as K
K.clear_session()
batch_size = 64
epochs = 100
num_neurons = 512

# Both inputs are one-hot character sequences with the batch size baked in.
encoder_inputs = Input(batch_shape=(batch_size, max_encoder_seq_length, input_vocab_size))
decoder_inputs = Input(batch_shape=(batch_size, max_decoder_seq_length, output_vocab_size))

# The encoder returns the full output sequence (for attention) plus its final h/c states.
encoder_lstm1 = LSTM(num_neurons, return_sequences=True, return_state=True, name='encoder_lstm1')
encoder_outputs, state_h, state_c = encoder_lstm1(encoder_inputs)

# The decoder starts from the encoder's final states. The two extra return values are the
# decoder's own final h and c states (the LSTM is not bidirectional, despite the names).
decoder_lstm1 = LSTM(num_neurons, return_sequences=True, return_state=True, name='decoder_lstm1')
decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm1(decoder_inputs, initial_state=[state_h, state_c])

attn_layer = AttentionLayer(name='attention_layer')
attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])

# Concat attention output and decoder LSTM output along the feature axis
decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attn_out])

# Dense layer applied at every decoder time step
dense = Dense(output_vocab_size, activation='softmax', name='softmax_layer')
decoder_time = TimeDistributed(dense, name='time_distributed_layer')
decoder_pred = decoder_time(decoder_concat_input)

# Define the model
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_pred)
model.summary()



Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(64, 100, 44)]      0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(64, 102, 46)]      0                                            
__________________________________________________________________________________________________
encoder_lstm1 (LSTM)            [(64, 100, 512), (64 1140736     input_1[0][0]                    
__________________________________________________________________________________________________
decoder_lstm1 (LSTM)            [(64, 102, 512), (64 1144832     input_2[0][0]                    
                                                                 encoder_lstm1[0][1]          

In [0]:
encoder_inputs.shape

TensorShape([Dimension(None), Dimension(100), Dimension(44)])

In [0]:
# NOTE(review): no executable code visible in this notebook assigns `enc_emb`; unless it is
# defined in a cell not shown, this probe raises NameError on a fresh kernel.
enc_emb.shape

TensorShape([Dimension(None), Dimension(100), Dimension(512)])

In [0]:
encoder_outputs.shape

TensorShape([Dimension(None), Dimension(100), Dimension(512)])

In [0]:
decoder_inputs.shape

TensorShape([Dimension(None), Dimension(None), Dimension(None)])

In [0]:
# NOTE(review): no executable code visible in this notebook assigns `dec_emb`; unless it is
# defined in a cell not shown, this probe raises NameError on a fresh kernel.
dec_emb.shape

TensorShape([Dimension(None), Dimension(None), Dimension(512)])

In [0]:
decoder_inputs.shape

TensorShape([Dimension(None), Dimension(None), Dimension(46)])

In [0]:
decoder_outputs.shape

TensorShape([Dimension(None), Dimension(None), Dimension(512)])

In [0]:
attn_out.shape

TensorShape([Dimension(None), Dimension(None), Dimension(512)])

In [0]:
# Encoder-decoder LSTM with Bahdanau attention, built with an unspecified batch dimension.
# from keras.models import Model
# from keras.layers import Input, LSTM, Dense
# Use the tf.keras backend: every layer in this model comes from tensorflow.keras, and
# AttentionLayer resolves the global `K` at call time, so rebinding `K` to the standalone
# `keras` backend would mix two different Keras implementations in one graph.
# NOTE(review): the recorded output of this cell is a ValueError -- confirm whether the
# backend mix-up was its cause.
from tensorflow.keras import backend as K
K.clear_session()
batch_size = 64
epochs = 100
num_neurons = 512


# Encoder
encoder_inputs = Input(shape=(max_encoder_seq_length, input_vocab_size,))
# enc_emb = Embedding(input_vocab_size, num_neurons,trainable=True)(encoder_inputs)

# LSTM 1: returns the full output sequence (for attention) plus the final h/c states
encoder_lstm1 = LSTM(num_neurons, return_sequences=True, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm1(encoder_inputs)

# #LSTM 2
# encoder_lstm2 = LSTM(num_neurons,return_sequences=True,return_state=True)
# encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)

# #LSTM 3
# encoder_lstm3=LSTM(num_neurons, return_state=True, return_sequences=True)
# encoder_outputs, state_h, state_c= encoder_lstm3(encoder_output2)

# Set up the decoder.
decoder_inputs = Input(shape=(max_decoder_seq_length, output_vocab_size))
# dec_emb_layer = Embedding(output_vocab_size, num_neurons, trainable=True)
# dec_emb = dec_emb_layer(decoder_inputs)

# LSTM using encoder_states as initial state. The two extra return values are the
# decoder's own final h and c states (the LSTM is not bidirectional, despite the names).
decoder_lstm = LSTM(num_neurons, return_sequences=True, return_state=True)
decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm(decoder_inputs, initial_state=[state_h, state_c])

# Attention Layer
attn_layer = AttentionLayer(name='attention_layer')
attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])

# Concat attention output and decoder LSTM output along the feature axis
decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attn_out])

# Dense layer applied at every decoder time step
decoder_dense = TimeDistributed(Dense(output_vocab_size, activation='softmax'))
decoder_pred = decoder_dense(decoder_concat_input)

# Define the model
model = Model([encoder_inputs, decoder_inputs], decoder_pred)
model.summary()




ValueError: ignored

In [0]:
# Wrap the current graph tensors in a training model, then compile and train it.
# NOTE(review): when the inputs were created with a fixed batch_shape, sample counts that
# are not a multiple of batch_size leave a short final batch -- confirm whether that is
# what produced the recorded InvalidArgumentError.
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_pred)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['acc'])
model.fit(
    x=[encoder_input_data, decoder_input_data],
    y=decoder_target_data,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)

Train on 57915 samples, validate on 6435 samples
Epoch 1/100


InvalidArgumentError: ignored

In [0]:
# Encoder-decoder GRU with Bahdanau attention, built with a fixed batch dimension.
# Use the tf.keras backend: every layer in this model comes from tensorflow.keras, and
# AttentionLayer resolves the global `K` at call time, so rebinding `K` to the standalone
# `keras` backend would mix two different Keras implementations in one graph.
from tensorflow.keras import backend as K
K.clear_session()
batch_size = 64
epochs = 100
num_neurons = 512

encoder_inputs = Input(batch_shape=(batch_size, max_encoder_seq_length, input_vocab_size))
decoder_inputs = Input(batch_shape=(batch_size, max_decoder_seq_length, output_vocab_size))

# A GRU has a single state tensor, so the encoder returns (output sequence, final state).
encoder_gru = GRU(num_neurons, return_sequences=True, return_state=True, name='encoder_gru')
encoder_outputs, encoder_state = encoder_gru(encoder_inputs)

decoder_gru = GRU(num_neurons, return_sequences=True, return_state=True, name='decoder_gru')
decoder_outputs, decoder_state = decoder_gru(decoder_inputs, initial_state=encoder_state)

attn_layer = AttentionLayer(name='attention_layer')
attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])

# Concat attention output and decoder GRU output along the FEATURE axis.
# The original used axis=1 (time), which stacks the two sequences end to end and doubles
# the number of output time steps, so predictions no longer line up with
# decoder_target_data; the two LSTM model cells use axis=-1.
decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attn_out])

# Dense layer applied at every decoder time step
dense = Dense(output_vocab_size, activation='softmax', name='softmax_layer')
decoder_time = TimeDistributed(dense, name='time_distributed_layer')
decoder_pred = decoder_time(decoder_concat_input)

# Define the model
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_pred)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(64, 100, 44)]      0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(64, 102, 46)]      0                                            
__________________________________________________________________________________________________
encoder_gru (GRU)               [(64, 100, 512), (64 855552      input_1[0][0]                    
__________________________________________________________________________________________________
decoder_gru (GRU)               [(64, 102, 512), (64 858624      input_2[0][0]                    
                                                                 encoder_gru[0][1]            

In [0]:
encoder_input_data.shape

(64350, 100, 44)

In [0]:
# Assemble the model for sequence generation
# NOTE(review): `encoder_states` is not assigned by any code visible in this notebook; the
# model cells only produce `state_h` / `state_c` (LSTM) or `encoder_state` (GRU) -- confirm
# which one is intended here.
encoder_model = Model(encoder_inputs, encoder_states)
# Placeholders for the [h, c] state pair fed back into the decoder at each generation step.
thought_input = [Input(shape=(num_neurons,)), Input(shape=(num_neurons,))]
# NOTE(review): `decoder_lstm` and `decoder_dense` are created in the LSTM model cell, where
# `decoder_dense` is applied to the concatenation of decoder output and attention output.
# Here it receives the raw decoder output, and the attention layer is not used -- confirm
# that the feature sizes match.
# This line also rebinds `state_h` / `state_c`, which the training graph defined earlier.
decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=thought_input)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)

# The decoder model maps (current token, previous states) -> (token probabilities, new states).
decoder_model = Model(inputs=[decoder_inputs] + thought_input,
                     outputs=[decoder_outputs] + decoder_states)

In [0]:
# Build a character-based translator 
def decode_sequence(input_seq):
  """Greedily decode a reply, one character at a time.

  input_seq: one-hot encoded input passed unchanged to ``encoder_model.predict``.

  Returns the generated string. Generation stops once ``stop_token`` has been
  produced (it is included in the result) or once the result is longer than
  ``max_decoder_seq_length`` characters.
  """
  # Encode the input into the initial decoder state(s).
  thought = encoder_model.predict(input_seq)

  # Seed the decoder with the start token. The lookup table built during preprocessing is
  # `target_char_index`; the original referenced an undefined `target_token_index`.
  target_seq = np.zeros((1, 1, output_vocab_size))
  target_seq[0, 0, target_char_index[start_token]] = 1.
  stop_condition = False
  generated_sequence = ''

  while not stop_condition:
    output_tokens, h, c = decoder_model.predict([target_seq] + thought)
    # Greedy choice: take the most probable character of the last time step.
    generated_token_idx = np.argmax(output_tokens[0, -1, :])
    generated_char = reverse_target_char_index[generated_token_idx]
    generated_sequence += generated_char
    if (generated_char == stop_token or len(generated_sequence) > max_decoder_seq_length):
      stop_condition = True
    # Feed the character just produced, and the updated states, into the next step.
    target_seq = np.zeros((1, 1, output_vocab_size))
    target_seq[0, 0, generated_token_idx] = 1.
    thought = [h, c]
  return generated_sequence

In [0]:
# Generating a response
def response(input_text):
  """One-hot encode ``input_text``, decode a reply and print it.

  input_text: the user's statement. Characters beyond ``max_encoder_seq_length``
  are dropped, and characters missing from ``input_char_index`` are left as
  all-zero rows instead of raising.

  Prints the decoded reply; returns None.
  """
  input_seq = np.zeros((1, max_encoder_seq_length, input_vocab_size), dtype='float32')
  # Truncate so long inputs cannot index past the encoder's time dimension.
  for t, char in enumerate(input_text[:max_encoder_seq_length]):
    # The lookup table built during preprocessing is `input_char_index`; the original
    # referenced an undefined `input_token_index`.
    char_idx = input_char_index.get(char)
    if char_idx is None:
      # Character never seen during preprocessing: keep its time step all zeros.
      continue
    input_seq[0, t, char_idx] = 1.
  decoded_sentence = decode_sequence(input_seq)
  print('Bot Reply (Decoded sentence):', decoded_sentence)