In [1]:
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [2]:
# Mount Google Drive at /content/gdrive so later cells can read files from it.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
import sys
import os

# Project folder on the mounted Drive. It is added to the import path (so local
# modules such as glove_helper.py can be imported) and made the working
# directory (so relative file names like 'train-v2.0.json' resolve).
project_dir = '/content/gdrive/My Drive/W266-NLP/Project'
sys.path.append(project_dir)
os.chdir(project_dir)
os.listdir()

['Squad2.0',
 'dev-v2.0.json',
 'train-v2.0.json',
 'w266_common',
 '__pycache__',
 'data',
 'model.h5',
 'encoder_model.h5',
 'decoder_model.h5',
 'hhhhhhh.txt',
 'history_tpu_history',
 'tpu_encoder_model.h5',
 'tpu_decoder_model.h5',
 'tpu_model.h5',
 'glove_helper.py',
 'tpu_answer_model_2.h5',
 'tpu_encoder_model_2.h5',
 'tpu_decoder_model_2.h5',
 'tpu_feasibility_model_2.h5',
 'tpu_history_answer_model2',
 'tpu_history_feasibility_model2']

In [1]:
import os
import numpy as np
import pickle
import pandas as pd
import json
from pandas.io.json import json_normalize
from collections import Counter
import tensorflow as tf
from tensorflow import keras as keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Attention
import nltk
from functools import reduce
!pip install wget
# Load PyDrive and Google Auth related packages
#!pip install -U -q PyDrive
#from pydrive.auth import GoogleAuth
#from pydrive.drive import GoogleDrive
#from google.colab import auth
#from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client.
#auth.authenticate_user()
#gauth = GoogleAuth()
#gauth.credentials = GoogleCredentials.get_application_default()
#drive = GoogleDrive(gauth)

import tensorflow as tf
from tensorflow import keras as keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Attention
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from tensorflow.keras import Input
from functools import reduce
flatten = lambda data: reduce(lambda x, y: x + y, data)
import glove_helper

# Load the json data
def load_json_file(name):
  """
  Read the UTF-8 encoded json file at `name` and return the parsed object.
  """
  with open(name, encoding='utf-8') as handle:
    return json.load(handle)

# Convert json data object to a pandas data frame
def convert_to_pd(data):
  """
  Flatten a SQuAD-style json object into a pandas dataframe with one row
  per question.

  Args:
    data: dict with a 'data' list; each entry has a 'title' and a list of
      'paragraphs', each paragraph has a 'context' and a list of 'qas'.

  Returns:
    A dataframe with these columns (always present, even when there are no
    questions):
      title                    - title of the article the paragraph belongs to
      context                  - paragraph text
      q_id                     - question id
      q_isimpossible           - the question's 'is_impossible' flag
                                 (False when the key is absent, as in SQuAD v1.1)
      q_question               - question text
      q_anscount               - number of answers
      q_answers                - a list of objects e.g [{ text: '', answer_start: 123}, ...]
      q_answers_text           - the 'text' of every answer
      context_lowercase        - lower-cased context
      q_question_lowercase     - lower-cased question
      q_answers_text_lowercase - lower-cased answer texts
  """
  columns = [
      'title', 'context', 'q_id', 'q_isimpossible', 'q_question',
      'q_anscount', 'q_answers', 'q_answers_text', 'context_lowercase',
      'q_question_lowercase', 'q_answers_text_lowercase',
  ]
  result = []
  for pdata in data['data']:
    for para in pdata['paragraphs']:
      context = para['context']
      for q in para['qas']:
        answers = list(q['answers'])
        answers_text = [a.get("text") for a in answers]
        result.append({
            'title' : pdata['title'],
            'context' : context,
            'q_id' : q['id'],
            # SQuAD v1.1 files have no 'is_impossible' key: every question is answerable.
            'q_isimpossible' : q.get('is_impossible', False),
            'q_question' : q['question'],
            'q_anscount' : len(answers),
            'q_answers' : answers,
            'q_answers_text': answers_text,
            'context_lowercase': context.lower(),
            'q_question_lowercase' : q['question'].lower(),
            'q_answers_text_lowercase': [text.lower() for text in answers_text],
        })

  # Passing `columns` keeps the schema stable when `result` is empty.
  return pd.DataFrame(result, columns=columns)

# Load a SQuAD json file from a local path and return a pandas dataframe
def loadDataFile(filename):
  """
  Parse the json file at `filename` and return its questions flattened into
  a pandas dataframe (see convert_to_pd for the columns).
  """
  return convert_to_pd(load_json_file(filename))



In [2]:
# SQuAD 2.0 train / dev files, resolved relative to the working directory.
train_filename = 'train-v2.0.json'
dev_filename = 'dev-v2.0.json'

train_pd, dev_pd = (loadDataFile(name) for name in (train_filename, dev_filename))

In [3]:
def get_c_q_a(dataset):
    """
    Split a question dataframe into five parallel lists.

    Returns [question ids, contexts, questions, impossible flags (as 0/1 ints),
    answers], where the answer is the first entry of `q_answers_text`, or the
    empty string when the question has no answers.
    """
    records = (
        (
            row['q_id'],
            row['context'],
            row['q_question'],
            int(row['q_isimpossible']),
            row['q_answers_text'][0] if len(row['q_answers_text']) > 0 else "",
        )
        for _, row in dataset.iterrows()
    )
    # Always return five lists, even when the dataframe has no rows.
    columns = [[] for _ in range(5)]
    for record in records:
        for column, value in zip(columns, record):
            column.append(value)
    return columns

train_lists = get_c_q_a(train_pd)
dev_lists = get_c_q_a(dev_pd)

# Longest context (index 1), question (index 2) and answer (index 4) in the
# training lists, measured in whitespace-separated words.
context_maxlen, question_maxlen, answer_maxlen = (
    max(len(text.split()) for text in train_lists[column])
    for column in (1, 2, 4)
)
print("Max context length:",context_maxlen)
print("Max question length:",question_maxlen)
print("Max answer length:",answer_maxlen)

Max context length: 653
Max question length: 40
Max answer length: 43


In [6]:
def tokenize_c_q_a(dataset,num_words=None):
    """
    Fit a Keras Tokenizer on the contexts, questions and answers of `dataset`
    (indices 1, 2 and 4) and derive the word <-> id vocabularies.

    When `num_words` is given, only words whose index is <= num_words are kept
    in the vocabulary. The markers '<s>' and '</s>' are then appended with the
    next two free ids.

    Returns (tokenizer, vocab, id_vocab), where vocab maps word -> id and
    id_vocab maps id -> word.
    """
    tokenizer = Tokenizer(num_words,filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'+"''",oov_token='<unk>')
    tokenizer.fit_on_texts(dataset[1]+dataset[2]+dataset[4])

    vocab = {
        word: index
        for word, index in tokenizer.word_index.items()
        if num_words is None or index <= num_words
    }
    # Sequence start / end markers get the ids right after the kept words.
    for marker in ('<s>', '</s>'):
        vocab[marker] = len(vocab)+1

    id_vocab = {index: word for word, index in vocab.items()}
    return (tokenizer,vocab,id_vocab)

# The tokenizer and vocabulary are built from the training lists only.
tokenizer_obj, vocab, id_vocab = tokenize_c_q_a(dataset=train_lists)

# Number of vocabulary entries, including the '<s>' and '</s>' markers.
vocab_size = len(vocab)
print("Vocab Size:",vocab_size)

def vectorize_data(tokenizer_obj,train_lists):
    """
    Turn the text lists into padded id arrays, shuffle them and split them
    into a training part and a validation part.

    Reads the module-level `vocab`, `context_maxlen`, `question_maxlen` and
    `answer_maxlen`. All sequences are post-padded / post-truncated. The
    decoder input is '<s>' + answer + '</s>' and the decoder target is
    answer + '</s>', both padded to answer_maxlen + 2.

    The shuffle uses the global numpy RNG seeded with 19. The training part
    is the first 80% of the samples rounded down to a multiple of 128; the
    validation part runs from there to the largest multiple of 128 that fits.

    Returns a 12-tuple: (context, question, answer, answer_input,
    answer_target, answer_impossible) for training, then the same six arrays
    for validation.
    """
    start_id, end_id = vocab['<s>'], vocab['</s>']

    def _padded(sequences, width):
        return pad_sequences(sequences, width, padding='post', truncating='post')

    context_ids = tokenizer_obj.texts_to_sequences(train_lists[1])
    question_ids = tokenizer_obj.texts_to_sequences(train_lists[2])
    answer_ids = tokenizer_obj.texts_to_sequences(train_lists[4])

    arrays = [
        _padded(context_ids, context_maxlen),
        _padded(question_ids, question_maxlen),
        _padded(answer_ids, answer_maxlen),
        _padded([[start_id] + ids + [end_id] for ids in answer_ids], answer_maxlen+2),
        _padded([ids + [end_id] for ids in answer_ids], answer_maxlen+2),
        np.array(train_lists[3]),
    ]

    # One permutation applied to every array keeps the samples aligned.
    num_samples = arrays[0].shape[0]
    order = np.arange(num_samples)
    np.random.seed(19)
    np.random.shuffle(order)
    shuffled = [array[order] for array in arrays]

    train_samples = int(((num_samples*.8)//128)*128)
    end_samples = int((num_samples//128)*128)

    train_part = tuple(array[:train_samples] for array in shuffled)
    val_part = tuple(array[train_samples:end_samples] for array in shuffled)
    return train_part + val_part

# Unpack the six training arrays followed by the six validation arrays.
(train_context_padded_seq, train_question_seq_padded, train_answer_seq_padded,
 train_answer_input_seq_padded, train_answer_target_seq_padded,
 train_answer_impossible,
 val_context_padded_seq, val_question_seq_padded, val_answer_seq_padded,
 val_answer_input_seq_padded, val_answer_target_seq_padded,
 val_answer_impossible) = vectorize_data(tokenizer_obj,train_lists)

# Class balance of the "answer impossible" flag in each split.
print("validation num samples where answer impossible: ",np.count_nonzero(val_answer_impossible==1))
print("validation num samples where answer not impossible: ",np.count_nonzero(val_answer_impossible==0))
print("train num samples where answer impossible: ",np.count_nonzero(train_answer_impossible==1))
print("train num samples where answer not impossible: ",np.count_nonzero(train_answer_impossible==0))

Vocab Size: 88701
validation num samples where answer impossible:  8730
validation num samples where answer not impossible:  17382
train num samples where answer impossible:  34761
train num samples where answer not impossible:  69431


In [4]:
def create_embedding_matrix(word_index,vocab_size=50000,ndim=100):
    """
    Build a (vocab_size + 1, ndim) matrix of pre-trained word vectors.

    Row i holds the vector that glove_helper.Hands(ndim) returns for the word
    whose id in `word_index` is i. Rows stay all-zero for ids that have no
    word, for ids above `vocab_size`, and for words whose lookup returns None.
    """
    hands = glove_helper.Hands(ndim)
    embedding_matrix = np.zeros((vocab_size+1,ndim))
    for word, row in word_index.items():
        if row > vocab_size:
            continue
        vector = hands.get_vector(word,strict=False)
        if vector is None:
            continue
        embedding_matrix[row] = vector
    return embedding_matrix
# Width of the word vectors requested from glove_helper.
ndim = 100
embedding_matrix = create_embedding_matrix(word_index=vocab, vocab_size=vocab_size, ndim=ndim)

Loading vectors from data/glove/glove.6B.zip
Parsing file: data/glove/glove.6B.zip:glove.6B.100d.txt
Found 400,000 words.
Parsing vectors... Done! (W.shape = (400003, 100))


In [None]:
#Function to create the Models
def create_models(embedding_matrix,
                  num_unit_gru = 64,
                  num_layers_gru = 2,
                  ndim =100,
                  num_episodes = 2,
                  num_dense_layer_feasibility_units = 16,
                  dropout_rate = 0.5,
                  num_dense_layers_feasibility = 1,
                  attentionType = 0, # 0 = keras Attention (Luong-style), 1 = BahdanauAttention
                  BahdanauUnits = 32):
    """
    Build the four Keras models used for question answering.

    The context and the question are each encoded by a stack of bidirectional
    GRUs. An episodic memory is computed by attending over the context
    encoding for `num_episodes` passes, and the memory concatenated with the
    question encoding becomes the encoder output.

    Args:
        embedding_matrix: 2-D array of pre-trained word vectors. Its number of
            rows sets the size of every Embedding layer and of the output
            softmax; its width must equal `ndim`. The embeddings are frozen.
        num_unit_gru: units per direction in the context / question GRUs.
        num_layers_gru: number of stacked GRU layers in the context encoder,
            the question encoder and the answer decoder (must be >= 1).
        ndim: embedding width.
        num_episodes: number of attention passes when building the memory.
        num_dense_layer_feasibility_units: units per hidden feasibility layer.
        dropout_rate: dropout used in the GRUs and in the feasibility head.
        num_dense_layers_feasibility: hidden layers in the feasibility head
            (0 connects the input straight to the sigmoid output).
        attentionType: 0 for keras `layers.Attention`, anything else for the
            local `BahdanauAttention` layer.
        BahdanauUnits: projection size inside `BahdanauAttention`.

    Returns:
        (answer_model, encoder_model, decoder_model, feasibility_model)
        answer_model: [context, question, answer input] -> token
            probabilities per answer position (training model).
        encoder_model: [context, question] -> encoder output vector.
        decoder_model: [answer input, state] -> [token probabilities, state of
            the last decoder GRU]; shares its layers with answer_model.
        feasibility_model: vector of encoder-output width -> sigmoid score.

    Raises:
        ValueError: if `num_layers_gru` is smaller than 1.
    """
    if num_layers_gru < 1:
        raise ValueError("num_layers_gru must be at least 1")

    # Size embeddings and the output softmax from the matrix that is loaded
    # into them, so the weights always fit.
    num_tokens = embedding_matrix.shape[0]

    class BahdanauAttention(layers.Layer):
        """Additive attention that returns the attention-weighted values (not summed)."""

        def __init__(self, units):
            super(BahdanauAttention, self).__init__()
            self.W1 = layers.Dense(units)
            self.W2 = layers.Dense(units)
            self.V =  layers.Dense(1)

        def call(self, inputs):
            # inputs = [query, values]; the caller passes the query with an
            # explicit time axis of length 1 so that the addition below
            # broadcasts along the time axis of `values`.
            query = inputs[0]
            values = inputs[1]

            # One score per time step: V(tanh(W1 q + W2 v)).
            score = self.V(keras.activations.tanh(
                self.W1(query) + self.W2(values)))

            # Normalise the scores over the time axis.
            attention_weights = keras.activations.softmax(score, axis=1)

            # Weighted values; the caller sums over the time axis.
            context_vector = attention_weights * values

            return context_vector

    def create_memory_using_attention(num_episodes,query,context_outputs,attention_type,BahdanauUnits=32):
        """Run `num_episodes` attention passes over the context and return the final memory."""
        # The memory starts as a copy of the question encoding.
        m = layers.Lambda(lambda x: x)(query)
        # Shared projection that maps [memory, question] back to the question width.
        Dense_Layer_concat_M_q = layers.Dense(units=query.shape[1],activation='tanh')
        if attention_type == 0:
            #use keras attention which is Luong's
            attention_layer = layers.Attention()
        else:
            # use BahdanauAttention
            attention_layer = BahdanauAttention(BahdanauUnits)
        for i in range(num_episodes):
            m_plus_q =layers.concatenate(inputs=[m,query],axis=1)
            if attention_type ==0:
                # The keras Attention query is projected to the question width first.
                m_plus_q = Dense_Layer_concat_M_q(m_plus_q)
                m_plus_q = layers.BatchNormalization()(m_plus_q)
            m_plus_q_with_time_axis = tf.keras.backend.expand_dims(m_plus_q, 1)
            context_with_attention = attention_layer([m_plus_q_with_time_axis,context_outputs])
            # Collapse the time axis to get the new memory vector.
            m = tf.keras.backend.sum(context_with_attention, axis=1)

        return m


    #Input Module
    context_input = Input(shape=(None,),dtype='int32',name='Context_Input')
    context_embeddings = layers.Embedding(num_tokens,ndim,mask_zero=True,name='Context_Embedding')(context_input)

    context_outputs = context_embeddings
    for i in range(num_layers_gru):
        context_outputs = layers.Bidirectional(layers.GRU(num_unit_gru,dropout=dropout_rate,
                                               recurrent_dropout= dropout_rate,
                                               return_sequences=True),
                                               name='Context_Bid_Layer'+str(i))(context_outputs)
        context_outputs = layers.BatchNormalization()(context_outputs)
    print("Context output shape",context_outputs.shape)

    #Question Module
    question_input = Input(shape=(None,),dtype='int32',name='Question_Input')
    question_embeddings = layers.Embedding(num_tokens,ndim,mask_zero=True,name='Question_Embedding')(question_input)

    question_outputs = question_embeddings
    for i in range(num_layers_gru):
        # Every layer but the last returns the full sequence; the last one
        # returns a single vector per question.
        is_last_layer = (i == num_layers_gru-1)
        question_outputs = layers.Bidirectional(layers.GRU(num_unit_gru,dropout=dropout_rate,
                                                recurrent_dropout= dropout_rate,
                                                return_sequences=not is_last_layer),
                                                name='Question_Bid_Layer'+str(i))(question_outputs)
        question_outputs = layers.BatchNormalization()(question_outputs)

    #Episodic Memory Module
    m=create_memory_using_attention(num_episodes,question_outputs,context_outputs,attentionType,BahdanauUnits)
    concatenated_tensor = layers.concatenate(inputs=[m,question_outputs],name='Concatenation_Memory_Question',axis=1)

    #answer_module
    answer_input = Input(shape=(None,),dtype='int32',name='Answer_Input')
    answer_embeddings = layers.Embedding(num_tokens,ndim,mask_zero=True,name='Answer_Embedding')(answer_input)
    # Keep the decoder's normalisation layers so the inference decoder below
    # can apply exactly the same transformations as the training decoder.
    answer_norm_layers = []
    for i in range(num_layers_gru):
        answer_decoder_layers = layers.GRU(concatenated_tensor.shape[1],dropout=dropout_rate,
                                           recurrent_dropout= dropout_rate,
                                           return_sequences=True,
                                           return_state=True,
                                           name='Answer_GRU_Layer'+str(i)
                                           )
        if i==0:
            # The first decoder layer starts from the encoder output.
            answer_outputs,_ = answer_decoder_layers(answer_embeddings,initial_state=concatenated_tensor)
        else:
            answer_outputs,_ = answer_decoder_layers(answer_outputs)
        answer_norm = layers.BatchNormalization()
        answer_outputs = answer_norm(answer_outputs)
        answer_norm_layers.append(answer_norm)
    answer_decoder_dense = layers.TimeDistributed(layers.Dense(num_tokens, activation='softmax')
                                                  ,name='Answer_output')
    answer_decoder_outputs = answer_decoder_dense(answer_outputs)

    answer_model = Model([context_input,question_input,answer_input],answer_decoder_outputs)
    # Load the pre-trained vectors into all three embeddings and freeze them.
    for embedding_name in ("Question_Embedding", "Context_Embedding", "Answer_Embedding"):
        embedding_layer = answer_model.get_layer(embedding_name)
        embedding_layer.set_weights([embedding_matrix])
        embedding_layer.trainable = False

    #feasibility module
    feasibility_input = Input(shape=(concatenated_tensor.shape[1],), name="FeasibilityInput")
    # Each hidden block is Dense -> BatchNormalization -> Dropout, and the next
    # block consumes the dropout output so every Dropout layer is part of the graph.
    feasibility_hidden = feasibility_input
    for i in range(num_dense_layers_feasibility):
        feasibility_hidden = layers.Dense(num_dense_layer_feasibility_units,
                                          activation='relu',name='feasibility_layer_'+str(i))(feasibility_hidden)
        feasibility_hidden = layers.BatchNormalization()(feasibility_hidden)
        feasibility_hidden = layers.Dropout(dropout_rate,name='feasibility_drop_'+str(i))(feasibility_hidden)

    feasibility_output = layers.Dense(1,activation='sigmoid',name='feasibility_output')(feasibility_hidden)
    feasibility_model = Model(feasibility_input,feasibility_output)

    encoder_model = Model([context_input,question_input], concatenated_tensor)

    # Inference decoder: reuses the trained GRU, BatchNormalization and output
    # layers, but takes its initial state from an explicit input.
    # NOTE(review): only the first GRU layer receives an initial state and only
    # the last layer's state is returned, so with num_layers_gru > 1 a caller
    # that feeds the returned state back in passes the last layer's state to
    # the first layer. Carrying one state per layer would change this model's
    # inputs/outputs, so it is left as is.
    decoder_inputs = answer_input
    decoder_state_input_h = Input(shape=(None,), name="DecoderStateInput")

    for i in range(num_layers_gru):
        decoder_layers = answer_model.get_layer('Answer_GRU_Layer'+str(i))
        if i==0:
            decoder_outputs, decoder_state_h = decoder_layers(answer_embeddings,initial_state=decoder_state_input_h)
        else:
            decoder_outputs, decoder_state_h = decoder_layers(decoder_outputs)
        decoder_outputs = answer_norm_layers[i](decoder_outputs)

    decoder_dense =  answer_model.get_layer('Answer_output')(decoder_outputs)

    decoder_model = Model(
                        [decoder_inputs] + [decoder_state_input_h],
                        [decoder_dense] + [decoder_state_h])
    return (answer_model,encoder_model,decoder_model,feasibility_model)

In [15]:
#Function to get sentences from the predicted answers
def decode_sequence(context_input_seq,
                    question_input_seq,
                    encoder_model,
                    decoder_model):
    """
    Greedily decode an answer for one (context, question) pair.

    Reads the module-level `vocab`, `id_vocab` and `answer_maxlen`.

    Args:
        context_input_seq: padded context ids for a batch of size 1.
        question_input_seq: padded question ids for a batch of size 1.
        encoder_model: model whose `predict([context, question])` returns the
            initial decoder state.
        decoder_model: model whose `predict([token, state])` returns
            (token probabilities, next state).

    Returns:
        The decoded tokens joined by single spaces with a trailing space. The
        '</s>' marker is included when the decoder emitted it; token id 0
        (padding) is rendered as a space.

    Decoding stops when '</s>' is produced or once answer_maxlen + 1 tokens
    have been generated (room for a maximum-length answer plus '</s>').
    """
    # Encode the input as state vectors.
    states_value = encoder_model.predict([context_input_seq,question_input_seq])

    # Target sequence of length 1 holding the start marker.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = vocab["<s>"]

    current_step = 0
    decoded_sentence = ''

    # Sampling loop (batch of size 1).
    while True:
        output_tokens, h = decoder_model.predict([target_seq] + [states_value])
        current_step += 1

        # Greedy choice: most probable token at this step.
        sampled_token_index = np.argmax(output_tokens[0, 0, :])
        if sampled_token_index == 0:
            sampled_char = " "
        else:
            sampled_char = id_vocab[sampled_token_index]
        decoded_sentence += sampled_char + " "

        # Exit condition: end marker found or maximum length reached. The
        # limit is counted in generated tokens, not in characters of the
        # decoded string, because answer_maxlen is a word count.
        if sampled_char == '</s>' or current_step > answer_maxlen:
            break

        # Feed the sampled token and the new state into the next step.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states_value = h

    return decoded_sentence

In [9]:
#When TPU ENABLED
# Connect to the Colab TPU (address taken from COLAB_TPU_ADDR) and create a
# distribution strategy for it.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)

# Models are built and compiled inside the strategy scope.
with strategy.scope():
    tpu_answer_model,tpu_encoder_model,tpu_decoder_model,tpu_feasibility_model = create_models(embedding_matrix,
                                                                                      num_unit_gru = 80,
                                                                                      num_layers_gru = 2,
                                                                                      ndim =100,
                                                                                      num_episodes = 2,
                                                                                      num_dense_layer_feasibility_units = 32,
                                                                                      dropout_rate = 0.5,
                                                                                      num_dense_layers_feasibility = 2,
                                                                                      attentionType = 0, # 0 means Luong's 1 means BahdanauUnits
                                                                                      BahdanauUnits = 64)

    tpu_answer_model.compile(optimizer='adam',
                           loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                           metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
                           )

    tpu_feasibility_model.compile(optimizer='adam',
                           loss=tf.keras.losses.BinaryCrossentropy(),
                           metrics=[tf.keras.metrics.BinaryAccuracy()]
                           )


# Train the answer model on the first 20480 training samples and validate on
# the first 1024 validation samples. The validation decoder input must come
# from the validation split so it lines up with the validation contexts,
# questions and targets.
tpu_history_answer_model = tpu_answer_model.fit({'Context_Input':train_context_padded_seq[:20480],
                                                 'Question_Input':train_question_seq_padded[:20480],
                                                 'Answer_Input':train_answer_input_seq_padded[:20480] },
                                                {'Answer_output':train_answer_target_seq_padded[:20480] },
                                                epochs=200,batch_size=32*8,
                                                validation_data=([val_context_padded_seq[:1024],val_question_seq_padded[:1024],val_answer_input_seq_padded[:1024]],
                                                                 val_answer_target_seq_padded[:1024])
                                                )


# The feasibility model is trained on the encoder outputs of the same samples.
encoder_prediction = tpu_encoder_model.predict([train_context_padded_seq[:20480],train_question_seq_padded[:20480]])
encoder_validation_prediction = tpu_encoder_model.predict([val_context_padded_seq[:1024],val_question_seq_padded[:1024]])
tpu_history_feasibility_model = tpu_feasibility_model.fit(encoder_prediction,train_answer_impossible[:20480],
                                                          epochs=200,batch_size=32*8,
                                                          validation_data = (encoder_validation_prediction,val_answer_impossible[:1024])
                                                          )


# Persist the trained models and both training histories.
tpu_answer_model.save('tpu_answer_model_2.h5')
tpu_encoder_model.save('tpu_encoder_model_2.h5')
tpu_decoder_model.save('tpu_decoder_model_2.h5')
tpu_feasibility_model.save('tpu_feasibility_model_2.h5')
with open('tpu_history_answer_model2', 'wb') as file_history:
        pickle.dump(tpu_history_answer_model.history, file_history)
with open('tpu_history_feasibility_model2', 'wb') as file_history:
        pickle.dump(tpu_history_feasibility_model.history, file_history)

INFO:tensorflow:Initializing the TPU system: 10.89.189.58:8470


INFO:tensorflow:Initializing the TPU system: 10.89.189.58:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


Context output shape (None, None, 160)
Train on 20480 samples, validate on 1024 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/20

In [None]:
# When GPU ENABLED
# Build the answer / encoder / decoder / feasibility models with a small
# configuration, compile the two trainable models, and fit the answer model on
# the first 100 training examples, validating on the first 20 validation examples.
(gpu_answer_model,
 gpu_encoder_model,
 gpu_decoder_model,
 gpu_feasibility_model) = create_models(embedding_matrix,
                                        num_unit_gru=16,
                                        num_layers_gru=2,
                                        ndim=100,
                                        num_episodes=2,
                                        num_dense_layer_feasibility_units=16,
                                        dropout_rate=0.5,
                                        num_dense_layers_feasibility=2,
                                        attentionType=0,  # 0 = Luong-style attention, 1 = Bahdanau-style attention
                                        BahdanauUnits=64)

# Each model gets its own optimizer instance: an optimizer keeps per-variable
# state, so sharing one object between two independently trained models is fragile.
adam_optim = keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, amsgrad=False)
feasibility_adam_optim = keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, amsgrad=False)

gpu_answer_model.compile(optimizer=adam_optim,
                         loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                         metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
gpu_answer_model.summary()

gpu_feasibility_model.compile(optimizer=feasibility_adam_optim,
                              loss=tf.keras.losses.BinaryCrossentropy(),
                              metrics=[tf.keras.metrics.BinaryAccuracy()])
gpu_feasibility_model.summary()

# Validation inputs are keyed by input name, exactly like the training inputs.
# The decoder input must come from the validation split; feeding training
# answers next to validation contexts/questions/targets makes the validation
# metrics meaningless.
# NOTE(review): assumes `val_answer_input_seq_padded` is built alongside the
# other val_* arrays earlier in the notebook -- confirm.
gpu_history_answer_model = gpu_answer_model.fit(
    {'Context_Input': train_context_padded_seq[:100],
     'Question_Input': train_question_seq_padded[:100],
     'Answer_Input': train_answer_input_seq_padded[:100]},
    {'Answer_output': train_answer_target_seq_padded[:100]},
    epochs=200,
    batch_size=10,
    validation_data=({'Context_Input': val_context_padded_seq[:20],
                      'Question_Input': val_question_seq_padded[:20],
                      'Answer_Input': val_answer_input_seq_padded[:20]},
                     {'Answer_output': val_answer_target_seq_padded[:20]}))

In [17]:
# Encode the same 100 training / 20 validation examples with the GPU encoder;
# the encoder outputs are the inputs of the feasibility model.
encoder_prediction = gpu_encoder_model.predict(
    [train_context_padded_seq[:100], train_question_seq_padded[:100]])
encoder_validation_prediction = gpu_encoder_model.predict(
    [val_context_padded_seq[:20], val_question_seq_padded[:20]])

# Fit the feasibility model on the encoder outputs against the
# answer-impossible labels.
gpu_history_feasibility_model = gpu_feasibility_model.fit(
    encoder_prediction,
    train_answer_impossible[:100],
    epochs=200,
    batch_size=10,
    validation_data=(encoder_validation_prediction, val_answer_impossible[:20]),
)

# Save all four models in HDF5 format.
for model_to_save, model_path in (
        (gpu_answer_model, 'gpu_answer_model_2.h5'),
        (gpu_encoder_model, 'gpu_encoder_model_2.h5'),
        (gpu_decoder_model, 'gpu_decoder_model_2.h5'),
        (gpu_feasibility_model, 'gpu_feasibility_model_2.h5'),
):
    model_to_save.save(model_path)

# Pickle only the `.history` dicts of the two training runs.
for training_run, history_path in (
        (gpu_history_answer_model, 'gpu_history_answer_model2'),
        (gpu_history_feasibility_model, 'gpu_history_feasibility_model2'),
):
    with open(history_path, 'wb') as file_history:
        pickle.dump(training_run.history, file_history)

Train on 100 samples, validate on 20 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Ep

Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 

In [25]:
import pickle

# Reload the pickled training history of the GPU answer model from disk.
# The `with` block closes the file handle as soon as loading finishes; the
# previous `pickle.load(open(...))` form left it open until garbage collection.
# The variable name is kept because the following cell displays it.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this notebook or another trusted source.
with open("gpu_history_answer_model2", "rb") as history_file:
    favorite_color = pickle.load(history_file)

In [26]:
# Display the unpickled history: a dict mapping a metric name (e.g. 'loss')
# to the list of values recorded during training.
favorite_color

{'loss': [1.0021465957164764,
  0.8581745982170105,
  0.622495511174202,
  0.454390549659729,
  0.3440942779183388,
  0.3245418131351471,
  0.29935378283262254,
  0.29812543243169787,
  0.2746981203556061,
  0.28004816025495527,
  0.2624157935380936,
  0.26698136031627656,
  0.25928100645542146,
  0.25722589492797854,
  0.25440490916371344,
  0.25769960433244704,
  0.24973910450935363,
  0.2420899584889412,
  0.23934334218502046,
  0.23876624479889869,
  0.234937684237957,
  0.2327175408601761,
  0.22930850982666015,
  0.22928718999028205,
  0.22982289344072343,
  0.225761841237545,
  0.22956837564706803,
  0.22473053336143495,
  0.21714958176016808,
  0.2204847015440464,
  0.21618397086858748,
  0.21761858314275742,
  0.21991693899035453,
  0.21742043346166612,
  0.21079502627253532,
  0.21146002858877183,
  0.20340538248419762,
  0.21868807077407837,
  0.21210432946681976,
  0.20876455157995225,
  0.20554210022091865,
  0.20476813465356827,
  0.20861533284187317,
  0.2063750237226486

In [10]:
# Hyperparameters for the inference models.
# NOTE(review): presumably these must match the architecture the TPU weights
# below were trained with -- confirm against the TPU training cell.
inference_hyperparams = dict(
    num_unit_gru=80,
    num_layers_gru=2,
    ndim=100,
    num_episodes=2,
    num_dense_layer_feasibility_units=32,
    dropout_rate=0.5,
    num_dense_layers_feasibility=2,
    attentionType=0,  # 0 = Luong-style attention, 1 = Bahdanau-style attention
    BahdanauUnits=64,
)

(inference_answer_model,
 inference_encoder,
 inference_decoder,
 inference_feasibility_model) = create_models(embedding_matrix, **inference_hyperparams)

# Only the encoder and decoder receive the saved TPU weights.
for inference_part, weights_path in (
        (inference_encoder, 'tpu_encoder_model_2.h5'),
        (inference_decoder, 'tpu_decoder_model_2.h5'),
):
    inference_part.load_weights(weights_path)

Context output shape (None, None, 160)


In [19]:

# Decode the first ten training examples with the GPU encoder/decoder and
# print each prediction with its question and reference answer.
for seq_index in range(10):
    # A one-element slice (not a plain index) keeps the leading batch dimension.
    single_example = slice(seq_index, seq_index + 1)
    context_input_seq = train_context_padded_seq[single_example]
    question_input_seq = train_question_seq_padded[single_example]
    decoded_sentence = decode_sequence(
        context_input_seq, question_input_seq, gpu_encoder_model, gpu_decoder_model)

    # Map token ids back to words, skipping id 0 (presumably the padding id).
    question_ids = train_question_seq_padded[seq_index].tolist()
    question_words = [id_vocab.get(i) for i in question_ids if i != 0]
    print("question:", ' '.join(question_words))
    print('Decoded sentence:', decoded_sentence)

    answer_ids = train_answer_input_seq_padded[seq_index].tolist()
    act_answer = ' '.join([id_vocab.get(i) for i in answer_ids if i != 0])
    print('Actual answer:', act_answer)

question: in 2003 what well known u s secretary of state declared the situation in darfur as an act of genocide
Decoded sentence: </s> 
Actual answer: <s> colin powell </s>
question: what shape was the sixaxis final model in
Decoded sentence: </s> 
Actual answer: <s> </s>
question: what is a team called that is in the process of joining a league
Decoded sentence: </s> 
Actual answer: <s> a probational franchise </s>
question: what is lord justice sedley s nationality
Decoded sentence: </s> 
Actual answer: <s> </s>
question: what do field windings provide
Decoded sentence: </s> 
Actual answer: <s> flux </s>
question: what period ranged from the 41st to the 29th century bc
Decoded sentence: </s> 
Actual answer: <s> </s>
question: what would be needed to support metadata and obviate the need for tags
Decoded sentence: </s> 
Actual answer: <s> a standard container format </s>
question: how much energy could a capacitor in a disposable camera release
Decoded sentence: </s> 
Actual answer: <

In [18]:
# Render the first training answer-input sequence as text: each id is looked up
# in id_vocab, skipping id 0 (presumably the padding id).
' '.join([id_vocab.get(i) for i in train_answer_input_seq_padded[0].tolist() if i !=0])

'<s> mesrop mashtots </s>'

In [11]:
# Decode ten training examples, starting at offset 3000, with the inference
# encoder/decoder and print each decoded sentence.
for seq_index in range(10):
    example_start = seq_index + 3000
    # A one-element slice (not a plain index) keeps the leading batch dimension.
    single_example = slice(example_start, example_start + 1)
    context_input_seq = train_context_padded_seq[single_example]
    question_input_seq = train_question_seq_padded[single_example]
    decoded_sentence = decode_sequence(
        context_input_seq, question_input_seq, inference_encoder, inference_decoder)
    print('-')
    print('Decoded sentence:', decoded_sentence)

-
Decoded sentence: </s> 
-
Decoded sentence: </s> 
-
Decoded sentence: </s> 
-
Decoded sentence: </s> 
-
Decoded sentence: </s> 
-
Decoded sentence: </s> 
-
Decoded sentence: </s> 
-
Decoded sentence: </s> 
-
Decoded sentence: </s> 
-
Decoded sentence: </s> 


In [27]:
#Define Experiments using random choice
import random

# Candidate values for every hyperparameter in the random search.
num_unit_gru_list = [32,64,80,100]
num_layers_gru_list = [1,2]
num_episodes_list = [1,2,3]
num_dense_layer_feasibility_units_list = [16,32]
dropout_rate_list = [0.4,0.5,0.6,0.7]
num_dense_layers_feasibility_list = [1,2]
attentionType_list = [0,1]
BahdanauUnits_list = [32,64]
learning_rate_list = [.005,.001]

# Hyperparameter name (the key stored in each experiment) -> its candidates.
# Insertion order fixes both the key order of every experiment dict and the
# order in which values are drawn.
hyperparam_choices = {
    'num_unit_gru': num_unit_gru_list,
    'num_layers_gru': num_layers_gru_list,
    'num_episodes': num_episodes_list,
    'num_dense_layer_feasibility_units': num_dense_layer_feasibility_units_list,
    'dropout_rate': dropout_rate_list,
    'num_dense_layers_feasibility': num_dense_layers_feasibility_list,
    'attentionType': attentionType_list,
    'BahdanauUnits': BahdanauUnits_list,
    'learning_rate': learning_rate_list,
}

NUM_EXPERIMENTS = 6
EXPERIMENT_SEED = 42

# A dedicated seeded generator makes the sampled configurations identical on
# every run (previously each run produced a different, unrecoverable set)
# without disturbing the global `random` state.
experiment_rng = random.Random(EXPERIMENT_SEED)

# Experiment name -> {hyperparameter name: sampled value}.
Experiments = {}
for i in range(NUM_EXPERIMENTS):
    experiment_name = 'Experiment'+str(i)
    experiment_hyperparam_dic = {}
    for hyperparam_name, candidate_values in hyperparam_choices.items():
        experiment_hyperparam_dic[hyperparam_name] = experiment_rng.choice(candidate_values)
    Experiments[experiment_name] = experiment_hyperparam_dic

print(Experiments)

{'Experiment0': {'num_unit_gru': 64, 'num_layers_gru': 2, 'num_episodes': 2, 'num_dense_layer_feasibility_units': 16, 'dropout_rate': 0.4, 'num_dense_layers_feasibility': 1, 'attentionType': 0, 'BahdanauUnits': 32, 'learning_rate': 0.005}, 'Experiment1': {'num_unit_gru': 80, 'num_layers_gru': 1, 'num_episodes': 2, 'num_dense_layer_feasibility_units': 32, 'dropout_rate': 0.6, 'num_dense_layers_feasibility': 2, 'attentionType': 0, 'BahdanauUnits': 32, 'learning_rate': 0.001}, 'Experiment2': {'num_unit_gru': 32, 'num_layers_gru': 2, 'num_episodes': 3, 'num_dense_layer_feasibility_units': 16, 'dropout_rate': 0.5, 'num_dense_layers_feasibility': 1, 'attentionType': 0, 'BahdanauUnits': 64, 'learning_rate': 0.005}, 'Experiment3': {'num_unit_gru': 100, 'num_layers_gru': 2, 'num_episodes': 1, 'num_dense_layer_feasibility_units': 16, 'dropout_rate': 0.4, 'num_dense_layers_feasibility': 1, 'attentionType': 1, 'BahdanauUnits': 32, 'learning_rate': 0.001}, 'Experiment4': {'num_unit_gru': 32, 'num_l

In [28]:
# Fixed set of six experiment configurations, keyed by experiment name.
# Each entry holds the nine hyperparameters in the same order.
# attentionType: 0 = Luong-style attention, 1 = Bahdanau-style attention.
Experiments_Dic = {
    'Experiment0': dict(
        num_unit_gru=80,
        num_layers_gru=1,
        num_episodes=2,
        num_dense_layer_feasibility_units=32,
        dropout_rate=0.5,
        num_dense_layers_feasibility=1,
        attentionType=0,
        BahdanauUnits=64,
        learning_rate=0.001,
    ),
    'Experiment1': dict(
        num_unit_gru=80,
        num_layers_gru=1,
        num_episodes=2,
        num_dense_layer_feasibility_units=16,
        dropout_rate=0.4,
        num_dense_layers_feasibility=1,
        attentionType=0,
        BahdanauUnits=64,
        learning_rate=0.005,
    ),
    'Experiment2': dict(
        num_unit_gru=80,
        num_layers_gru=2,
        num_episodes=3,
        num_dense_layer_feasibility_units=16,
        dropout_rate=0.7,
        num_dense_layers_feasibility=1,
        attentionType=1,
        BahdanauUnits=64,
        learning_rate=0.005,
    ),
    'Experiment3': dict(
        num_unit_gru=64,
        num_layers_gru=2,
        num_episodes=2,
        num_dense_layer_feasibility_units=32,
        dropout_rate=0.6,
        num_dense_layers_feasibility=1,
        attentionType=1,
        BahdanauUnits=64,
        learning_rate=0.005,
    ),
    'Experiment4': dict(
        num_unit_gru=64,
        num_layers_gru=2,
        num_episodes=2,
        num_dense_layer_feasibility_units=32,
        dropout_rate=0.5,
        num_dense_layers_feasibility=1,
        attentionType=0,
        BahdanauUnits=64,
        learning_rate=0.001,
    ),
    'Experiment5': dict(
        num_unit_gru=64,
        num_layers_gru=2,
        num_episodes=2,
        num_dense_layer_feasibility_units=32,
        dropout_rate=0.4,
        num_dense_layers_feasibility=2,
        attentionType=1,
        BahdanauUnits=32,
        learning_rate=0.005,
    ),
}