# Sentence Picker Model

## Load preprocessed data

In [0]:
from google.colab import drive
drive.mount('/gdrive')

In [0]:
!pip install -q tensorflow tensorflow-datasets matplotlib
!pip install --upgrade tensorflow-gpu
!pip install tensorflow-hub

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import matplotlib.pyplot as plt
import numpy as np
import tensorflow.compat.v2 as tf
import tensorflow_hub as hub


import tensorflow_datasets as tfds
tfds.disable_progress_bar()
tf.enable_v2_behavior()

In [0]:
import pandas as pd
import time
import tensorflow_hub as hub

In [0]:
train_data=pd.read_pickle('/gdrive/My Drive/Colab Notebooks/NLP project stages/Prototype 4 -- Small Universal 8 layers/train_v1_pd.pkl')

In [0]:
def add_length(item):
    return len(item)
train_data['Context_length']=train_data['Context'].apply(add_length)
# train_data['Question_number']=train_data['Question'].apply(add_length)

In [0]:
def prepare_input(row):
    row['Input']=row['Question']+row['Context']
    row['Expectation']=[0.0]+list(row['Target'])
    return row
train_data=train_data.apply(prepare_input,axis=1)

In [0]:
train_data.tail(3)

Unnamed: 0,ID,Context,Question,Answers,Start,Target,Context_length,Input,Expectation
87596,[5735d259012e2f140011a09f],"[Kathmandu Metropolitan City (KMC), in order t...",[With what Belorussian city does Kathmandu hav...,[Minsk],[476],"[0.0, 0.0, 1.0, 0.0]",4,[With what Belorussian city does Kathmandu hav...,"[0.0, 0.0, 0.0, 1.0, 0.0]"
87597,[5735d259012e2f140011a0a0],"[Kathmandu Metropolitan City (KMC), in order t...",[In what year did Kathmandu create its initial...,[1975],[199],"[0.0, 1.0, 0.0, 0.0]",4,[In what year did Kathmandu create its initial...,"[0.0, 0.0, 1.0, 0.0, 0.0]"
87598,[5735d259012e2f140011a0a1],"[Kathmandu Metropolitan City (KMC), in order t...",[What is KMC an initialism of?],[Kathmandu Metropolitan City],[0],"[1.0, 0.0, 0.0, 0.0]",4,"[What is KMC an initialism of?, Kathmandu Metr...","[0.0, 1.0, 0.0, 0.0, 0.0]"


## Setting up data pipeline

In [0]:
def shuffle_data(dataframe,batch_size):
    batches=len(dataframe)//batch_size
    index=np.arange(batches*batch_size)
    np.random.shuffle(index)
    index=index.reshape(batches,batch_size)
    return index

In [0]:
def get_value_ts(dataframe,index,type):
    if type=='Input':
        return tf.cast(tf.ragged.constant(dataframe.iloc[index][type]).to_tensor(),dtype=tf.string)
    elif type=='Target':
        return tf.cast(tf.ragged.constant(dataframe.iloc[index][type]).to_tensor(),dtype=tf.float32)

In [0]:
get_value_ts(train_data,[3],'Input')

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


<tf.Tensor: id=66, shape=(1, 8), dtype=string, numpy=
array([[b'What is the Grotto at Notre Dame?',
        b'Architecturally, the school has a Catholic character.',
        b"Atop the Main Building's gold dome is a golden statue of the Virgin Mary.",
        b'Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes".',
        b'Next to the Main Building is the Basilica of the Sacred Heart.',
        b'Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection.',
        b'It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858.',
        b'At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.']],
      dtype=object)>

In [0]:
def create_padding_mask(input_):
  seq = tf.cast(tf.math.equal(input_,''), tf.float32)
  
  # add extra dimensions to add the padding
  # to the attention logits.
  return seq[:, tf.newaxis, tf.newaxis, :]  # (batch_size, 1, 1, seq_len)

In [0]:
create_padding_mask(get_value_ts(train_data,[0,10,30],'Input'))

<tf.Tensor: id=140, shape=(3, 1, 1, 8), dtype=float32, numpy=
array([[[[0., 0., 0., 0., 0., 0., 0., 0.]]],


       [[[0., 0., 0., 0., 0., 0., 0., 1.]]],


       [[[0., 0., 0., 0., 0., 0., 0., 1.]]]], dtype=float32)>

## **Setting Hyper Parameter**


In [0]:
# Hyper Parameter
embedding_model="https://tfhub.dev/google/universal-sentence-encoder-large/5"

Embedding_dimension=512 # Must be consistent with the embedding model chosen
Embedding_expansion=1024
Layer_num=8
num_heads=16
batch_size=32
embed_training=False
assert Embedding_expansion % num_heads == 0
learning_rate=0.0001

## Define model


In [0]:
def scaled_dot_product_attention(q, k, v, mask=None):
  """
  Args:
    q: query shape == (batch,seq_q, depth_expan) 
    k: key shape == (seq_len_k, depth_expan) 
    v: value shape == (seq_len_v, depth_expan)
    mask: Float tensor with shape broadcastable 
          to (..., seq_len_q, seq_len_k). Defaults to None.
    
  Returns:
    output, attention_weights
  """

  matmul_qk = tf.matmul(q, k, transpose_b=True)  # (batch, seq_len_q, seq_len_k) (4,3)
#   print(matmul_qk.shape)
  # scale matmul_qk
  dk = tf.cast(tf.shape(k)[-1], tf.float32)
  scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)
  
  # add the mask to the scaled tensor.
  if mask is not None:
    
    scaled_attention_logits += (mask * -1e9)  

  # softmax is normalized on the last axis (seq_len_k) so that the scores
  # add up to 1.
  attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)  # (batch, len_q, seq_len_k)
#   print(attention_weights.shape)
  output = tf.matmul(attention_weights, v)  # (batch, seq_len_q, depth_v)
#   print(output.shape)
#   print('Logit shape',attention_weights.shape)
  return output, attention_weights

In [0]:
# Test code for scaled dot-product attention:
def print_out(q, k, v):
  temp_out, temp_attn = scaled_dot_product_attention(
      q, k, v, None)
  print ('Attention weights are:')
  print (temp_attn)
  print ('Output is:')
  print (temp_out)

In [0]:
k = tf.constant([[10,0,0,],
                      [0,10,0,]], dtype=tf.float32)  # (2, 3)

v = k
q = tf.constant([[0, 10, 0],[0, 0, 11],[20, 0, 11],[0, 0, 11]], dtype=tf.float32)  # (4, 3)

In [0]:
temp_k = tf.constant([[[10,0,0,],
                      [0,10,0,],
                      [0,0,10],
                      [0,0,10]],[[10,0,0,],
                      [0,10,0,],
                      [0,0,10],
                      [0,0,10]]], dtype=tf.float32)  # (2,4, 3)

temp_q = tf.constant([[[0, 10, 0],[0, 0, 11]],[[0, 10, 0],[0, 0, 11]]], dtype=tf.float32)  # (2,2, 3)

In [0]:
print_out(temp_q, temp_k, temp_k)

Attention weights are:
tf.Tensor(
[[[8.4332738e-26 1.0000000e+00 8.4332738e-26 8.4332738e-26]
  [1.3108893e-28 1.3108893e-28 5.0000000e-01 5.0000000e-01]]

 [[8.4332738e-26 1.0000000e+00 8.4332738e-26 8.4332738e-26]
  [1.3108893e-28 1.3108893e-28 5.0000000e-01 5.0000000e-01]]], shape=(2, 2, 4), dtype=float32)
Output is:
tf.Tensor(
[[[8.4332741e-25 1.0000000e+01 1.6866548e-24]
  [1.3108892e-27 1.3108892e-27 1.0000000e+01]]

 [[8.4332741e-25 1.0000000e+01 1.6866548e-24]
  [1.3108892e-27 1.3108892e-27 1.0000000e+01]]], shape=(2, 2, 3), dtype=float32)


In [0]:
class MultiHeadSentenceAttention(tf.keras.layers.Layer):
  def __init__(self, d_model,num_heads):
    
    super(MultiHeadSentenceAttention, self).__init__()
    self.num_heads = num_heads
    self.d_model=d_model
    assert d_model % self.num_heads == 0
    
    self.depth = self.d_model // self.num_heads

    self.wq = tf.keras.layers.Dense(d_model)
    self.wk = tf.keras.layers.Dense(d_model)
    self.wv = tf.keras.layers.Dense(d_model)
    
    self.dense = tf.keras.layers.Dense(d_model)
        
  def split_heads(self, x,batch_size): # x question size(3,250)
    """Split the last dimension into (num_heads, depth).
    Transpose the result such that the shape is (batch_size, num_heads, seq_len, depth) #()
    """
    
    x = tf.reshape(x, (batch_size,-1, self.num_heads, self.depth)) 

    return tf.transpose(x, perm=[0, 2, 1, 3])           # (batch_size,3,5,50)
    
  def call(self, v, k, q, mask=None):
    batch_size = tf.shape(q)[0]
    q = self.wq(q)  # (seq_len, d_expan)     #(batch_size,3,250)
    
    k = self.wk(k)  # (seq_len, d_expan)     #(batch_size,4,250)

    v = self.wv(v)  # (seq_len, d_expan)     #(batch_size,4,250)

    
    q = self.split_heads(q,batch_size)  # (num_heads, seq_len_q, depth)    # (5,3,50)
    k = self.split_heads(k,batch_size)  # (num_heads, seq_len_k, depth)    # (5,4,50)
    v = self.split_heads(v,batch_size)  # (num_heads, seq_len_v, depth)    # (5,4,50)
    
    # scaled_attention.shape == (num_heads, seq_len_q, depth)
    # attention_weights.shape == (num_heads, seq_len_q, seq_len_k)
    scaled_attention, attention_weights = scaled_dot_product_attention(q, k, v, mask)
    
    scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])  # (batch_size, seq_len_q, num_heads, depth)

    concat_attention = tf.reshape(scaled_attention, (batch_size,-1, self.d_model))  # (batch_size, seq_len_q, d_expan)
    # print('Concat Attention size:',concat_attention.shape)
    output = self.dense(concat_attention)  # (batch_size, seq_len_q, d_expan)
    return output, attention_weights

In [0]:
#Test code for Multihead Sentence Attention
temp_mha = MultiHeadSentenceAttention(d_model=3, num_heads=3)



temp_k = tf.constant([[[10,0,0,],
                      [0,10,0,],
                      [0,0,10],
                      [0,0,10]],[[10,0,0,],
                      [0,10,0,],
                      [0,0,10],
                      [0,0,10]]], dtype=tf.float32)  # (2,4, 3)

temp_q = tf.constant([[[0, 10, 0],[0, 0, 11]],[[0, 10, 0],[0, 0, 11]]], dtype=tf.float32)  # (2,2, 3)

out, attn = temp_mha(temp_k, temp_k, temp_q, mask=None)
out.shape, attn.shape

(TensorShape([2, 2, 3]), TensorShape([2, 3, 2, 4]))

In [0]:
print_out(out,temp_k,temp_k)

Attention weights are:
tf.Tensor(
[[[2.1095254e-20 5.7743830e-07 4.9999970e-01 4.9999970e-01]
  [5.2360864e-28 5.0496842e-12 5.0000000e-01 5.0000000e-01]]

 [[2.1095254e-20 5.7743830e-07 4.9999970e-01 4.9999970e-01]
  [5.2360864e-28 5.0496842e-12 5.0000000e-01 5.0000000e-01]]], shape=(2, 2, 4), dtype=float32)
Output is:
tf.Tensor(
[[[2.1095254e-19 5.7743828e-06 9.9999943e+00]
  [5.2360866e-27 5.0496843e-11 1.0000000e+01]]

 [[2.1095254e-19 5.7743828e-06 9.9999943e+00]
  [5.2360866e-27 5.0496843e-11 1.0000000e+01]]], shape=(2, 2, 3), dtype=float32)


In [0]:
def point_wise_feed_forward_network(d_model, dff):
  return tf.keras.Sequential([
      tf.keras.layers.Dense(dff, activation='relu'),  # (batch_size, seq_len, dff)
      tf.keras.layers.Dense(d_model)  # (batch_size, seq_len, d_model)
  ])

In [0]:
class EncoderLayer(tf.keras.layers.Layer):
  def __init__(self, d_model, num_heads, dff, rate=0.1):
    super(EncoderLayer, self).__init__()

    self.mha = MultiHeadSentenceAttention(d_model, num_heads)
    self.ffn = point_wise_feed_forward_network(d_model, dff)

    self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    
    self.dropout1 = tf.keras.layers.Dropout(rate)
    self.dropout2 = tf.keras.layers.Dropout(rate)
    
  def call(self, x, training,mask=None):

    attn_output, _ = self.mha(x, x, x,mask)  # (batch_size, input_seq_len, d_model)
    attn_output = self.dropout1(attn_output, training=training)
    out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)
    
    ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
    ffn_output = self.dropout2(ffn_output, training=training)
    out2 = self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)
    
    return out2

  
    

In [0]:
# Encoder Tesst code
temp_k = tf.constant([[[10,0,0,],
                      [0,10,0,],
                      [0,0,10],
                      [0,0,10]],[[10,0,0,],
                      [0,10,0,],
                      [0,0,10],
                      [0,0,10]]], dtype=tf.float32)  # (2,4, 3)
test_code=EncoderLayer(3,3,9,rate=0.1)
test_code(temp_k,training=False)

<tf.Tensor: id=689, shape=(2, 4, 3), dtype=float32, numpy=
array([[[ 1.2182857 ,  0.01281665, -1.2311022 ],
        [-1.0076237 ,  1.3631828 , -0.35555923],
        [-0.47795343, -0.9137027 ,  1.3916562 ],
        [-0.47795343, -0.9137027 ,  1.3916562 ]],

       [[ 1.2182857 ,  0.01281665, -1.2311022 ],
        [-1.0076237 ,  1.3631828 , -0.35555923],
        [-0.47795343, -0.9137027 ,  1.3916562 ],
        [-0.47795343, -0.9137027 ,  1.3916562 ]]], dtype=float32)>

In [0]:
class Encoder(tf.keras.layers.Layer):
  def __init__(self, num_layers, d_model, num_heads, dff, embedding_model_link, rate=0.1):
    super(Encoder, self).__init__()

    self.d_model = d_model
    self.num_layers = num_layers
    self.embedding = hub.KerasLayer(embedding_model_link,input_shape=[],trainable=True)
    
    self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate) 
                       for _ in range(num_layers)]
  
    self.dropout = tf.keras.layers.Dropout(rate)
    
        
  def call(self, x, training,mask=None):

    seq_len = tf.shape(x)[1]
    out_list=None
    for batch in x:
        temp=self.embedding(batch)

        temp=temp[tf.newaxis,...]
        if out_list is None:
            out_list=temp
        else:
            out_list=tf.concat([out_list,temp],axis=0)
    x=out_list # (batch_size, input_seq_len, d_model)
    
    x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))

    x = self.dropout(x, training=training)
    
    for i in range(self.num_layers):
      x = self.enc_layers[i](x, training,mask)
    
    return x  # (batch_size, input_seq_len, d_model)

In [0]:
temp_k = get_value_ts(train_data,[4,5],'Input')  # expect (2,2, 125)

test_code=Encoder(4,250,5,500,embedding_model,)
encode_out=test_code(temp_k,False)
encode_out.shape

TensorShape([2, 12, 250])

In [0]:
class Mymodel(tf.keras.Model):
  def __init__(self, num_layers, d_model, num_heads, dff,embedding_model_link,rate=0.1):
    super(Mymodel, self).__init__()

    self.encoder = Encoder(num_layers, d_model, num_heads, dff, embedding_model_link, rate)
    
    self.output_layer1 = tf.keras.layers.Dense(1)
    
    self.d_model=d_model
    
    
  def call(self, para, training, mask=None):

    enc_output = self.encoder(para,training,mask)  # (batch_size, inp_seq_len, d_model)
    sentence_num=enc_output.shape[-2]
    
    output=tf.reshape(enc_output,(-1,self.d_model))
    
    output=self.output_layer1(output)
    output=tf.reshape(output,(-1,sentence_num))
    
    if mask is not None:
        mask=tf.squeeze(mask,axis=[1,2])
        output += (mask * -1e9)
    output=output[:,1:]
    output=tf.nn.softmax(output,axis=-1)
    return output

In [0]:
# model test code
temp_k = get_value_ts(train_data,[4,5,6],'Input')  # expect (3,11, 250)

test_code=Mymodel(4,250,5,500,embedding_model)
answer=test_code(temp_k,False)
answer.shape

TensorShape([3, 11])

In [0]:
AnswerLocator=""
AnswerLocator=Mymodel(Layer_num,Embedding_dimension,num_heads,Embedding_expansion,embedding_model,0.05)

## Training Setup


In [0]:
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, 
                                     epsilon=1e-9)

In [0]:
loss_fn= tf.keras.losses.CategoricalCrossentropy()
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

In [0]:


# @tf.function()#input_signature=train_step_signature)
def train_step(context, target,mask=None):
  
  with tf.GradientTape() as tape:
    predictions = AnswerLocator(context,True,mask)
    loss = loss_fn(target, predictions)

  gradients = tape.gradient(loss, AnswerLocator.trainable_variables)    
  optimizer.apply_gradients(zip(gradients, AnswerLocator.trainable_variables))
  
  train_loss(loss)
  train_accuracy(target, predictions)

In [0]:
#Check Point setup:
checkpoint_path = "/gdrive/My Drive/Colab Notebooks/NLP project stages/Prototype 4 -- Small Universal 8 layers/Universal Large/"

ckpt = tf.train.Checkpoint(AnswerLocator=AnswerLocator,
                           optimizer=optimizer)

ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=2)

# if a checkpoint exists, restore the latest checkpoint.
if ckpt_manager.latest_checkpoint:
  ckpt.restore(ckpt_manager.latest_checkpoint)
  print ('Latest checkpoint restored!!')

In [0]:
# Evaluation function with Beam of top two answers
# @tf.function
def eval_step(Inputs,Validation):
  correct=0
  count=0
  predictions = AnswerLocator(Inputs,False,None)
  for n,prediction in enumerate(predictions):
      count+=1
      if len(prediction)<3:
        result=np.argmax(prediction)
        
        if result in Validation:
            correct+=1
      else:
        valid=Validation[0]
        if valid in np.argsort(prediction)[-2:]:
            correct+=1

  return correct,count


In [0]:
dev_data=pd.read_pickle('/gdrive/My Drive/Colab Notebooks/NLP project stages/Prototype 4 -- Small Universal 8 layers/dev_v1_pd.pkl')


In [0]:
dev_data.tail(5)

Unnamed: 0,ID,Context,Question,Answers,Start,Target,Input,Validation
10565,[5737aafd1c456719005744fb],"[The pound-force has a metric counterpart, les...",[What is the metric term less used than the Ne...,"[kilogram-force, pound-force, kilogram-force (...","[82, 4, 82, 82, 78]","[[1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [...",[What is the metric term less used than the Ne...,"[0, 0, 0, 0, 0]"
10566,[5737aafd1c456719005744fc],"[The pound-force has a metric counterpart, les...",[What is the kilogram-force sometimes reffered...,"[kilopond, kilopond, kilopond, kilopond, kilop...","[114, 114, 114, 114, 114]","[[1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [...",[What is the kilogram-force sometimes reffered...,"[0, 0, 0, 0, 0]"
10567,[5737aafd1c456719005744fd],"[The pound-force has a metric counterpart, les...",[What is a very seldom used unit of mass in th...,"[slug, metric slug, metric slug, metric slug, ...","[274, 267, 267, 267, 263]","[[0.0, 1.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [...",[What is a very seldom used unit of mass in th...,"[1, 1, 1, 1, 1]"
10568,[5737aafd1c456719005744fe],"[The pound-force has a metric counterpart, les...",[What seldom used term of a unit of force equa...,"[kip, kip, kip, kip, kip]","[712, 712, 712, 712, 712]","[[0.0, 0.0, 0.0, 1.0], [0.0, 0.0, 0.0, 1.0], [...",[What seldom used term of a unit of force equa...,"[3, 3, 3, 3, 3]"
10569,[5737aafd1c456719005744ff],"[The pound-force has a metric counterpart, les...",[What is the seldom used force unit equal to o...,"[sthène, sthène, sthène, sthène, sthène]","[665, 665, 665, 665, 665]","[[0.0, 0.0, 0.0, 1.0], [0.0, 0.0, 0.0, 1.0], [...",[What is the seldom used force unit equal to o...,"[3, 3, 3, 3, 3]"


In [0]:
def eval_result():
  start = time.time()
  
  dev_correct=0
  dev_count=0
  
  dev_index=np.arange(len(dev_data))


  # inp -> portuguese, tar -> english
  for batch,index in enumerate(dev_index):
    Inputs=get_value_ts(dev_data,[index],'Input')
    # print('Context shape is ',context.shape,type(context))
    Validation=dev_data.iloc[index]['Validation']
    # print('Target shape is ',target.shape,type(target))
    
    correct,count=eval_step(Inputs,Validation)
    dev_correct+=correct
    dev_count+=count
    
  print ('Dev Accuracy {:.4f}'.format(dev_correct/dev_count))

  print ('Time taken: {} secs\n'.format(time.time() - start))

## Execute Training

In [26]:
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
EPOCHS=2

for epoch in range(EPOCHS):
  start = time.time()
  
  train_loss.reset_states()
  train_accuracy.reset_states()
  
  train_index=shuffle_data(train_data,batch_size)


  for batch,index in enumerate(train_index):
    Inputs=get_value_ts(train_data,index,'Input')
    
    target=get_value_ts(train_data,index,'Target')
    
    
    mask=create_padding_mask(Inputs)
    train_step(Inputs,target,mask)
    
    if batch % 100 == 0:
      print ('Epoch {} Batch {} Loss {:.4f} Accuracy {:.4f}'.format(
          epoch + 1, batch, train_loss.result(), train_accuracy.result()))
      
  
  ckpt_save_path = ckpt_manager.save()
  
#   eval_result()
  print ('Saving checkpoint for epoch {} at {}'.format(epoch+1,ckpt_save_path))
    
  print ('Epoch {} Loss {:.4f} Accuracy {:.4f}'.format(epoch + 1, 
                                                train_loss.result(), 
                                                train_accuracy.result()))
  
  print ('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))

Epoch 1 Batch 0 Loss 1.5197 Accuracy 0.3125
Epoch 1 Batch 100 Loss 1.5441 Accuracy 0.2559
Epoch 1 Batch 200 Loss 1.5409 Accuracy 0.2516
Epoch 1 Batch 300 Loss 1.5142 Accuracy 0.2759
Epoch 1 Batch 400 Loss 1.4895 Accuracy 0.2930
Epoch 1 Batch 500 Loss 1.4660 Accuracy 0.3074
Epoch 1 Batch 600 Loss 1.4352 Accuracy 0.3284
Epoch 1 Batch 700 Loss 1.4036 Accuracy 0.3531
Epoch 1 Batch 800 Loss 1.3629 Accuracy 0.3799
Epoch 1 Batch 900 Loss 1.3168 Accuracy 0.4081
Epoch 1 Batch 1000 Loss 1.2786 Accuracy 0.4321
Epoch 1 Batch 1100 Loss 1.2416 Accuracy 0.4549
Epoch 1 Batch 1200 Loss 1.2050 Accuracy 0.4754
Epoch 1 Batch 1300 Loss 1.1754 Accuracy 0.4927
Epoch 1 Batch 1400 Loss 1.1471 Accuracy 0.5087
Epoch 1 Batch 1500 Loss 1.1212 Accuracy 0.5234
Epoch 1 Batch 1600 Loss 1.0986 Accuracy 0.5362
Epoch 1 Batch 1700 Loss 1.0769 Accuracy 0.5484
Epoch 1 Batch 1800 Loss 1.0564 Accuracy 0.5597
Epoch 1 Batch 1900 Loss 1.0387 Accuracy 0.5698
Epoch 1 Batch 2000 Loss 1.0222 Accuracy 0.5792
Epoch 1 Batch 2100 Loss 1

## Evaluation

In [27]:
#Evaluation result without using beam 1epoch
def eval_step(Inputs,Validation):
  correct=0
  count=0
  predictions = AnswerLocator(Inputs,False,None)
  for n,prediction in enumerate(predictions):
      count+=1
      
      result=np.argmax(prediction)
      
      if result in Validation:
          correct+=1
  return correct,count
eval_result()

Dev Accuracy 0.8297
Time taken: 888.161276102066 secs



In [28]:
#Evaluation result with beam K=2, 1epoch
def eval_step(Inputs,Validation):
  correct=0
  count=0
  predictions = AnswerLocator(Inputs,False,None)
  for n,prediction in enumerate(predictions):
      count+=1
      if len(prediction)<3:
        result=np.argmax(prediction)
        
        if result in Validation:
            correct+=1
      else:
        valid=Validation[0]
        if valid in np.argsort(prediction)[-2:]:
            correct+=1

  return correct,count
eval_result()

Dev Accuracy 0.9195
Time taken: 885.4403672218323 secs



In [0]:
para=['I had hot dog for breakfast.','I ate steak for dinner.']
question=['What did i have for dinner?','What did i eat in the morning?']
Inputs=[]i5hd
for q in question:
    temp=[q]+para
    Inputs.append(temp)
result=AnswerLocator(Inputs,False,None)
result

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0.5007382 , 0.4992618 ],
       [0.50087184, 0.4991281 ]], dtype=float32)>

In [0]:
para=['I went to work in the morning and went home at night.','I like to eat chocolate, but I hate to eat bananas.']
question=['What did i do in the morning?','when did i go to work?','What do i hate to eat?']
Inputs=[]
for q in question:
    temp=[q]+para
    Inputs.append(temp)
AnswerLocator(Inputs,False,None)

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0.92415655, 0.0758434 ],
       [0.96577615, 0.03422388],
       [0.06499044, 0.93500954]], dtype=float32)>

In [0]:
para=["Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season.",\
       "The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title.",\
       "The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California.",
       'As this was the 50th Super Bowl, the league emphasized the "golden anniversary" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as "Super Bowl L"), so that the logo could prominently feature the Arabic numerals 50.']
question=['Which NFL team represented the AFC at Super Bowl 50?',\
          'Which NFL team represented the NFC at Super Bowl 50?',\
          'What venue did Super Bowl 50 take place in?',\
          'What team was the champion?']
Inputs=[]
for q in question:
    temp=[q]+para
    Inputs.append(temp)
result=AnswerLocator(Inputs,False,None)
result

<tf.Tensor: shape=(4, 4), dtype=float32, numpy=
array([[0.33483383, 0.6187628 , 0.01326892, 0.03313452],
       [0.3255554 , 0.63913226, 0.01367904, 0.02163324],
       [0.24305443, 0.09530964, 0.4309794 , 0.2306565 ],
       [0.04987707, 0.59426653, 0.3273321 , 0.02852429]], dtype=float32)>

In [29]:
para=['A problem is regarded as inherently difficult if its solution requires significant resources, whatever the algorithm used.',\
      'The theory formalizes this intuition, by introducing mathematical models of computation to study these problems and quantifying the amount of resources needed to solve them, such as time and storage.',\
      'Other complexity measures are also used, such as the amount of communication (used in communication complexity), the number of gates in a circuit (used in circuit complexity) and the number of processors (used in parallel computing).',\
      'One of the roles of computational complexity theory is to determine the practical limits on what computers can and cannot do.']
question=['What measure of a computational problem broadly defines the inherent difficulty of the solution?',\
          'What method is used to intuitively assess or quantify the amount of resources required to solve a computational problem?',\
          'What are two basic primary resources used to guage 、
        ',\
          'What unit is measured to determine circuit complexity?']
Inputs=[]
for q in question:
    temp=[q]+para
    Inputs.append(temp)
result=AnswerLocator(Inputs,False,None)
result

<tf.Tensor: shape=(4, 4), dtype=float32, numpy=
array([[0.9470038 , 0.025538  , 0.0069189 , 0.02053931],
       [0.01949034, 0.78748393, 0.12155607, 0.07146969],
       [0.03315064, 0.22308715, 0.65726006, 0.08650219],
       [0.03066365, 0.06795207, 0.8401124 , 0.06127184]], dtype=float32)>