## Import all necessary libraries

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
from keras.utils.vis_utils import plot_model
from keras.callbacks import History
import math

In [3]:
import argparse

## Load dataset using CURL

In [2]:
!curl https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar --output daksh.tar
!tar -xvf  'daksh.tar' 

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1915M  100 1915M    0     0  97.2M      0  0:00:19  0:00:19 --:--:-- 78.4M
dakshina_dataset_v1.0/bn/
dakshina_dataset_v1.0/bn/lexicons/
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.test.tsv
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.train.tsv
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.dev.tsv
dakshina_dataset_v1.0/bn/native_script_wikipedia/
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.valid.text.shuf.txt.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-full.info.sorted.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.train.info.sorted.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.train.text.sorted.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.train.text.shuf.txt.gz
dakshina_dataset_v1.0/bn/native_script

## Login to wandb

In [4]:
!pip install wandb
!wandb login
import wandb
from wandb.keras import WandbCallback

[34m[1mwandb[0m: Currently logged in as: [33makshaygrao[0m (use `wandb login --relogin` to force relogin)


In [5]:
# wandb.init(project="DeepLearningAssignment-3", entity='cs21s002-ee21s113-dlassignment-1')
wandb.init(project="DL-Assignment3", entity='cs21s002-ee21s113-dlassignment-1')

[34m[1mwandb[0m: Currently logged in as: [33makshaygrao[0m (use `wandb login --relogin` to force relogin)


## Pre-process dataset

In [7]:
encoder_model_prefix = "Enc_"
decoder_model_prefix = "Dec_"

In [8]:
def obtain_input_target_data_from_path(path,tokenizer_obj):
  input_texts = []
  target_texts = []
  
  df = pd.read_csv(path,sep="\t",names=["1", "2","3"]).astype(str)
  if tokenizer_obj is None:
    # Shuffle rows in random order with a fixed seed(for reproducability)
    df=df.sample(frac=1,random_state=1)
  # Add all the  input and target texts with start sequence and end sequence added to target 
  for index, row in df.iterrows():
      input_text=row['2']
      target_text= row['1']
      # Skip empty lines/words
      if target_text =='</s>' or input_text=='</s>':
        continue
      
      target_text = "\t" + target_text + "\n"
      input_texts.append(input_text)
      target_texts.append(target_text)
  
  return input_texts, target_texts

In [9]:
def convert_text_to_sequences(tokenizer_obj,inp_texts):
  if tokenizer_obj is None:
    tokenizer_obj = tf.keras.preprocessing.text.Tokenizer(filters='', char_level=True)
    tokenizer_obj.fit_on_texts(inp_texts)
  ret_tensor = tokenizer_obj.texts_to_sequences(inp_texts)
  ret_tensor = tf.keras.preprocessing.sequence.pad_sequences(ret_tensor,padding='post')

  return ret_tensor,tokenizer_obj

In [10]:
# This method converts a dataset(from path) to input and target sequences
def pre_process_data(path,input_tokenizer=None,target_tokenizer=None,input_length=None,target_length=None):
  
  input_texts, target_texts = obtain_input_target_data_from_path(path,input_tokenizer)
  
  input_tensor,input_tokenizer = convert_text_to_sequences(input_tokenizer,input_texts)
  
  target_tensor,target_tokenizer = convert_text_to_sequences(target_tokenizer,target_texts)
  
  # Above functions return padded version wrt longest sequence in the given list of sequence
  # The below function, pads more zeros wrt input_length and target_length
  if input_length is not None and target_length is not None:
      input_tensor=tf.concat([input_tensor,tf.zeros((input_tensor.shape[0],input_length-input_tensor.shape[1]))],axis=1)
      target_tensor=tf.concat([target_tensor,tf.zeros((target_tensor.shape[0],target_length-target_tensor.shape[1]))],axis=1)
  return input_texts,input_tensor,input_tokenizer,target_texts,target_tensor,target_tokenizer

In [11]:
transliteration_target_language = 'kn'

In [12]:
train_input_texts,train_input_tensor,input_tokenizer,train_target_texts,train_target_tensor,target_tokenizer = pre_process_data("/content/dakshina_dataset_v1.0/"+transliteration_target_language+"/lexicons/"+transliteration_target_language+".translit.sampled.train.tsv")
# Only training dataset is used to fit the tokenizer on text. Other datasets just use this vocab for pre-processing
# The length for padding is also set from training datasets
val_input_texts,val_input_tensor,val_input_tokenizer,val_target_texts,val_target_tensor,val_target_tokenizer = pre_process_data("/content/dakshina_dataset_v1.0/"+transliteration_target_language+"/lexicons/"+transliteration_target_language+".translit.sampled.dev.tsv",input_tokenizer,target_tokenizer,train_input_tensor.shape[1],train_target_tensor.shape[1])
test_input_texts,test_input_tensor,test_input_tokenizer,test_target_texts,test_target_tensor,test_target_tokenizer = pre_process_data("/content/dakshina_dataset_v1.0/"+transliteration_target_language+"/lexicons/"+transliteration_target_language+".translit.sampled.test.tsv",input_tokenizer,target_tokenizer,train_input_tensor.shape[1],train_target_tensor.shape[1])

In [14]:
num_encoder_tokens = len(input_tokenizer.word_index)+1
num_decoder_tokens = len(target_tokenizer.word_index)+1
max_encoder_seq_length =  train_input_tensor.shape[1]
max_decoder_seq_length = train_target_tensor.shape[1]

In [15]:
print(num_encoder_tokens)
print(num_decoder_tokens)
print(max_encoder_seq_length)
print(max_decoder_seq_length)

27
64
26
26


## Utility functions

In [18]:
def get_encoder_decoder_layers_from_model(model):
  decoder_layers = 0
  encoder_layers = 0
  for each_layer in model.layers:
    layer_name = each_layer.name
    if(decoder_model_prefix+"cell" in layer_name):
      decoder_layers += 1
    elif(encoder_model_prefix+"cell" in layer_name):
      encoder_layers += 1
  return encoder_layers,decoder_layers

In [19]:
def get_latent_dim_from_model(model):
  return model.get_layer(str(encoder_model_prefix)+"cell_0").output[0].shape[2]

In [20]:
def get_rnntype_from_model(model):
  if isinstance(model.get_layer(encoder_model_prefix+"cell_0"), keras.layers.LSTM):
    return 'LSTM'
  elif isinstance(model.get_layer(encoder_model_prefix+"cell_0"), keras.layers.GRU):
    return "GRU"
  elif isinstance(model.get_layer(encoder_model_prefix+"cell_0"), keras.layers.RNN):
    return "RNN"

In [31]:
def get_optimizer(code,lr):
  if(code=="SGD"):
    return keras.optimizers.SGD(lr)
  elif(code == "RMSprop"):
    return keras.optimizers.RMSprop(lr)
  elif(code == "Adam"):
    return keras.optimizers.Adam(lr)
  elif(code == "Nadam"):
    return keras.optimizers.Nadam(lr)
  else:
    return keras.optimizers.Adam(lr)

In [24]:
index_to_char_target = dict((target_tokenizer.word_index[key], key) for key in target_tokenizer.word_index.keys())

## Code for constructing seq-seq model

In [16]:
def build_layered_RNN_model(rnn_type,embedding_in_dim,embedding_out_dim,layers,dropout,inp_length,model_out_dim,prefix,initial_state = None):
   #input layer ; takes in tokenize input
  model_inputs = keras.Input(shape=( inp_length),name=prefix+"inp")
  #embedding layer
  embed = keras.layers.Embedding(embedding_in_dim, embedding_out_dim,name=prefix+"embed")(model_inputs)
  
  last_layer_model = None
  if rnn_type == 'LSTM':
    #adding everything except the last LSTM layer, because in last layer return state=True
    for i in range(layers):
      layered_model = keras.layers.LSTM(model_out_dim, return_sequences=True,return_state=True,dropout=dropout,name=prefix+"cell_"+str(i))
      if i==0:
        inp_layer = embed
      else:
        inp_layer = last_layer_model
      
      model_layer_out,state_h, state_c = layered_model(inp_layer,initial_state)
      
      last_layer_model = model_layer_out
    
    model_states = [state_h, state_c]
    
  elif rnn_type=='GRU':
    #adding everything except the last GRU layer, because in last layer return state=True    
    for i in range(layers):
      layered_model = keras.layers.GRU(model_out_dim, return_sequences=True,return_state=True,dropout=dropout,name=prefix+"cell_"+str(i))
      if i==0:
        inp_layer = embed
      else:
        inp_layer = last_layer_model
      
      model_layer_out,state = layered_model(inp_layer,initial_state)
      
      last_layer_model = model_layer_out

    model_states = [state]
  elif rnn_type=='RNN':
    #adding everything except the last RNN layer, because in last layer return state=True
    for i in range(layers):      
      layered_model = keras.layers.SimpleRNN(model_out_dim, return_sequences=True,return_state=True,dropout=dropout,name=prefix+"cell_"+str(i))
      if i==0:
        inp_layer = embed
      else:
        inp_layer = last_layer_model
        
      model_layer_out,state = layered_model(inp_layer,initial_state)
      
      last_layer_model = model_layer_out

    model_states = [state]
    
  return model_states,last_layer_model,model_inputs


In [17]:

#Build the model
def build_model(rnn_type,embedding_dim,encoder_layers,decoder_layers,dropout,latent_dim):
  
  encoder_states,encoder_outputs,encoder_inputs = build_layered_RNN_model(rnn_type=rnn_type,embedding_in_dim = num_encoder_tokens,embedding_out_dim = embedding_dim,layers = encoder_layers,dropout = dropout,inp_length = max_encoder_seq_length,model_out_dim = latent_dim,prefix=encoder_model_prefix)

  _,decoder_outputs,decoder_inputs = build_layered_RNN_model(rnn_type=rnn_type,embedding_in_dim = num_decoder_tokens,embedding_out_dim = embedding_dim,layers = decoder_layers,dropout = dropout,inp_length = max_decoder_seq_length,model_out_dim = latent_dim,prefix=decoder_model_prefix,initial_state = encoder_states)
  
  decoder_dense = keras.layers.Dense(num_decoder_tokens, activation="softmax",name='final')
  decoder_outputs = decoder_dense(decoder_outputs)

  model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
  
  return model

## Code for constructing inference model

In [21]:
def get_inference_encoder_model(model):
  encoder_layers,_ = get_encoder_decoder_layers_from_model(model)
  encoder_inputs = model.input[0]  
  if isinstance(model.get_layer(encoder_model_prefix+"cell_0"), keras.layers.LSTM):
    encoder_outputs, state_h_enc, state_c_enc = model.get_layer(encoder_model_prefix+"cell_"+str(encoder_layers - 1)).output  
    encoder_states = [state_h_enc, state_c_enc]
  elif (isinstance(model.get_layer(encoder_model_prefix+"cell_0"), keras.layers.GRU) or isinstance(model.get_layer(encoder_model_prefix+"cell_0"), keras.layers.RNN)):
    encoder_outputs, state = model.get_layer(encoder_model_prefix+"cell_"+str(encoder_layers - 1)).output  
    encoder_states = [state]

  encoder_model = keras.Model(encoder_inputs, encoder_states)
  return encoder_model

In [22]:
def get_inference_decoder_model(model):
  latent_dim = get_latent_dim_from_model(model)
  _,decoder_layers = get_encoder_decoder_layers_from_model(model)

  # Decoder during inference takes just one character(i.e vector rep of a character). This is either from previous timestep or start of sequence("\t")
  decoder_inputs =  keras.Input(shape=( 1))
  # Contains input to each decoder layer
  decoder_states_inputs=[]
  # Contains state output from each decoder layer
  decoder_states=[]
  previous_decoder_output = None

  emdedded_rep_of_decoder_input = model.get_layer(decoder_model_prefix+"embed")(decoder_inputs)
  
  if isinstance(model.get_layer(decoder_model_prefix+"cell_0"), keras.layers.LSTM):
    for i in range(decoder_layers):
      #every layer must have an input through which we can supply it's hidden state
      decoder_state_input_h = keras.Input(shape=(latent_dim,),name='inp3_'+str(i))
      decoder_state_input_c = keras.Input(shape=(latent_dim,),name='inp4_'+str(i))
      init_state = [decoder_state_input_h, decoder_state_input_c]
      decoder_lstm = model.get_layer(decoder_model_prefix+"cell_"+str(i))
      if i==0:
        decoder_outputs, state_h_dec, state_c_dec = decoder_lstm(emdedded_rep_of_decoder_input, initial_state=init_state)
      else:
        decoder_outputs, state_h_dec, state_c_dec = decoder_lstm(previous_decoder_output, initial_state=init_state )
      
      previous_decoder_output = decoder_outputs
      decoder_states_inputs.append (decoder_state_input_h)
      decoder_states_inputs.append (decoder_state_input_c)
      decoder_states.append (state_h_dec)
      decoder_states.append (state_c_dec)
  elif isinstance(model.get_layer(decoder_model_prefix+"cell_0"), keras.layers.GRU):
    for i in range(decoder_layers):
      decoder_state_input = keras.Input(shape=(latent_dim,),name='inp3_'+str(i))
      init_state = [decoder_state_input]
      decoder_gru = model.get_layer(decoder_model_prefix+"cell_"+str(i))
      if i==0:
        decoder_outputs, state = decoder_gru(emdedded_rep_of_decoder_input, initial_state=init_state)
      else:
        decoder_outputs, state = decoder_gru(previous_decoder_output, initial_state=init_state )
      
      previous_decoder_output = decoder_outputs
      decoder_states_inputs.append (decoder_state_input)
      decoder_states.append (state)
  elif isinstance(model.get_layer(decoder_model_prefix+"cell_0"), keras.layers.RNN):
    for i in range(decoder_layers):
      decoder_state_input = keras.Input(shape=(latent_dim,),name='inp3_'+str(i))
      init_state = [decoder_state_input]
      decoder_rnn = model.get_layer(decoder_model_prefix+"cell_"+str(i))
      if i==0:
        decoder_outputs, state = decoder_rnn(emdedded_rep_of_decoder_input, initial_state=init_state)
      else:
        decoder_outputs, state = decoder_rnn(previous_decoder_output, initial_state=init_state )
      
      previous_decoder_output = decoder_outputs
      decoder_states_inputs.append (decoder_state_input)
      decoder_states.append (state)      
  decoder_dense = model.get_layer('final')
  decoder_outputs = decoder_dense(previous_decoder_output)
  decoder_model = keras.Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

  return decoder_model

In [23]:
def build_inference_model(model):
    encoder_model = get_inference_encoder_model(model)
    
    decoder_model = get_inference_decoder_model(model)

    return encoder_model,decoder_model

## Code for running samples on inference model(with and without beam search)

In [25]:
class BeamRecordKeeping:
  def __init__(self,decoder_input_state_values,prev_char_index,joint_probability,accumulated_previous_chars):
    self.decoder_input_state_values = decoder_input_state_values.copy()
    self.prev_char_index = np.copy(prev_char_index)
    self.joint_probability = joint_probability
    self.accumulated_previous_chars = accumulated_previous_chars

  def __str__(self):
    return "decoder_input_state_values: "+str(self.decoder_input_state_values) + "\n prev_char_index: "+str(self.prev_char_index)+"\n joint_probability: "+str(self.joint_probability)+" accumulated_previous_chars: "+str(self.accumulated_previous_chars)


In [26]:
def get_sampled_char(sampled_token_index):
  if sampled_token_index == 0:
    sampled_char='\n'
  else:
    sampled_char = index_to_char_target[sampled_token_index]
  return sampled_char

def get_predicted_word_from_beam(list_of_beam_objs):
  current_highest_score = list_of_beam_objs[0].joint_probability
  predicted_word = list_of_beam_objs[0].accumulated_previous_chars
  for each_obj in list_of_beam_objs:
    if(each_obj.joint_probability > current_highest_score):
      predicted_word = each_obj.accumulated_previous_chars
  return predicted_word

### This function is for cases where beam_width=1

In [27]:
def decode_batch_of_sequences(rnn_type,input_seq,encoder_model,decoder_model,batch_size,encoder_layers,decoder_layers):
    # Get encoder output
    encoder_output_state_values = encoder_model.predict(input_seq)
    if rnn_type=='GRU' or 'RNN':
      decoder_input_state_values=[encoder_output_state_values]
    
    # This is needed because encoder state is fed to all decoder layers
    decoder_input_state_values = decoder_input_state_values * decoder_layers
    
    # This is contain previously predicted character's index for every words in batch.
    prev_char_index = np.zeros((batch_size, 1))
    # We start with \t for every word in batch
    prev_char_index[:, 0] = target_tokenizer.word_index['\t']
    
    predicted_words = [ "" for i in range(batch_size)]
    done=[False for i in range(batch_size)]
    for i in range(max_decoder_seq_length):
        decoder_out = decoder_model.predict(tuple([prev_char_index] + decoder_input_state_values))
        # Decoder output has both output of all timesteps followed by hidden states
        output_probability = decoder_out[0]
        # Decoder state input is previous layer state output
        decoder_input_state_values = decoder_out[1:]
        for j in range(batch_size):
          if done[j]:
            continue          
          sampled_token_index = np.argmax(output_probability[j, -1, :])
          if sampled_token_index == 0:
            sampled_char='\n'
          else:
            sampled_char = index_to_char_target[sampled_token_index]
          if sampled_char == '\n':
            done[j]=True
            continue            
          predicted_words[j] += sampled_char
          #update the previously predicted characters        
          prev_char_index[j,0]=target_tokenizer.word_index[sampled_char]
    return predicted_words

### This function is called when beam_width>1 during running inference model

In [28]:
def decode_batch_of_sequences_for_bigger_beam_width(rnn_type,input_seq,encoder_model,decoder_model,batch_size,encoder_layers,decoder_layers,beam_search_width):
    print("batch_size"+str(batch_size))
    print("input_seq:"+str(input_seq.shape))
    next_list_of_beam_record_objects = []
    predicted_words = [ "" for i in range(batch_size)]
    list_of_beam_record_objects = []
    for j in range(batch_size):
      next_list_of_beam_record_objects = []
      list_of_beam_record_objects = []
      if(j % 100 == 0):
        print("**********Batch number*************:"+str(j))
      current_seq = input_seq[j]
      current_seq = tf.expand_dims(current_seq, 0)
      # Get encoder output
      decoder_input_state_values = encoder_model.predict(current_seq)
      if rnn_type=='GRU' or 'RNN':
        decoder_input_state_values=[decoder_input_state_values]
        decoder_input_state_values = decoder_input_state_values * decoder_layers
      else:
        decoder_input_state_values = decoder_input_state_values[0] * decoder_layers
      
      prev_char_index = np.zeros((1, 1))
      # We start with \t for every word in batch
      prev_char_index[:, 0] = target_tokenizer.word_index['\t']
      done  = False
      for _ in range(beam_search_width):
        current_beam_search_obj = BeamRecordKeeping(decoder_input_state_values,prev_char_index,0,"")
        list_of_beam_record_objects.append(current_beam_search_obj)

      for i in range(max_decoder_seq_length):
        if(done):
          break

        if(i != 0):
          list_of_beam_record_objects = next_list_of_beam_record_objects
        next_list_of_beam_record_objects = []
        for beam_index in range(beam_search_width):
          # print("prev_char_index"+str(list_of_beam_record_objects[beam_index].prev_char_index.shape))
          # print("decoder_input_state_values"+str(len(list_of_beam_record_objects[beam_index].decoder_input_state_values)))
          # print("decoder_input_state_values"+str(list_of_beam_record_objects[beam_index].decoder_input_state_values[0].shape))

          decoder_out = decoder_model.predict(tuple([list_of_beam_record_objects[beam_index].prev_char_index] + list_of_beam_record_objects[beam_index].decoder_input_state_values))
          # Decoder output has both output of all timesteps followed by hidden states
          output_probability = decoder_out[0]
          # Decoder state input is previous layer state output
          decoder_input_state_values = decoder_out[1:]
          sampled_token_index = np.argsort(output_probability[0][-1, :])[-beam_search_width:]
          sampled_probability_values = output_probability[0][-1, :][sampled_token_index]

          for each_candidate in range(1,len(sampled_probability_values)+1):
            new_joint_probability = list_of_beam_record_objects[beam_index].joint_probability + math.log(sampled_probability_values[-each_candidate])
            if(len(next_list_of_beam_record_objects) < beam_search_width):
              sampled_char = get_sampled_char(sampled_token_index[-each_candidate])
              if sampled_char == '\n':
                done = True
                break
              accumulated_previous_chars = list_of_beam_record_objects[beam_index].accumulated_previous_chars + sampled_char
              prev_char_index[:, 0]=target_tokenizer.word_index[sampled_char]
              next_beam_record_keeping_obj = BeamRecordKeeping(decoder_input_state_values,prev_char_index,new_joint_probability,accumulated_previous_chars)
              next_list_of_beam_record_objects.append(next_beam_record_keeping_obj)
            else:
              replace_indx = -1
              for (current_indx,each_obj) in enumerate(next_list_of_beam_record_objects):
                if(each_obj.joint_probability < new_joint_probability):
                  replace_indx = current_indx
                  break
              if(replace_indx != -1):
                sampled_char = get_sampled_char(sampled_token_index[-each_candidate])
                if sampled_char == '\n':
                  done = True
                  break
                accumulated_previous_chars = list_of_beam_record_objects[beam_index].accumulated_previous_chars + sampled_char
                prev_char_index[:, 0]=target_tokenizer.word_index[sampled_char]
                next_beam_record_keeping_obj = BeamRecordKeeping(decoder_input_state_values,prev_char_index,new_joint_probability,accumulated_previous_chars)
                next_list_of_beam_record_objects[replace_indx] = next_beam_record_keeping_obj
          
          if(done or i == max_decoder_seq_length-1):
            if( len(next_list_of_beam_record_objects) == 0):
              predicted_words[j] = get_predicted_word_from_beam(list_of_beam_record_objects)
            else:
              predicted_words[j] = get_predicted_word_from_beam(next_list_of_beam_record_objects)
            break
        
    return predicted_words

## Code for obtaining accuracies on inference model

In [29]:
def test_accuracy(model,encoder_model,decoder_model,beam_search_width=1):
  rnn_type = get_rnntype_from_model(model)
  encoder_layers,decoder_layers = get_encoder_decoder_layers_from_model(model)
  
  success=0
  success_char = 0
  total_chars = 0
  #Get all the predicted words
  if(beam_search_width == 0 or beam_search_width == 1):
    pred=decode_batch_of_sequences(rnn_type,test_input_tensor,encoder_model,decoder_model,test_input_tensor.shape[0],encoder_layers,decoder_layers)
  else:
    pred=decode_batch_of_sequences_for_bigger_beam_width(rnn_type,test_input_tensor,encoder_model,decoder_model,test_input_tensor.shape[0],encoder_layers,decoder_layers,beam_search_width)
  for seq_index in range(test_input_tensor.shape[0]):
      predicted_word = pred[seq_index]
      target_word=test_target_texts[seq_index][1:-1]
      for (indx,each_ele) in enumerate(target_word):
        total_chars += 1
        if(indx < len(predicted_word)):
          if(target_word[indx] == predicted_word[indx]):
            success_char += 1

      #test the word one by one and write to files
      if target_word == predicted_word:
        success+=1
        f = open("success.txt", "a")
        f.write(test_input_texts[seq_index]+' '+target_word+' '+predicted_word+'\n')
        f.close()
      else:
        f = open("failure.txt", "a")
        f.write(test_input_texts[seq_index]+' '+target_word+' '+predicted_word+'\n')
        f.close()
  
  print("success:"+str(success))
  print("success_char:"+str(success_char))
  return float(success)/float(test_input_tensor.shape[0]),float(success_char)/float(total_chars)

In [30]:
def batch_validate(model,encoder_model,decoder_model,beam_search_width=1):
  rnn_type = get_rnntype_from_model(model)
  encoder_layers,decoder_layers = get_encoder_decoder_layers_from_model(model)

  success = 0
  success_char = 0
  total_chars = 0
  #get all the predicted words
  if(beam_search_width == 0 or beam_search_width == 1):
    pred=decode_batch_of_sequences(rnn_type,val_input_tensor,encoder_model,decoder_model,val_input_tensor.shape[0],encoder_layers,decoder_layers)
  else:
    pred=decode_batch_of_sequences_for_bigger_beam_width(rnn_type,val_input_tensor,encoder_model,decoder_model,val_input_tensor.shape[0],encoder_layers,decoder_layers,beam_search_width)

  for seq_index in range(val_input_tensor.shape[0]):
    predicted_word = pred[seq_index]
    target_word = val_target_texts[seq_index][1:-1]
    #test the words one by one
    if predicted_word == target_word:
      # print("pred:"+str(pred[seq_index]))
      # print("Target: "+str(val_target_texts[seq_index][1:-1]))
      success+=1
      
    for (indx,each_ele) in enumerate(target_word):
      total_chars += 1
      if(indx < len(predicted_word)):
        if(target_word[indx] == predicted_word[indx]):
          # print("pred:"+str(pred[seq_index]))
          # print("Target: "+str(target_word))
          success_char += 1
  
  print("success:"+str(success))
  print("success_char:"+str(success_char))
  # print("val_input_tensor.shape[0]:"+str(val_input_tensor.shape[0]))
  return float(success)/float(val_input_tensor.shape[0]),float(success_char)/float(total_chars)

## Code to train model and return inference and root model 

In [32]:
def run_custom_model(encoder_layers,decoder_layers,epochs,lr,latent_dim,rnn_type,embedding_dim,dropout,bs,optimizer,model_save_path,encoder_save_path,decoder_save_path,save=False,use_wandb=False):

      # Create a MirroredStrategy.
    if tf.config.list_physical_devices('GPU'):
        strategy = tf.distribute.MirroredStrategy()
    else:  # use default strategy
        strategy = tf.distribute.get_strategy()
    print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        # Open a strategy scope and create the model
    with strategy.scope():
      model = build_model(rnn_type,embedding_dim,encoder_layers,decoder_layers,dropout,latent_dim)

    plot_model(model, to_file=str(model_save_path)+'.png', show_shapes=True, show_dtype=True,show_layer_names=True)

    model.summary()
    
    optimizerObj = get_optimizer(optimizer,lr)
    
    model.compile(optimizer=optimizerObj, loss=keras.losses.SparseCategoricalCrossentropy(reduction='none'), metrics=["accuracy"])
    if(use_wandb == False):
      hist=model.fit([train_input_tensor, train_target_tensor],tf.concat([train_target_tensor[:,1:],tf.zeros((train_target_tensor[:,:].shape[0],1))], axis=1),batch_size=bs,epochs=epochs,shuffle=True)
    else:
      hist=model.fit([train_input_tensor, train_target_tensor],tf.concat([train_target_tensor[:,1:],tf.zeros((train_target_tensor[:,:].shape[0],1))], axis=1),batch_size=bs,epochs=epochs,shuffle=True,callbacks=[WandbCallback(), history])

    encoder_inference_model,decoder_inference_model=build_inference_model(model)
    plot_model(encoder_inference_model, to_file=str(encoder_save_path)+'.png', show_shapes=True)
    plot_model(decoder_inference_model, to_file=str(decoder_save_path)+'.png', show_shapes=True)

    if(save == True):
      from google.colab import drive
      drive.mount('/content/drive')
      model.save('drive/MyDrive/Colab Notebooks/'+str(model_save_path)+'.h5')
      encoder_inference_model.save('drive/MyDrive/Colab Notebooks/'+str(encoder_save_path)+".h5")
      decoder_inference_model.save('drive/MyDrive/Colab Notebooks/'+str(decoder_save_path)+".h5")
      plot_model(encoder_inference_model, to_file='drive/MyDrive/Colab Notebooks/'+str(encoder_save_path)+".png", show_shapes=True)
      plot_model(decoder_inference_model, to_file='drive/MyDrive/Colab Notebooks/'+str(decoder_save_path)+'.png', show_shapes=True)
      plot_model(model, to_file='drive/MyDrive/Colab Notebooks/'+str(model_save_path)+'.png', show_shapes=True, show_dtype=True,show_layer_names=True)


    model.save(str(model_save_path)+'.h5')
    encoder_inference_model.save(str(encoder_save_path)+'.h5')
    decoder_inference_model.save(str(decoder_save_path)+'.h5')

    return model,encoder_inference_model,decoder_inference_model

## Code used to obtain accuracy either by loading models from path(in GDrive or local) or by passing model objects

In [33]:
def load_model_run_accuracy(is_test=False,model_path=None,encoder_inf_path=None,decoder_inf_path=None,from_gdrive=False,beam_width = 1,model=None,encoder_inference_model=None,decoder_inference_model=None):
  if(from_gdrive == True):
    from google.colab import drive
    drive.mount('/content/drive')
  
  if(model is None):
    print("Load model from path")
    model = keras.models.load_model(model_path)
  
  if(encoder_inf_path is None or decoder_inf_path is None):
    if(decoder_inference_model is None or encoder_inference_model is None):
      encoder_inference_model,decoder_inference_model=build_inference_model(model)
  else:
    encoder_inference_model= keras.models.load_model(encoder_inf_path)
    decoder_inference_model= keras.models.load_model(decoder_inf_path)
  
  if(is_test == True):
    word_val_acc,char_val_acc=test_accuracy(model,encoder_inference_model,decoder_inference_model,beam_width)
  else:
    word_val_acc,char_val_acc=batch_validate(model,encoder_inference_model,decoder_inference_model,beam_width)

  return word_val_acc,char_val_acc

## Code to run the best model observed and return inference and root models
Later, load_model_run_accuracy needs to be called to obtain accuracies

In [37]:
def run_best_model(save=False):
  encoder_layers = 3
  decoder_layers = 3
  epochs = 20
  lr = 0.0001
  latent_dim = 1024
  rnn_type = 'GRU'
  embedding_dim = 512
  dropout = 0.4
  bs = 64
  optimizer = "Adam"
  model,encoder_inference_model,decoder_inference_model = run_custom_model(encoder_layers,decoder_layers,epochs,lr,latent_dim,rnn_type,embedding_dim,dropout,bs,optimizer,"best_model_assignment_3","best_encoder_inference_model_assignment_3","best_decoder_inference_model_assignment_3",save)
  return model,encoder_inference_model,decoder_inference_model

## Code related to hyper-parameter tuning

In [34]:
default_config = {
        "rnn_type": "LSTM",
        "dropout": 0.5,
        "encoder_layers":3,
        "decoder_layers":3,
        "latent_dim": 64,
        "epochs": 1,
        "lr": 0.0001,
        "embedding_out_dim": 64,
        "beam_search":1,
        "batch_size":64,
        "optimizer": "Adam"
    }

#Keras callback    
history = History()

In [35]:
def HP_tuning_run():
    # Create a MirroredStrategy.
    if tf.config.list_physical_devices('GPU'):
        strategy = tf.distribute.MirroredStrategy()
    else:  # use default strategy
        strategy = tf.distribute.get_strategy()
    print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

    # wandb.init(config=default_config, magic=True,project="DeepLearningAssignment-3", entity='cs21s002-ee21s113-dlassignment-1')
    wandb.init(config=default_config, magic=True,project="DL-Assignment3", entity='cs21s002-ee21s113-dlassignment-1')
    # wandb.init(config=default_config, magic=True,project="DeepLearningAssignment-3", entity='akshaygrao')
    config = wandb.config
    print("Config: "+str(config))
    run_name = str(config).replace("{", "").replace("}","").replace(":","-")
    wandb.run.name = run_name
    
    model,encoder_inference_model,decoder_inference_model = run_custom_model(encoder_layers=config.encoder_layers,decoder_layers=config.decoder_layers,epochs=config.epochs,lr=config.lr,latent_dim=config.latent_dim,rnn_type=config.rnn_type,embedding_dim=config.embedding_out_dim,dropout=config.dropout,bs=config.batch_size,optimizer=config.optimizer,model_save_path=f'{run_name.replace(",","-")}_model',encoder_save_path=f'{run_name.replace(",","-")}_encoder',decoder_save_path=f'{run_name.replace(",","-")}_decoder',save=False,use_wandb=True)

    word_val_acc,char_val_acc = load_model_run_accuracy(model=model,encoder_inference_model=encoder_inference_model,decoder_inference_model=decoder_inference_model,beam_width=config.beam_search)
    print("word_val_acc"+str(word_val_acc))
    print("char_val_acc"+str(char_val_acc))
    wandb.log({"word_val_acc":round(word_val_acc,5)})
    wandb.log({"char_val_acc":round(char_val_acc,5)})
    wandb.log({"language":transliteration_target_language})
    


In [36]:
sweep_config = {
    "name": "Assignment-3-final-batch-optimizer",
    "method": "bayes",
    "metric":{
      "goal": "maximize",
      "name": "word_val_acc"
    },
    "project": 'DL-Assignment3',
    "parameters": {
        "rnn_type": {
            "values": ["LSTM","GRU","RNN"]
        },
        "dropout": {
            "values": [0.2,0.4]
        },
        "encoder_layers": {
            "values": [3]
        },
        "decoder_layers": {
            "values": [3]
        },
        "latent_dim": {
            "values": [512,1024,2048]
        },
        "epochs": {
            "values": [20]
        },
        "lr": {
            "values": [0.0001]
        },
        "embedding_out_dim": {
            "values":[128,256]
        },
        "beam_search":{
            "values":[1]
        },
        "batch_size":{
            "values":[64,128,256]
        },
        "optimizer":{
            "values":["Nadam","SGD","RMSprop"]
        }
        
    }
}

In [None]:
# sweep_id = wandb.sweep(sweep_config,  project='DeepLearningAssignment-3', entity='akshaygrao')
# sweep_id = wandb.sweep(sweep_config,  project='DeepLearningAssignment-3', entity='cs21s002-ee21s113-dlassignment-1')
# sweep_id = wandb.sweep(sweep_config,  project='DL-Assignment3', entity='cs21s002-ee21s113-dlassignment-1')
sweep_id="1fwq5qge"

VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

In [None]:
# wandb.agent(sweep_id, function=HP_tuning_run, project='DeepLearningAssignment-3', entity='akshaygrao')
# wandb.agent(sweep_id, function=HP_tuning_run, project='DeepLearningAssignment-3', entity='cs21s002-ee21s113-dlassignment-1')
wandb.agent(sweep_id, function=HP_tuning_run, project='DL-Assignment3', entity='cs21s002-ee21s113-dlassignment-1')

# Run this code in colab to get test accuracy for best model

In [None]:
model,encoder_inference_model,decoder_inference_model = run_best_model()
word_val_acc,char_val_acc = load_model_run_accuracy(model=model,encoder_inference_model=encoder_inference_model,decoder_inference_model=decoder_inference_model,is_test=True)
print("Test word_val_acc"+str(word_val_acc))
print("Test char_val_acc"+str(char_val_acc))

# Command line argument handlers

In [38]:
def init_argparse() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser()
    
    subparsers = parser.add_subparsers(dest='command')
    run_model_parser = subparsers.add_parser('run_model')
    run_model_parser.add_argument("--encoder_layers", action="store",dest='encoder_layers', help="Specify number of layers in encoder(default:3)",default=3,required=False)
    run_model_parser.add_argument("--decoder_layers", action="store",dest='decoder_layers', help="Specify number of layers in decoder(default:3)",default=3,required=False)
    run_model_parser.add_argument("--epochs", action="store",dest='epochs', help="Specify number of epochs(Default:15)",default=15,required=False)
    run_model_parser.add_argument("--lr", action="store",dest='lr', help="Specify learning rate(Default:0.0001)",default=0.0001,required=False)
    run_model_parser.add_argument("--latent_dim", action="store",dest='latent_dim', help="Specify latent dimensions(Default:1024)",default=1024,required=False)
    run_model_parser.add_argument("--rnn_type", action="store",dest='rnn_type', help="Specify cell type of RNN('RNN','GRU','LSTM'))(Default:GRU)",default='GRU',required=False)
    run_model_parser.add_argument("--embedding_dim", action="store",dest='embedding_dim', help="Specify dimension of embedding layer output(Default:256)",default=256,required=False)
    run_model_parser.add_argument("--dropout", action="store",dest='dropout', help="Specify dropout in input (applies at all layers of encoder and decoder)(Default:0.4)",default=0.4,required=False)
    run_model_parser.add_argument("--bs", action="store",dest='bs', help="Specify batch size(Default:64)",default=64,required=False)
    run_model_parser.add_argument("--optimizer", action="store",dest='optimizer', help="Specify optimizer algorithm('Adam','Nadam','SGD','RMSprop')(Default:'Adam')",default='Adam',required=False)
    run_model_parser.add_argument("--model_save_path", action="store",dest='model_save_path', help="Specify path to save model into(Default:'model')",default='model',required=False)
    run_model_parser.add_argument("--encoder_save_path", action="store",dest='encoder_save_path', help="Specify path to save encoder model into(Default:'encoder_inference_model')",default='encoder_inference_model',required=False)
    run_model_parser.add_argument("--decoder_save_path", action="store",dest='decoder_save_path', help="Specify path to save decoder model into(Default:'decoder_inference_model')",default='decoder_inference_model',required=False)
    run_model_parser.add_argument("--save", action="store_true",dest='save', help="Save model to google drive(default false)",required=False)

    run_best_model_parser = subparsers.add_parser('run_best_model')
    run_best_model_parser.add_argument("--save", action="store_true",dest='save', help="Save model to google drive(default false)",required=False)

    last_accuracy_parser = subparsers.add_parser('last_model_run_accuracy')
    last_accuracy_parser.add_argument("--is_test", action="store_true",dest='is_test', help="Specify if you need test accuracy(or validation accuracy). If passed assumes True otherwise returns validation accuracy",required=False)
    last_accuracy_parser.add_argument("--beam_width", action="store",dest='beam_width', help="Specify beam width(Default:1)",default=1,required=False)

    accuracy_parser = subparsers.add_parser('load_model_run_accuracy')
    accuracy_parser.add_argument("--model_path", action="store",dest='model_path', help="Specify model path",required=True)
    accuracy_parser.add_argument("--encoder_inf_path", action="store",dest='encoder_inf_path', help="Specify encoder model path",required=True)
    accuracy_parser.add_argument("--decoder_inf_path", action="store",dest='decoder_inf_path', help="Specify decoder model path",required=True)
    accuracy_parser.add_argument("--from_gdrive", action="store_true",dest='from_gdrive', help="Retrieve path from google drive(If passed assumes True)",required=False)
    accuracy_parser.add_argument("--beam_width", action="store",dest='beam_width', help="Specify beam width(Default:1)",default=1,required=False)
    accuracy_parser.add_argument("--is_test", action="store_true",dest='is_test', help="Specify if you need test accuracy(or validation accuracy). If passed assumes True otherwise returns validation accuracy",required=False)

    return parser

In [156]:
if __name__ == "__main__":
  parser = init_argparse()
  args = parser.parse_args(['--help'])

  model = None
  encoder_inference_model = None
  decoder_inference_model = None
  
  if(args.command == 'run_model'):
    encoder_layers = args.encoder_layers 
    decoder_layers = args.decoder_layers 
    epochs = args.epochs 
    lr = args.lr 
    latent_dim = args.latent_dim 
    rnn_type = args.rnn_type 
    embedding_dim = args.embedding_dim 
    dropout = args.dropout 
    bs = args.bs 
    optimizer = args.optimizer 
    model_save_path = args.model_save_path 
    encoder_save_path = args.encoder_save_path 
    decoder_save_path = args.decoder_save_path 
    save = args.save

    model,encoder_inference_model,decoder_inference_model = run_custom_model(encoder_layers,decoder_layers,epochs,lr,latent_dim,rnn_type,embedding_dim,dropout,bs,beam_width,optimizer,model_save_path,encoder_save_path,decoder_save_path,save=save)
  
  elif(args.command == 'run_best_model'):
    print("Running best model----")
    model,encoder_inference_model,decoder_inference_model = run_best_model(save=args.save)
  elif(args.command == 'last_model_run_accuracy'):
    if(model is None or encoder_inference_model is None or decoder_inference_model is None):
      print("Producing accuracy of last run model")
      is_test = args.is_test
      beam_width = args.beam_width
      if(is_test):
        word_val_acc,char_val_acc = load_model_run_accuracy(model=model,encoder_inference_model=encoder_inference_model,decoder_inference_model=decoder_inference_model,beam_width=beam_width,is_test=is_test)
        print("Test word_val_acc"+str(word_val_acc))
        print("Test char_val_acc"+str(char_val_acc))
      else:
        word_val_acc,char_val_acc = load_model_run_accuracy(model=model,encoder_inference_model=encoder_inference_model,decoder_inference_model=decoder_inference_model,beam_width=beam_width,is_test=is_test)
        print("Validation word_val_acc"+str(word_val_acc))
        print("Validation char_val_acc"+str(char_val_acc))
    else:
      print("Invalid command!!! Run a model before running accuracy")
  elif(args.command == 'load_model_run_accuracy'):
    print("Producing accuracy by running model loaded from path")
    is_test = args.is_test
    beam_width = args.beam_width
    model_path = args.model_path
    encoder_inf_path = args.encoder_inf_path
    decoder_inf_path = args.decoder_inf_path
    from_gdrive = args.from_gdrive
    
    if(is_test):
      word_val_acc,char_val_acc = load_model_run_accuracy(model_path=model_path,encoder_inf_path=encoder_inf_path,decoder_inf_path=decoder_inf_path,beam_width=beam_width,is_test=is_test,from_gdrive=from_gdrive)
      print("Test word_val_acc"+str(word_val_acc))
      print("Test char_val_acc"+str(char_val_acc))
    else:
      word_val_acc,char_val_acc = load_model_run_accuracy(model_path=model_path,encoder_inf_path=encoder_inf_path,decoder_inf_path=decoder_inf_path,beam_width=beam_width,is_test=is_test,from_gdrive=from_gdrive)
      print("Validation word_val_acc"+str(word_val_acc))
      print("Validation char_val_acc"+str(char_val_acc))

usage: ipykernel_launcher.py [-h]
                             {run_model,run_best_model,last_model_run_accuracy,load_model_run_accuracy}
                             ...

positional arguments:
  {run_model,run_best_model,last_model_run_accuracy,load_model_run_accuracy}

optional arguments:
  -h, --help            show this help message and exit


SystemExit: ignored

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
