<a href="https://colab.research.google.com/github/girish445ai/Recurrent_Neural_networks/blob/main/TESTING_Transliteration_without_attention_FINAL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Importing Libraries 

In [41]:
import io
import csv
import numpy as np
import tensorflow 
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding, GRU, Dropout, SimpleRNN
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Nadam
from keras.preprocessing.sequence import pad_sequences
from keras.utils.vis_utils import plot_model
from math import log
from numpy import array
from numpy import argmax

import keras
from keras.models import load_model
from math import log1p 

In [42]:
#%pip install wandb -q
#import wandb
#from wandb.keras import WandbCallback

### Unzipping the dataset

Lexicons for Latin-Telugu are taken from Google's Dakshina dataset.

In [43]:
# Downloading dakshina dataset
!yes | wget "https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar"

--2022-05-08 15:56:28--  https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
Resolving storage.googleapis.com (storage.googleapis.com)... 173.194.76.128, 142.251.5.128, 2a00:1450:400c:c00::80, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|173.194.76.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2008340480 (1.9G) [application/x-tar]
Saving to: ‘dakshina_dataset_v1.0.tar.2’


2022-05-08 15:56:38 (204 MB/s) - ‘dakshina_dataset_v1.0.tar.2’ saved [2008340480/2008340480]



In [44]:
# Unzipping dataset
!yes | tar xopf dakshina_dataset_v1.0.tar

In [45]:
# The dakshina dataset has the lexicons that are used in this program.
!ls dakshina_dataset_v1.0/te/lexicons

te.translit.sampled.dev.tsv   te.translit.sampled.train.tsv
te.translit.sampled.test.tsv


In [46]:
print_data = True

## Reading the dataset


In [47]:
train_path = "./dakshina_dataset_v1.0/te/lexicons/te.translit.sampled.train.tsv"
dev_path = "./dakshina_dataset_v1.0/te/lexicons/te.translit.sampled.dev.tsv"
test_path = "./dakshina_dataset_v1.0/te/lexicons/te.translit.sampled.dev.tsv"

def reading_data(corpus_file):
  # function reads the raw text of words and returns native versions of words
  telugu_words = []
  latin_words = []
  with io.open(corpus_file, encoding ='utf-8') as f:
    for line in f:
      if '\t' not in line:
        continue
      tokens = line.rstrip().split("\t")
      latin_words.append(tokens[1])
      telugu_words.append(tokens[0])
  return latin_words, telugu_words

train_source, train_target = reading_data(train_path)
val_source, val_target = reading_data(dev_path)
test_source, test_target = reading_data(test_path)

print("Number of training samples: ", len(train_source))
print("Number of validation samples: ", len(val_source))
print("Number of testing samples: ", len(test_source))

Number of training samples:  58550
Number of validation samples:  5683
Number of testing samples:  5683


In [48]:
arr = np.arange(len(train_source))
np.random.shuffle(arr)
arr1 = np.arange(len(val_source))
np.random.shuffle(arr1)

input_chars = set()
target_chars = set()
input_lexicons_nextstep = []
target_lexicons_nextstep = []
val_input_lexicons_nextstep = []
val_target_lexicons_nextstep = []

for (input_text, target_text) in zip(train_source, train_target):
    # "tab" is the "start sequence" characte ,"\n" is "end sequence" character.
    target_text = "B" + target_text + "E"
    input_lexicons_nextstep.append(input_text)
    target_lexicons_nextstep.append(target_text)
    for char in input_text:
        if char not in input_chars:
            input_chars.add(char)
    for char in target_text:
        if char not in target_chars:
            target_chars.add(char)

for (input_text, target_text) in zip(val_source, val_target):
    # "tab" is the "start sequence" character ,"\n" is "end sequence" character.
    target_text = "B" + target_text + "E"
    val_input_lexicons_nextstep.append(input_text)
    val_target_lexicons_nextstep.append(target_text)
    for char in input_text:
        if char not in input_chars:
            input_chars.add(char)
    for char in target_text:
        if char not in target_chars:
            target_chars.add(char)

input_lexicons = []
target_lexicons = []

for i in range(len(train_source)):
    input_lexicons.append(input_lexicons_nextstep[arr[i]])
    target_lexicons.append(target_lexicons_nextstep[arr[i]])

val_input_lexicons = []
val_target_lexicons = []

for i in range(len(val_source)):
    val_input_lexicons.append(val_input_lexicons_nextstep[arr1[i]])
    val_target_lexicons.append(val_target_lexicons_nextstep[arr1[i]])

input_chars.add(" ")
target_chars.add(" ")

input_chars = sorted(list(input_chars))
target_chars = sorted(list(target_chars))


no_enc_tokens = len(input_chars)
no_dec_tokens = len(target_chars)
enc_seq_length = max([len(txt) for txt in input_lexicons])
dec_seq_length = max([len(txt) for txt in target_lexicons])
val_max_encoder_seq_length = max([len(txt) for txt in val_input_lexicons])
val_max_decoder_seq_length = max([len(txt) for txt in val_target_lexicons])



print("No of samples:", len(input_lexicons))
print("No of unique input tokens:", no_enc_tokens)
print("No of unique output tokens:", no_dec_tokens)
print("Maximum sequence length for inputs:", enc_seq_length)
print("Maximum sequence length for outputs:", dec_seq_length)
print("Maximum sequence length for val inputs:", val_max_encoder_seq_length)
print("Maximum sequence length for val outputs:", val_max_decoder_seq_length)

print(input_chars)
print(target_chars)

No of samples: 58550
No of unique input tokens: 27
No of unique output tokens: 66
Maximum sequence length for inputs: 25
Maximum sequence length for outputs: 22
Maximum sequence length for val inputs: 21
Maximum sequence length for val outputs: 21
[' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
[' ', 'B', 'E', 'ం', 'ః', 'అ', 'ఆ', 'ఇ', 'ఈ', 'ఉ', 'ఊ', 'ఋ', 'ఎ', 'ఏ', 'ఐ', 'ఒ', 'ఓ', 'ఔ', 'క', 'ఖ', 'గ', 'ఘ', 'చ', 'ఛ', 'జ', 'ఝ', 'ఞ', 'ట', 'ఠ', 'డ', 'ఢ', 'ణ', 'త', 'థ', 'ద', 'ధ', 'న', 'ప', 'ఫ', 'బ', 'భ', 'మ', 'య', 'ర', 'ఱ', 'ల', 'ళ', 'వ', 'శ', 'ష', 'స', 'హ', 'ా', 'ి', 'ీ', 'ు', 'ూ', 'ృ', 'ె', 'ే', 'ై', 'ొ', 'ో', 'ౌ', '్', '\u200c']


In [49]:
print(input_lexicons[123:130])
print(target_lexicons[123:130])

['ghattamaneni', 'emuka', 'thare', 'angeekaristaaru', 'nirvahinchadaniki', 'uttheej', 'samsthalato']
['Bఘట్టమనేనిE', 'BఎముకE', 'Bదేర్E', 'Bఅంగీకరిస్తారుE', 'Bనిర్వహించడానికిE', 'Bఉత్తేజ్E', 'Bసంస్థలతోE']


**Training** :

In [50]:
# input_token_index is a dictionary containg the latin characters.
# target_token_index is a dictionary containing the characters of target language here telugu.
input_token_index = dict([(char, i) for i, char in enumerate(input_chars)])
target_token_index = dict([(char, i) for i, char in enumerate(target_chars)])
print(input_token_index)
print(target_token_index)

{' ': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}
{' ': 0, 'B': 1, 'E': 2, 'ం': 3, 'ః': 4, 'అ': 5, 'ఆ': 6, 'ఇ': 7, 'ఈ': 8, 'ఉ': 9, 'ఊ': 10, 'ఋ': 11, 'ఎ': 12, 'ఏ': 13, 'ఐ': 14, 'ఒ': 15, 'ఓ': 16, 'ఔ': 17, 'క': 18, 'ఖ': 19, 'గ': 20, 'ఘ': 21, 'చ': 22, 'ఛ': 23, 'జ': 24, 'ఝ': 25, 'ఞ': 26, 'ట': 27, 'ఠ': 28, 'డ': 29, 'ఢ': 30, 'ణ': 31, 'త': 32, 'థ': 33, 'ద': 34, 'ధ': 35, 'న': 36, 'ప': 37, 'ఫ': 38, 'బ': 39, 'భ': 40, 'మ': 41, 'య': 42, 'ర': 43, 'ఱ': 44, 'ల': 45, 'ళ': 46, 'వ': 47, 'శ': 48, 'ష': 49, 'స': 50, 'హ': 51, 'ా': 52, 'ి': 53, 'ీ': 54, 'ు': 55, 'ూ': 56, 'ృ': 57, 'ె': 58, 'ే': 59, 'ై': 60, 'ొ': 61, 'ో': 62, 'ౌ': 63, '్': 64, '\u200c': 65}


In [51]:
# Encoder Input Sequences are padded to a maximum length of MAX encoder SeqLen characters. 
enc_input_data = np.zeros(
    (len(input_lexicons), enc_seq_length), dtype="float32"
)
dec_input_data = np.zeros(
    (len(input_lexicons), dec_seq_length), dtype="float32"
)
dec_target_data = np.zeros(
    (len(input_lexicons), dec_seq_length, no_dec_tokens), dtype="float32"
)
#Decoder Target Sequences are Padded to a maximum length of max_decoder SeqLen characters with a vocabulary of sizeofTeluguVocab different characters. 
for i, (input_text, target_text) in enumerate(zip(input_lexicons, target_lexicons)):
    for t, char in enumerate(input_text):
        enc_input_data[i, t] = input_token_index[char]
    enc_input_data[i, t + 1 :] = input_token_index[" "]

    for t, char in enumerate(target_text):
        # dec_target_data is ahead of dec_input_data by one timestep
        dec_input_data[i, t] = target_token_index[char]
        if t > 0:
            # dec_target_data will not include the start character.
            dec_target_data[i, t - 1, target_token_index[char]] = 1.0
    dec_input_data[i, t + 1: ] = target_token_index[" "]
    dec_target_data[i, t:, target_token_index[" "]] = 1.0


val_enc_input_data = np.zeros(
    (len(input_lexicons), val_max_encoder_seq_length), dtype="float32"
)
val_dec_input_data = np.zeros(
    (len(input_lexicons), val_max_decoder_seq_length), dtype="float32"
)
val_dec_target_data = np.zeros(
    (len(input_lexicons), val_max_decoder_seq_length, no_dec_tokens), dtype="float32"
)

for i, (input_text, target_text) in enumerate(zip(val_input_lexicons, val_target_lexicons)):
    for t, char in enumerate(input_text):
        # Enumerate() method adds a counter to an iterable and returns it in a form of enumerating object. 
        # This enumerated object can then be used directly for loops or converted into a list of tuples using the list() method.
        val_enc_input_data[i, t] = input_token_index[char]
    val_enc_input_data[i, t + 1 :] = input_token_index[" "]

    for t, char in enumerate(target_text):
        val_dec_input_data[i, t] = target_token_index[char]
        if t > 0:
            # dec_target_data will be ahead by one timestep
            # and will not include the start character.
            val_dec_target_data[i, t - 1, target_token_index[char]] = 1.0
    val_dec_input_data[i, t + 1: ] = target_token_index[" "]
    val_dec_target_data[i, t:, target_token_index[" "]] = 1.0




In [52]:
# Feeding the characters in reverse order (bidirectional) for better processing
reverse_input_char_index = dict((i, char) for char, i in input_token_index.items())
reverse_target_char_index = dict((i, char) for char, i in target_token_index.items())
print(reverse_target_char_index)

{0: ' ', 1: 'B', 2: 'E', 3: 'ం', 4: 'ః', 5: 'అ', 6: 'ఆ', 7: 'ఇ', 8: 'ఈ', 9: 'ఉ', 10: 'ఊ', 11: 'ఋ', 12: 'ఎ', 13: 'ఏ', 14: 'ఐ', 15: 'ఒ', 16: 'ఓ', 17: 'ఔ', 18: 'క', 19: 'ఖ', 20: 'గ', 21: 'ఘ', 22: 'చ', 23: 'ఛ', 24: 'జ', 25: 'ఝ', 26: 'ఞ', 27: 'ట', 28: 'ఠ', 29: 'డ', 30: 'ఢ', 31: 'ణ', 32: 'త', 33: 'థ', 34: 'ద', 35: 'ధ', 36: 'న', 37: 'ప', 38: 'ఫ', 39: 'బ', 40: 'భ', 41: 'మ', 42: 'య', 43: 'ర', 44: 'ఱ', 45: 'ల', 46: 'ళ', 47: 'వ', 48: 'శ', 49: 'ష', 50: 'స', 51: 'హ', 52: 'ా', 53: 'ి', 54: 'ీ', 55: 'ు', 56: 'ూ', 57: 'ృ', 58: 'ె', 59: 'ే', 60: 'ై', 61: 'ొ', 62: 'ో', 63: 'ౌ', 64: '్', 65: '\u200c'}


In [53]:
print(enc_input_data[1])
print(dec_input_data[1])
print(dec_target_data[1])

[ 4. 18. 21.  7. 22.  9. 19.  8.  1. 25.  1. 13.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.]
[ 1. 34. 57. 20. 64. 47. 53. 49. 42.  3.  2.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]


For Validation and testing:

In [54]:
print(val_dec_input_data[26])
print(val_dec_target_data[26])

[ 1. 42. 55. 34. 64. 35. 41. 55. 36. 18. 55.  2.  0.  0.  0.  0.  0.  0.
  0.  0.  0.]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]


In [55]:
x_test = val_enc_input_data
y_test = val_target_lexicons

## MODEL TRAINING 

In [56]:
class GJRNN(object):
  def __init__(self,cell_type = 'RNN',in_emb = 32, hidden_size=32, learning_rate= 1e-3, 
               dropout=0.4,pred_type ='greedy',epochs = 10, batch_size = 32,beam_width = 5,
               num_enc = 1,num_dec = 1):
    # RNN class with initialization of different parameters.
    self.cell_type = cell_type
    self.in_emb = in_emb
    self.hidden_size = hidden_size
    self.learning_rate = learning_rate
    self.dropout = dropout
    self.pred_type = pred_type
    self.epochs = epochs
    self.batch_size = batch_size
    self.beam_width = beam_width
    self.num_enc = num_enc
    self.num_dec = num_dec

  def model_build(self,enc_input_data,dec_input_data,dec_target_data,x_test, y_test):
    enc_inputs = Input(shape=(None, ),name = 'Enc_inputs')

    # The Embedding layer takes the input vocab of size of number encoder tokens, and
    # returns the output embedding of size in_emb.(embedding dimension)
    enc_emb =  Embedding(no_enc_tokens, self.in_emb , mask_zero = True,name = 'Enc_emb')(enc_inputs)

    enc_outputs = enc_emb
    if self.cell_type == 'LSTM':
      enc_lstm = LSTM(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_hidden_1")
      enc_outputs, state_h, state_c = enc_lstm(enc_outputs)
      enc_states = [state_h, state_c]

      # Adding a LSTM layer with hidden_size internal units.
      for i in range( 2, self.num_enc +1):
        layer_name = ('Enc_hidden_%d') %i

        enc_lstm = LSTM(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name=layer_name)
        enc_outputs, state_h, state_c = enc_lstm(enc_outputs,initial_state = enc_states)
        enc_states = [state_h, state_c]

    elif self.cell_type == 'GRU':
      enc_gru = GRU(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_hidden_1")
      enc_outputs, state_h = enc_gru(enc_outputs)
      enc_states = [state_h]
      # Adding a GRU layer with hidden_size internal units.
      for i in range(2, self.num_enc +1):
        layer_name = ('Enc_hidden_%d') %i
        enc_gru = GRU(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name=layer_name)
        enc_outputs, state_h = enc_gru(enc_outputs, initial_state = enc_states)
        enc_states = [state_h]  

    elif self.cell_type == 'RNN':
      enc_rnn = SimpleRNN(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_hidden_1")
      enc_outputs, state_h = enc_rnn(enc_outputs)
      enc_states = [state_h]
      # Adding a RNN layer with hidden_size internal units.
      for i in range(2, self.num_enc +1):
        layer_name = ('Enc_hidden_%d') %i
        enc_rnn = SimpleRNN(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name=layer_name)
        enc_outputs, state_h = enc_rnn(enc_outputs, initial_state = enc_states)
        enc_states = [state_h]  

    # The 'enc_states' are fixed as initial state fro decodeer inputs.
    dec_inputs = Input(shape=(None,), name = 'Dec_inputs')
    dec_emb_layer = Embedding(no_dec_tokens, self.hidden_size, mask_zero = True, name = 'Dec_emb')
    dec_emb = dec_emb_layer(dec_inputs)
    dec_outputs = dec_emb
    if self.cell_type == 'LSTM':
      dec_lstm = LSTM(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_hidden_1")
      dec_outputs, _, _ = dec_lstm(dec_outputs, initial_state = enc_states)
      
      for i in range(2, self.num_dec +1):
        layer_name = ('Dec_hidden_%d') %i

        dec_lstm = LSTM(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name=layer_name)
        dec_outputs, _, _ = dec_lstm(dec_outputs, initial_state = enc_states)

    elif self.cell_type == 'GRU':
      dec_gru = GRU(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_hidden_1")
      dec_outputs, _ = dec_gru(dec_outputs, initial_state = enc_states)

      for i in range(2, self.num_dec+1):
        layer_name = ('Dec_hidden_%d') %i
        dec_gru = GRU(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name=layer_name)
        dec_outputs, _ = dec_gru(dec_outputs, initial_state = enc_states)

    elif self.cell_type == 'RNN':
      dec_rnn = SimpleRNN(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_hidden_1")
      dec_outputs, _ = dec_rnn(dec_outputs, initial_state = enc_states)

      for i in range(2, self.num_dec+1):
        layer_name = ('Dec_hidden_%d') %i
        dec_rnn = SimpleRNN(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name=layer_name)
        dec_outputs, _ = dec_rnn(dec_outputs, initial_state = enc_states)

    decoder_dense = Dense(no_dec_tokens, activation='softmax', name = 'dense')
    dec_outputs = decoder_dense(dec_outputs)

    # Define the model that takes encoder and decoder input 
    # to output dec_outputs
    model = Model([enc_inputs, dec_inputs], dec_outputs)

    model.summary()

    # Defining the optimizer and loss function .
    optimizer = Adam(lr=self.learning_rate, beta_1=0.9, beta_2=0.999)
    model.compile(loss = "categorical_crossentropy", optimizer = optimizer, metrics=['accuracy'])
  
    model.fit(
        [enc_input_data, dec_input_data],
        dec_target_data,
        batch_size=self.batch_size,
        epochs=self.epochs
        )
    
    encoder_model,decoder_model = self.inference_model(model)
    data_list = [["SNO", "Input Data", "Target Data", "Predicted Data"]]
 
    global_total = 0
    global_correct = 0
    for i in range(len(val_source)):
      input_seq = x_test[i : i + 1]
      result = self.decode_sequence(encoder_model,decoder_model,input_seq)
      target = y_test[i]
      target = target[1:len(target)-1]
      result = result[0:len(result)-1]
      dlist = [i+1, val_input_lexicons[i], target, result]
      data_list.append(dlist)

      if result.strip() == target.strip():
        global_correct = global_correct + 1
      
      global_total = global_total + 1
      accuracy_epoch = global_correct/global_total
    with open('predictions_vanilla.tsv', 'w', newline='') as file:
      writer = csv.writer(file, delimiter='\t')
      writer.writerows(data_list)
    val_accuracy = global_correct/global_total
    print(val_accuracy)
    
  def inference_model(self,model):
    enc_inputs = model.input[0] 
    if self.cell_type == 'RNN' or self.cell_type == 'GRU':
      enc_outputs, state_h_enc = model.get_layer('Enc_hidden_'+ str(self.num_enc)).output
      enc_states = [state_h_enc]
      encoder_model = Model(enc_inputs, enc_states)

      dec_inputs = model.input[1]  # input_1
      dec_outputs = model.get_layer('Dec_emb')(dec_inputs)
      decoder_states_inputs = []
      decoder_states = []

      for i in range(1,self.num_dec +1):
        decoder_state_input_h = keras.Input(shape=(self.hidden_size,))
        curr_states_inputs = [decoder_state_input_h]
        decoder = model.get_layer('Dec_hidden_'+ str(i))
        dec_outputs, state_h_dec = decoder(dec_outputs, initial_state=curr_states_inputs)

        decoder_states += [state_h_dec]
        decoder_states_inputs += curr_states_inputs

    elif self.cell_type == 'LSTM':
      enc_outputs, state_h_enc, state_c_enc = model.get_layer('Enc_hidden_'+ str(self.num_enc)).output  # lstm_1
      enc_states = [state_h_enc, state_c_enc]
      encoder_model = Model(enc_inputs, enc_states)

      dec_inputs = model.input[1]  # input_1
      dec_outputs = model.get_layer('Dec_emb')(dec_inputs)
      decoder_states_inputs = []
      decoder_states = []

      for i in range(1,self.num_dec +1):
        decoder_state_input_h = keras.Input(shape=(self.hidden_size,))
        decoder_state_input_c = keras.Input(shape=(self.hidden_size,))
        curr_states_inputs = [decoder_state_input_h, decoder_state_input_c]
        decoder = model.get_layer('Dec_hidden_'+ str(i))
        dec_outputs, state_h_dec, state_c_dec = decoder(dec_outputs, initial_state=curr_states_inputs)

        decoder_states += [state_h_dec, state_c_dec]
        decoder_states_inputs += curr_states_inputs


    decoder_dense = model.get_layer('dense')
    dec_outputs = decoder_dense(dec_outputs)
    decoder_model = Model([dec_inputs] + decoder_states_inputs, [dec_outputs] + decoder_states)

    return encoder_model,decoder_model

  def decode_sequence(self,encoder_model,decoder_model,input_seq):
    # Encode the input as state vectors.
    states_value = [encoder_model.predict(input_seq)] * self.num_dec
    
    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1, 1))
    # The first character of target sequence is populated with the start character.
    target_seq[0, 0] = target_token_index['B']

    # Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    stop_condition = False
    decoded_sentence = ""

    while not stop_condition:
        if self.cell_type == 'RNN' or self.cell_type == 'GRU':
          dummy = decoder_model.predict([target_seq] + [states_value])
          output_tokens, states_value = dummy[0],dummy[1:]
          
        elif self.cell_type == 'LSTM':  
          dummy = decoder_model.predict([target_seq] + states_value)
          output_tokens, states_value = dummy[0],dummy[1:]
        
        #print(output_tokens[0,:,:])
        if self.pred_type == 'greedy':
          beam_w = 1
        elif self.pred_type == 'beam_search':
          beam_w = self.beam_width
        sampled_token_index = self.beam_search_decoder(output_tokens[0,:,:], beam_w)
        sampled_token_index = sampled_token_index[beam_w-1][0]

        # Token sampling
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_char_index[sampled_token_index]
        decoded_sentence += sampled_char

        # Exit when reaches max length or stop character is encountered.
        if sampled_char == 'E' or len(decoded_sentence) > dec_seq_length:
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index


    return decoded_sentence
  
  def beam_search_decoder(self,data, k):
    # In beam search decoding we do not need to start with random states; instead,
    # we start with the k most likely words as the first step in the sequence.
    sequences = [[list(), 0.0]]
    for row in data:
      all_candidates = list()
      # expand each current candidate
      for i in range(len(sequences)):
        seq, score = sequences[i]
        for j in range(len(row)):
          candidate = [seq + [j], score - log(row[j])]
          all_candidates.append(candidate)
      # order all candidates by score
      ordered = sorted(all_candidates, key=lambda tup:tup[1])
      # select k best
      sequences = ordered[:k]
    return sequences

## Sweep

In [57]:
sweep_config = {
    'method': 'bayes', 
    'metric': {
      'name': 'val_accuracy',
      'goal': 'maximize'   
    },
    'parameters': {

        'dropout': {
            'values': [0.0, 0.1, 0.2]
        },
        'learning_rate': {
            'values': [1e-3, 1e-4]
        },
        'batch_size': {
            'values': [64, 128]
        },
        'in_emb': {
            'values': [32, 64, 128]
        },
        'num_enc': {
            'values': [1, 2, 3]
        },
        'num_dec': {
            'values': [1, 2, 3]
        },
        'hidden_size':{
            'values': [32, 64, 128]
        },
        'cell_type': {
            'values': ['RNN', 'GRU', 'LSTM']
        },
        'dec_search': {
            'values': ['beam_search', 'greedy']
        },
        'beam_width':{
            'values': [3,5]
        }
    }
}

In [58]:
# Initialize a new sweep
#sweep_id = wandb.sweep(sweep_config, entity="girishrongali", project="assignment3")

## BEST Parameters:

In [59]:
best_batch_size = 64
best_beam_width = 5
best_cell_type = 'GRU'
best_dec_search = 'beam_search'
best_dropout = 0.2
best_epochs = 20
best_hidden_size = 128
best_in_emb = 128
best_learning_rate = 0.001
best_num_dec = 3
best_num_enc = 3
  
model_rnn = GJRNN(cell_type = best_cell_type, in_emb = best_in_emb, hidden_size=best_hidden_size,
                learning_rate= best_learning_rate, dropout=best_dropout,pred_type = best_dec_search,epochs = best_epochs,
                batch_size = best_batch_size, beam_width = best_beam_width, num_enc = best_num_enc, num_dec = best_num_dec)
  
model_rnn.model_build(enc_input_data,dec_input_data,dec_target_data,x_test, y_test)

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Enc_inputs (InputLayer)        [(None, None)]       0           []                               
                                                                                                  
 Enc_emb (Embedding)            (None, None, 128)    3456        ['Enc_inputs[0][0]']             
                                                                                                  
 Dec_inputs (InputLayer)        [(None, None)]       0           []                               
                                                                                                  
 Enc_hidden_1 (LSTM)            [(None, None, 128),  131584      ['Enc_emb[0][0]']                
                                 (None, 128),                                               

  super(Adam, self).__init__(name, **kwargs)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [60]:
#sweep_id = wandb.sweep(sweep_config, entity="girishrongali", project="assignment3")
#wandb.agent(sweep_id, lambda : train_sweep())

In [61]:
from google.colab import files
files.download("predictions_vanilla.tsv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>