Essentials

In [None]:
!nvidia-smi

Mon May 10 05:55:17 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import io
import numpy as np
import tensorflow 
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding, GRU, Dropout, SimpleRNN
from keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils.vis_utils import plot_model
from math import log
from numpy import array
from numpy import argmax
from keras.optimizers import Adam
import keras
from keras.models import load_model
from math import log1p 

In [None]:
%pip install wandb -q
import wandb
from wandb.keras import WandbCallback

[K     |████████████████████████████████| 1.8MB 14.4MB/s 
[K     |████████████████████████████████| 163kB 62.8MB/s 
[K     |████████████████████████████████| 133kB 60.4MB/s 
[K     |████████████████████████████████| 102kB 13.4MB/s 
[K     |████████████████████████████████| 71kB 9.7MB/s 
[?25h  Building wheel for subprocess32 (setup.py) ... [?25l[?25hdone
  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


**Fetching the dataset** 

Lexicons for Latin-Tamil are taken from Google's Dakshina dataset. The necessary datasets have been uploaded to GitHub, cloned, and used for the remainder of the code.

In [None]:
!git clone https://github.com/borate267/lexicon-dataset.git

Cloning into 'lexicon-dataset'...
remote: Enumerating objects: 25, done.[K
remote: Counting objects: 100% (25/25), done.[K
remote: Compressing objects: 100% (25/25), done.[K
remote: Total 25 (delta 5), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (25/25), done.


In [None]:
# GLOBAL VARIABLES

print_data = True

Reading the dataset


In [None]:
# Locations of the train / dev / test lexicon files inside the cloned
# `lexicon-dataset` repository.
train_dir, dev_dir, test_dir = (
    "lexicon-dataset/ta.translit.sampled.train.tsv",
    "lexicon-dataset/ta.translit.sampled.dev.tsv",
    "lexicon-dataset/ta.translit.sampled.test.tsv",
)

def read_corpus(corpus_file):
  """Read a tab-separated lexicon file and return its romanized and native words.

  Every usable line must contain at least two tab-separated fields once
  trailing whitespace has been stripped: the native-script word first and its
  romanization second. Any further fields on the line are ignored.

  Lines with fewer than two fields (blank lines, lines without a tab, or lines
  whose romanization is empty so that stripping removes the tab) are skipped
  instead of raising IndexError.

  Args:
    corpus_file: path to a UTF-8 encoded lexicon file.

  Returns:
    A pair `(latin_words, tamil_words)` of parallel lists: the second field
    and the first field of every usable line, in file order.
  """
  tamil_words = []
  latin_words = []
  with io.open(corpus_file, encoding ='utf-8') as f:
    for line in f:
      tokens = line.rstrip().split("\t")
      # Checking the field count (rather than just the presence of a tab in the
      # raw line) also covers "word\t\n", where rstrip() swallows the tab.
      if len(tokens) < 2:
        continue
      latin_words.append(tokens[1])
      tamil_words.append(tokens[0])
  return latin_words, tamil_words

# Load the three splits; each read_corpus call yields (romanized words, native words).
(train_source, train_target), (valid_source, valid_target), (test_source, test_target) = [
    read_corpus(split_path) for split_path in (train_dir, dev_dir, test_dir)
]

# Report how many word pairs each split holds.
for split_label, split_words in (
    ("Number of training samples: ", train_source),
    ("Number of validation samples: ", valid_source),
    ("Number of testing samples: ", test_source),
):
    print(split_label, len(split_words))




Number of training samples:  68218
Number of validation samples:  6827
Number of testing samples:  6864


In [None]:
# Draw one random permutation per split; they are applied further down so that
# source and target words are shuffled in unison.
arr = np.arange(len(train_source))
np.random.shuffle(arr)
arr1 = np.arange(len(valid_source))
np.random.shuffle(arr1)

# Unshuffled ("_ns") pairs. Every target word is wrapped in the start marker
# "B" and the end marker "E".
input_texts_ns = [source for source, _ in zip(train_source, train_target)]
target_texts_ns = ["B" + target + "E" for _, target in zip(train_source, train_target)]
val_input_texts_ns = [source for source, _ in zip(valid_source, valid_target)]
val_target_texts_ns = ["B" + target + "E" for _, target in zip(valid_source, valid_target)]

# The character vocabularies cover the training and validation words, plus
# the space character (used as the padding symbol when the arrays are built).
input_characters = set("".join(input_texts_ns + val_input_texts_ns))
target_characters = set("".join(target_texts_ns + val_target_texts_ns))
input_characters.add(" ")
target_characters.add(" ")
input_characters = sorted(input_characters)
target_characters = sorted(target_characters)

# Apply the permutations drawn above.
input_texts = [input_texts_ns[position] for position in arr]
target_texts = [target_texts_ns[position] for position in arr]
val_input_texts = [val_input_texts_ns[position] for position in arr1]
val_target_texts = [val_target_texts_ns[position] for position in arr1]

# Vocabulary sizes and the longest word (in characters) per split and side.
num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
max_encoder_seq_length = max(len(txt) for txt in input_texts)
max_decoder_seq_length = max(len(txt) for txt in target_texts)
val_max_encoder_seq_length = max(len(txt) for txt in val_input_texts)
val_max_decoder_seq_length = max(len(txt) for txt in val_target_texts)

for stat_label, stat_value in (
    ("Number of samples:", len(input_texts)),
    ("Number of unique input tokens:", num_encoder_tokens),
    ("Number of unique output tokens:", num_decoder_tokens),
    ("Max sequence length for inputs:", max_encoder_seq_length),
    ("Max sequence length for outputs:", max_decoder_seq_length),
    ("Max sequence length for val inputs:", val_max_encoder_seq_length),
    ("Max sequence length for val outputs:", val_max_decoder_seq_length),
):
    print(stat_label, stat_value)

print(input_characters)
print(target_characters)

Number of samples: 68218
Number of unique input tokens: 27
Number of unique output tokens: 49
Max sequence length for inputs: 30
Max sequence length for outputs: 28
Max sequence length for val inputs: 23
Max sequence length for val outputs: 22
[' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
[' ', 'B', 'E', 'ஃ', 'அ', 'ஆ', 'இ', 'ஈ', 'உ', 'ஊ', 'எ', 'ஏ', 'ஐ', 'ஒ', 'ஓ', 'க', 'ங', 'ச', 'ஜ', 'ஞ', 'ட', 'ண', 'த', 'ந', 'ன', 'ப', 'ம', 'ய', 'ர', 'ற', 'ல', 'ள', 'ழ', 'வ', 'ஷ', 'ஸ', 'ஹ', 'ா', 'ி', 'ீ', 'ு', 'ூ', 'ெ', 'ே', 'ை', 'ொ', 'ோ', 'ௌ', '்']


In [None]:
# Peek at a handful of shuffled (romanized, marked native) pairs.
for sample_texts in (input_texts, target_texts):
    print(sample_texts[123:130])

['turrnar', 'nichchayam', 'ramar', 'koothai', 'kaaintha', 'kazhivugalum', 'beayr']
['Bடர்னர்E', 'Bநிச்சயம்E', 'Bராமர்E', 'BகோதைE', 'Bகாய்ந்தE', 'Bகழிவுகளும்E', 'Bபீர்E']


Character Embedding

**Encoder Input Sequences**: Padded to a maximum length of max_encSeqLen characters. 
**SHAPE: (len(train_source), max_encSeqLen)**

**Decoder Input Sequences**: Padded to a maximum length of max_decSeqLen characters. 
**SHAPE: (len(train_source), max_decSeqLen)**

**Decoder Target Sequences**: Padded to a maximum length of max_decSeqLen characters with a vocabulary of sizeofTamilVocab different characters. 
**SHAPE: (len(train_source), max_decSeqLen, sizeofTamilVocab)**

For training :

In [None]:
# Character -> integer id, where the id is the character's position in the
# sorted vocabulary list.
input_token_index = {char: index for index, char in enumerate(input_characters)}
target_token_index = {char: index for index, char in enumerate(target_characters)}
for token_index in (input_token_index, target_token_index):
    print(token_index)

{' ': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}
{' ': 0, 'B': 1, 'E': 2, 'ஃ': 3, 'அ': 4, 'ஆ': 5, 'இ': 6, 'ஈ': 7, 'உ': 8, 'ஊ': 9, 'எ': 10, 'ஏ': 11, 'ஐ': 12, 'ஒ': 13, 'ஓ': 14, 'க': 15, 'ங': 16, 'ச': 17, 'ஜ': 18, 'ஞ': 19, 'ட': 20, 'ண': 21, 'த': 22, 'ந': 23, 'ன': 24, 'ப': 25, 'ம': 26, 'ய': 27, 'ர': 28, 'ற': 29, 'ல': 30, 'ள': 31, 'ழ': 32, 'வ': 33, 'ஷ': 34, 'ஸ': 35, 'ஹ': 36, 'ா': 37, 'ி': 38, 'ீ': 39, 'ு': 40, 'ூ': 41, 'ெ': 42, 'ே': 43, 'ை': 44, 'ொ': 45, 'ோ': 46, 'ௌ': 47, '்': 48}


In [None]:
def _vectorize_pairs(source_texts, decoder_texts, max_source_len, max_decoder_len):
    """Encode parallel word lists as padded id arrays plus one-hot decoder targets.

    Reads the notebook-level `input_token_index`, `target_token_index` and
    `num_decoder_tokens`. The space character is the padding symbol on both sides.

    Args:
        source_texts: romanized words (encoder side).
        decoder_texts: native words already wrapped in the "B"/"E" markers.
        max_source_len: width of the encoder id array.
        max_decoder_len: width of the decoder id array and of the target array.

    Returns:
        (encoder_ids, decoder_ids, decoder_targets), all float32:
        encoder_ids has shape (n, max_source_len), decoder_ids has shape
        (n, max_decoder_len) and decoder_targets has shape
        (n, max_decoder_len, num_decoder_tokens), with n = len(source_texts).
        decoder_targets is decoder_ids shifted left by one step (the start
        marker is dropped) and one-hot encoded; the remaining steps are
        one-hot padding.
    """
    source_pad = input_token_index[" "]
    decoder_pad = target_token_index[" "]
    n_samples = len(source_texts)

    encoder_ids = np.zeros((n_samples, max_source_len), dtype="float32")
    decoder_ids = np.zeros((n_samples, max_decoder_len), dtype="float32")
    decoder_targets = np.zeros(
        (n_samples, max_decoder_len, num_decoder_tokens), dtype="float32"
    )

    for i, (source_text, decoder_text) in enumerate(zip(source_texts, decoder_texts)):
        for t, char in enumerate(source_text):
            encoder_ids[i, t] = input_token_index[char]
        # Pad from the end of the word; using len() instead of the loop
        # variable keeps this correct for an empty word as well.
        encoder_ids[i, len(source_text):] = source_pad

        for t, char in enumerate(decoder_text):
            decoder_ids[i, t] = target_token_index[char]
            if t > 0:
                # The target is one step ahead of the decoder input and does
                # not include the start marker.
                decoder_targets[i, t - 1, target_token_index[char]] = 1.0
        decoder_ids[i, len(decoder_text):] = decoder_pad
        # Target steps from the last decoder-input position onwards are padding.
        decoder_targets[i, max(len(decoder_text) - 1, 0):, decoder_pad] = 1.0

    return encoder_ids, decoder_ids, decoder_targets


encoder_input_data, decoder_input_data, decoder_target_data = _vectorize_pairs(
    input_texts, target_texts, max_encoder_seq_length, max_decoder_seq_length
)

# The validation arrays are sized by the validation set itself. They used to be
# allocated with len(input_texts) rows, which left every row past the last
# validation word filled with zeros.
val_encoder_input_data, val_decoder_input_data, val_decoder_target_data = _vectorize_pairs(
    val_input_texts, val_target_texts, val_max_encoder_seq_length, val_max_decoder_seq_length
)




In [None]:
# Integer id -> character, i.e. the inverses of the two token index dicts.
reverse_input_char_index = {index: char for char, index in input_token_index.items()}
reverse_target_char_index = {index: char for char, index in target_token_index.items()}
print(reverse_target_char_index)

{0: ' ', 1: 'B', 2: 'E', 3: 'ஃ', 4: 'அ', 5: 'ஆ', 6: 'இ', 7: 'ஈ', 8: 'உ', 9: 'ஊ', 10: 'எ', 11: 'ஏ', 12: 'ஐ', 13: 'ஒ', 14: 'ஓ', 15: 'க', 16: 'ங', 17: 'ச', 18: 'ஜ', 19: 'ஞ', 20: 'ட', 21: 'ண', 22: 'த', 23: 'ந', 24: 'ன', 25: 'ப', 26: 'ம', 27: 'ய', 28: 'ர', 29: 'ற', 30: 'ல', 31: 'ள', 32: 'ழ', 33: 'வ', 34: 'ஷ', 35: 'ஸ', 36: 'ஹ', 37: 'ா', 38: 'ி', 39: 'ீ', 40: 'ு', 41: 'ூ', 42: 'ெ', 43: 'ே', 44: 'ை', 45: 'ொ', 46: 'ோ', 47: 'ௌ', 48: '்'}


In [None]:
# Inspect one encoded training example: encoder ids, decoder ids, one-hot targets.
for encoded_array in (encoder_input_data, decoder_input_data, decoder_target_data):
    print(encoded_array[1])

[16.  1.  1. 20.  1. 12.  1.  1. 11.  1.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
[ 1. 25. 37. 20. 30. 37. 15.  2.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]


For Validation and testing:

In [None]:
#(TODO 2): Verify decoder input and target have an offset of one
# Show one encoded validation example: decoder ids, then its one-hot targets.
for encoded_array in (val_decoder_input_data, val_decoder_target_data):
    print(encoded_array[26])

[ 1. 15. 30. 37. 23. 38. 22. 38. 25. 48.  2.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]


In [None]:
# Bundle the validation arrays in the [encoder ids, decoder ids] / targets
# layout that is handed to MyRNN.build_fit.
x_test, y_test = [val_encoder_input_data, val_decoder_input_data], val_decoder_target_data

Configuring the Sweep Hyperparameter dictionary

Defining the model

In [None]:
class MyRNN(object):
  """Character-level encoder-decoder transliteration model built from Keras RNN layers.

  Encoder and decoder are both stacks of `num_enc_dec` recurrent layers of type
  `cell_type` ('RNN', 'GRU' or 'LSTM'). Decoder layer i is initialised with the
  final state of encoder layer i.

  Several methods read names defined at notebook level instead of taking them
  as arguments:
    * `num_encoder_tokens`, `num_decoder_tokens` (model construction),
    * `valid_source`, `val_encoder_input_data`, `val_target_texts` (evaluation),
    * `target_token_index`, `reverse_target_char_index`,
      `max_decoder_seq_length` (decoding).
  """

  # Probabilities are floored at this value before taking their log, so that a
  # softmax output that underflowed to exactly 0 does not make log() raise.
  _MIN_PROB = 1e-12

  # Fixed names for the non-recurrent layers, so that inference_model can find
  # them by name whatever the depth of the recurrent stacks.
  _ENC_EMBEDDING = "Enc_Embedding"
  _DEC_EMBEDDING = "Dec_Embedding"
  _DEC_DENSE = "Dec_Dense"

  def __init__(self,cell_type = 'RNN',in_emb = 32, hidden_size=32, learning_rate= 1e-3, 
               dropout=0.4,pred_type ='greedy',epochs = 10, batch_size = 32,beam_width = 5,
               num_enc_dec =2):
    """Store the hyperparameters; no Keras objects are created here.

    Args:
      cell_type: 'RNN', 'GRU' or 'LSTM'.
      in_emb: width of the encoder character embedding.
      hidden_size: units per recurrent layer; also the width of the decoder
        character embedding.
      learning_rate: Adam learning rate.
      dropout: `dropout` argument passed to every recurrent layer.
      pred_type: 'greedy' or 'beam_search'.
      epochs: number of training epochs.
      batch_size: training batch size.
      beam_width: number of hypotheses kept when pred_type is 'beam_search'.
      num_enc_dec: number of recurrent layers in the encoder and in the decoder.
    """
    self.cell_type = cell_type
    self.in_emb = in_emb
    self.hidden_size = hidden_size
    self.learning_rate = learning_rate
    self.dropout = dropout
    self.pred_type = pred_type
    self.epochs = epochs
    self.batch_size = batch_size
    self.beam_width = beam_width
    self.num_enc_dec = num_enc_dec

  def _rnn_class(self):
    """Return the Keras layer class for `self.cell_type`; ValueError if unknown."""
    # The dict is built here, not at class level, so the Keras names are only
    # looked up when a model is actually constructed.
    cells = {'LSTM': LSTM, 'GRU': GRU, 'RNN': SimpleRNN}
    if self.cell_type not in cells:
      raise ValueError("Unknown cell_type %r; expected one of %s"
                       % (self.cell_type, sorted(cells)))
    return cells[self.cell_type]

  def _layer_name(self, role, index):
    """Name of recurrent layer `index` (1-based) of the 'Enc' or 'Dec' stack."""
    return "%s_%s_%d" % (role, self.cell_type, index)

  def _states_per_layer(self):
    """Number of state tensors per recurrent layer: (h, c) for LSTM, h otherwise."""
    return 2 if self.cell_type == 'LSTM' else 1

  def _build_model(self):
    """Assemble the training model: [encoder ids, decoder ids] -> per-step softmax."""
    rnn_class = self._rnn_class()
    if self.num_enc_dec < 1:
      raise ValueError("num_enc_dec must be at least 1, got %r" % (self.num_enc_dec,))

    # Encoder: embedding followed by a stack of recurrent layers. The final
    # state(s) of every layer are kept to initialise the matching decoder layer.
    encoder_inputs = Input(shape=(None, ))
    encoder_outputs = Embedding(num_encoder_tokens, self.in_emb, mask_zero = True,
                                name=self._ENC_EMBEDDING)(encoder_inputs)
    encoder_states = []
    for depth in range(1, self.num_enc_dec + 1):
      encoder_rnn = rnn_class(self.hidden_size, return_state=True, return_sequences=True,
                              dropout = self.dropout, name=self._layer_name("Enc", depth))
      encoder_result = encoder_rnn(encoder_outputs)
      encoder_outputs = encoder_result[0]
      encoder_states.append(list(encoder_result[1:]))

    # Decoder: each layer consumes the output of the layer directly below it.
    # The decoder layers return their states too; the training model ignores
    # them, but the inference model needs them.
    # NOTE: the decoder embedding width is hidden_size, not in_emb.
    decoder_inputs = Input(shape=(None,))
    decoder_outputs = Embedding(num_decoder_tokens, self.hidden_size, mask_zero = True,
                                name=self._DEC_EMBEDDING)(decoder_inputs)
    for depth in range(1, self.num_enc_dec + 1):
      decoder_rnn = rnn_class(self.hidden_size, return_sequences=True, return_state=True,
                              dropout = self.dropout, name=self._layer_name("Dec", depth))
      decoder_result = decoder_rnn(decoder_outputs, initial_state = encoder_states[depth - 1])
      decoder_outputs = decoder_result[0]

    decoder_dense = Dense(num_decoder_tokens, activation='softmax', name=self._DEC_DENSE)
    decoder_outputs = decoder_dense(decoder_outputs)

    return Model([encoder_inputs, decoder_inputs], decoder_outputs)

  def build_fit(self,encoder_input_data,decoder_input_data,decoder_target_data,x_test, y_test):
    """Build, train, save and evaluate the model.

    Args:
      encoder_input_data: encoder character ids for training.
      decoder_input_data: decoder character ids for training.
      decoder_target_data: one-hot decoder targets for training.
      x_test, y_test: accepted for interface compatibility but not used; the
        evaluation reads the notebook-level validation data instead.

    Returns:
      The word-level validation accuracy (exact match after strip()).

    Side effects: prints the model summary, writes 'model.png', saves the model
    to 's2s', prints every decoded validation word, and logs 'val_accuracy' to
    wandb when a run is active.
    """
    model = self._build_model()

    model.summary()

    plot_model(model, to_file='model.png', show_shapes=True)
    
    # `learning_rate` is the current name of the argument formerly called `lr`.
    optimizer = Adam(learning_rate=self.learning_rate, beta_1=0.9, beta_2=0.999)
    model.compile(loss = "categorical_crossentropy", optimizer = optimizer, metrics=['accuracy'])
  
    model.fit(
        [encoder_input_data, decoder_input_data],
        decoder_target_data,
        batch_size=self.batch_size,
        epochs=self.epochs,
        )
    
    model.save("s2s")
    
    encoder_model,decoder_model = self.inference_model(model,hidden_size = self.hidden_size)

    val_accuracy = self._evaluate(encoder_model, decoder_model)
    print(val_accuracy)

    # 'val_accuracy' is the metric the sweep optimises; only log it when a
    # wandb run is active so that stand-alone calls keep working.
    if wandb.run is not None:
      wandb.log({'val_accuracy' : val_accuracy})

    return val_accuracy

  def _evaluate(self, encoder_model, decoder_model):
    """Decode every validation word and return the fraction reproduced exactly."""
    global_total = 0
    global_correct = 0
    for i in range(len(valid_source)):
      input_seq = val_encoder_input_data[i : i + 1]
      result = self.decode_sequence(encoder_model,decoder_model,input_seq,pred_type= self.pred_type,beam_width=self.beam_width)
      target = val_target_texts[i]
      # Drop the "B"/"E" markers from the reference.
      target = target[1:len(target)-1]
      # Drop the end marker from the prediction, but only if it is there: a
      # prediction cut off at the length limit has no marker to remove.
      if result.endswith('E'):
        result = result[:-1]
      print("Target: %s \n Result: %s" % (target, result))

      if result.strip() == target.strip():
        global_correct = global_correct + 1
      
      global_total = global_total + 1
      accuracy_epoch = global_correct/global_total
      print("Accuracy: %s" % (accuracy_epoch))

    # Guard against an empty validation set.
    return global_correct/global_total if global_total else 0.0
    
  def inference_model(self,model,hidden_size):
    """Split a trained model into step-wise encoder and decoder models.

    Layers are looked up by the names assigned when this class builds a model,
    so `model` must come from this class with the same `cell_type` and
    `num_enc_dec` as this instance.

    Args:
      model: the trained training model.
      hidden_size: units per recurrent layer (size of every state input).

    Returns:
      (encoder_model, decoder_model):
        encoder_model maps encoder ids to the flat list of final states of all
          encoder layers (layer 1 first; for LSTM h then c per layer).
        decoder_model maps [decoder ids] + that flat state list to
          [softmax output] + the updated flat state list.
    """
    # Encoder: expose the final state(s) of every encoder layer.
    encoder_inputs = model.input[0]
    encoder_states = []
    for depth in range(1, self.num_enc_dec + 1):
      encoder_rnn = model.get_layer(self._layer_name("Enc", depth))
      encoder_states.extend(list(encoder_rnn.output)[1:])
    encoder_model = Model(encoder_inputs, encoder_states)

    # Decoder: re-apply the trained layers, feeding each one explicit state inputs.
    decoder_inputs = model.input[1]
    decoder_outputs = model.get_layer(self._DEC_EMBEDDING)(decoder_inputs)
    decoder_states_inputs = []
    decoder_states = []
    for depth in range(1, self.num_enc_dec + 1):
      layer_state_inputs = [keras.Input(shape=(hidden_size,))
                            for _ in range(self._states_per_layer())]
      decoder_rnn = model.get_layer(self._layer_name("Dec", depth))
      decoder_result = decoder_rnn(decoder_outputs, initial_state=layer_state_inputs)
      decoder_outputs = decoder_result[0]
      decoder_states.extend(list(decoder_result[1:]))
      decoder_states_inputs.extend(layer_state_inputs)

    decoder_outputs = model.get_layer(self._DEC_DENSE)(decoder_outputs)
    decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

    return encoder_model,decoder_model

  def decode_sequence(self,encoder_model,decoder_model,input_seq, pred_type,beam_width):
    """Decode one input sequence (batch of size 1) into a string.

    With pred_type 'greedy' the most probable character is taken at every step.
    With 'beam_search' the `beam_width` hypotheses with the lowest summed
    negative log-probability are kept at every step and the best one is
    returned. The decoder is called once per live hypothesis per step.

    Args:
      encoder_model, decoder_model: the models returned by inference_model
        (any objects with a compatible `predict` work).
      input_seq: encoder ids for a single word.
      pred_type: 'greedy' or 'beam_search'.
      beam_width: beam size for 'beam_search'; ignored for 'greedy'.

    Returns:
      The decoded characters, ending with the end marker 'E' unless the
      hypothesis was cut off after exceeding `max_decoder_seq_length` characters.

    Raises:
      ValueError: if pred_type is neither 'greedy' nor 'beam_search'.
    """
    if pred_type == 'greedy':
      beam_w = 1
    elif pred_type == 'beam_search':
      beam_w = max(1, int(beam_width))
    else:
      raise ValueError("Unknown pred_type %r; expected 'greedy' or 'beam_search'" % (pred_type,))

    # Encode the input as state vectors. A model with a single output may
    # return a bare array instead of a list.
    encoded = encoder_model.predict(input_seq)
    if isinstance(encoded, (list, tuple)):
      states_value = list(encoded)
    else:
      states_value = [encoded]

    start_token = target_token_index['B']
    stop_token = target_token_index['E']

    # Each hypothesis is [score, token ids, decoder states, finished].
    beams = [[0.0, [], states_value, False]]
    while not all(hypothesis[3] for hypothesis in beams):
      candidates = []
      for score, tokens, states, finished in beams:
        if finished:
          # Finished hypotheses compete unchanged with the extended ones.
          candidates.append([score, tokens, states, True])
          continue

        # Feed the previous character (or the start marker) as a length-1 sequence.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = tokens[-1] if tokens else start_token
        outputs = decoder_model.predict([target_seq] + states)
        step_probs = np.asarray(outputs[0])[0, -1, :]
        next_states = list(outputs[1:])

        # Expand with the beam_w most probable next characters.
        for step_tokens, step_score in self.beam_search_decoder([step_probs], beam_w):
          new_tokens = tokens + step_tokens
          # Exit condition: either find the stop character or hit max length.
          done = step_tokens[-1] == stop_token or len(new_tokens) > max_decoder_seq_length
          candidates.append([score + step_score, new_tokens, next_states, done])

      # Lower score = more probable; the sort is stable, so ties keep their order.
      candidates.sort(key=lambda hypothesis: hypothesis[0])
      beams = candidates[:beam_w]

    best_tokens = beams[0][1]
    return "".join(reverse_target_char_index[token] for token in best_tokens)
  
  def beam_search_decoder(self,data, k):
    """Beam search over a fixed table of per-step probabilities.

    Args:
      data: iterable of rows, one per step; row[j] is the probability of
        token j at that step.
      k: number of sequences to keep after every step.

    Returns:
      Up to k entries `[token_ids, score]`, best first, where score is the sum
      of negative log-probabilities (lower is better). Probabilities below
      `_MIN_PROB` are floored to it instead of making log() raise.
    """
    sequences = [[list(), 0.0]]
    # walk over each step in sequence
    for row in data:
      all_candidates = list()
      # expand each current candidate
      for seq, score in sequences:
        for j in range(len(row)):
          candidate = [seq + [j], score - log(max(row[j], self._MIN_PROB))]
          all_candidates.append(candidate)
      # order all candidates by score
      ordered = sorted(all_candidates, key=lambda tup:tup[1])
      # select k best
      sequences = ordered[:k]
    return sequences

Sweep

In [None]:
# Bayesian sweep that maximises 'val_accuracy'. Every hyperparameter is a
# discrete choice; the comprehension wraps each list in the {'values': [...]}
# form used by the sweep configuration.
sweep_config = {
    'method': 'bayes',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {
        name: {'values': choices}
        for name, choices in (
            ('dropout', [0.0, 0.1, 0.2]),
            ('learning_rate', [1e-3, 1e-4]),
            ('batch_size', [64, 128]),
            ('in_emb', [32, 64, 128]),
            ('num_enc_dec', [1, 2, 3]),
            ('hidden_size', [32, 64, 128]),
            ('cell_type', ['RNN', 'GRU', 'LSTM']),
            ('dec_search', ['beam_search', 'greedy']),
            ('beam_width', [3, 5]),
        )
    },
}


In [None]:
# Initialize a new sweep from sweep_config under the given W&B entity and
# project; the value returned by wandb.sweep is kept in sweep_id.
sweep_id = wandb.sweep(sweep_config, entity="cs6910assignment3", project="RNN")

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: whvsrv0e
Sweep URL: https://wandb.ai/cs6910assignment3/RNN/sweeps/whvsrv0e


In [None]:
def train_sweep():
  """Run one sweep trial.

  Starts a wandb run seeded with default hyperparameters, names the run after
  its cell type, decoding strategy and batch size, then builds a MyRNN from
  the run's config and trains/evaluates it on the notebook-level arrays.
  """
  # Fallback values for any hyperparameter the run's config does not supply.
  config_defaults = {
        'dropout': 0.4,
        'learning_rate': 1e-3,
        'batch_size': 32,
        'epochs' : 10,
        'in_emb': 32,
        'num_enc_dec': 2,
        'hidden_size': 32,
        'cell_type': 'RNN',
        'dec_search': 'beam_search',
        'beam_width': 5
        }

  wandb.init(config = config_defaults)

  # wandb.config holds the hyperparameters of this run.
  config = wandb.config
  wandb.run.name = ''.join([
      'cell_type_', str(config.cell_type),
      '_dec_search_', config.dec_search,
      '_bs_', str(config.batch_size),
  ])

  # The sweep's 'dec_search' setting is MyRNN's `pred_type`.
  model_rnn = MyRNN(
      cell_type = config.cell_type,
      in_emb = config.in_emb,
      hidden_size = config.hidden_size,
      learning_rate = config.learning_rate,
      dropout = config.dropout,
      pred_type = config.dec_search,
      epochs = config.epochs,
      batch_size = config.batch_size,
      beam_width = config.beam_width,
      num_enc_dec = config.num_enc_dec,
  )

  model_rnn.build_fit(encoder_input_data,decoder_input_data,decoder_target_data,x_test, y_test)

In [None]:
# Launch an agent that runs train_sweep for up to 100 trials.
# NOTE(review): the agent attaches to the hard-coded sweep id "9623tlhn", not to
# the `sweep_id` returned by wandb.sweep earlier in the notebook — presumably
# to continue an earlier sweep; confirm this is intended.
# Variant that uses the freshly created sweep instead:
#wandb.agent(sweep_id, train_sweep,count=100)
wandb.agent("9623tlhn", entity="cs6910assignment3",project="RNN", function =train_sweep,count=100)

In [None]:
# Manually chosen configuration: a single-layer LSTM encoder/decoder.
model_rnn = MyRNN(
    cell_type = 'LSTM',
    in_emb = 128,
    hidden_size = 128,
    learning_rate = 1e-3,
    dropout = 0.1,
    pred_type = 'beam_search',
    epochs = 10,
    batch_size = 128,
    beam_width = 3,
    num_enc_dec = 1,
)

In [None]:
# Train the manually configured model on the training arrays; build_fit then
# decodes the validation words and prints the running and final accuracy.
model_rnn.build_fit(encoder_input_data,decoder_input_data,decoder_target_data,x_test, y_test)

Model: "model_9"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_13 (InputLayer)           [(None, None)]       0                                            
__________________________________________________________________________________________________
input_14 (InputLayer)           [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding_8 (Embedding)         (None, None, 128)    3456        input_13[0][0]                   
__________________________________________________________________________________________________
embedding_9 (Embedding)         (None, None, 128)    6272        input_14[0][0]                   
____________________________________________________________________________________________



INFO:tensorflow:Assets written to: s2s/assets


INFO:tensorflow:Assets written to: s2s/assets


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Accuracy: 0.4712265064909901
Target: மற்றவர்களால் 
 Result: மற்றவர்களால்
Accuracy: 0.4713289422704378
Target: சுகுமாரன் 
 Result: சுகுமாரன்
Accuracy: 0.47143133836916523
Target: கோபித்துக்கொண்டு 
 Result: கோப்பிட்டுக்கொண்டு
Accuracy: 0.4713400464756003
Target: செயற்பாடும் 
 Result: செயற்படும்
Accuracy: 0.4712487899322362
Target: மரீனா 
 Result: மரினா
Accuracy: 0.4711575687185443
Target: உட்கொள்ள 
 Result: வேண்டும்
Accuracy: 0.471066382814012
Target: தமிழக 
 Result: தமிழக
Accuracy: 0.4711687306501548
Target: கேட்டி 
 Result: காடியே
Accuracy: 0.4710775778680596
Target: ஆன்மாவின் 
 Result: ஆன்மாவின்
Accuracy: 0.47117988394584137
Target: கிலோ 
 Result: கிலோ
Accuracy: 0.47128215045445754
Target: முன்மாதிரியாக 
 Result: முன்மாறியத்திற்கு
Accuracy: 0.47119102861562256
Target: அத்வைத 
 Result: அத்வைத
Accuracy: 0.4712932534312778
Target: தோன்றியதை 
 Result: தோன்றியதை
Accuracy: 0.47139543873212214
Target: படப்பிடிப்பும் 
 Result: ப