In [None]:
# Necessary packages 
import io
import numpy as np
import tensorflow as tf
from random import randint
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from numpy import array, argmax, array_equal 
import keras.backend as K
from tensorflow.keras import backend as K
from tensorflow.keras import models, Input
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import LSTM, Bidirectional, SimpleRNN, GRU, Dense, Flatten, TimeDistributed, RepeatVector, Lambda
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import Adam
# Use float64 as the Keras default float type; the one-hot arrays built later
# in this notebook are allocated with dtype "float64" as well.
tf.keras.backend.set_floatx('float64')

In [None]:
%pip install wandb -q
import wandb
from wandb.keras import WandbCallback

[K     |████████████████████████████████| 1.8 MB 5.5 MB/s 
[K     |████████████████████████████████| 144 kB 56.8 MB/s 
[K     |████████████████████████████████| 181 kB 56.1 MB/s 
[K     |████████████████████████████████| 63 kB 1.6 MB/s 
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


In [None]:
# Authenticate with Weights & Biases (prompts for an API key when none is stored).
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit: ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
# Start a W&B run in the "Assignment 3" project of the "shubham-argha" entity.
wandb.init(project="Assignment 3", entity="shubham-argha")

[34m[1mwandb[0m: Currently logged in as: [33margha[0m ([33mshubham-argha[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Download the Dakshina dataset v1.0 archive into the working directory.
!yes | wget "https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar"

--2022-05-04 09:07:22--  https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
Resolving storage.googleapis.com (storage.googleapis.com)... 142.250.125.128, 142.250.148.128, 108.177.112.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.250.125.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2008340480 (1.9G) [application/x-tar]
Saving to: ‘dakshina_dataset_v1.0.tar’


2022-05-04 09:07:31 (213 MB/s) - ‘dakshina_dataset_v1.0.tar’ saved [2008340480/2008340480]



In [None]:
# Extract the downloaded tar archive into the working directory.
!yes | tar xopf dakshina_dataset_v1.0.tar

In [None]:
# List the Hindi lexicon files: the train, dev (validation) and test splits.
!ls dakshina_dataset_v1.0/hi/lexicons

hi.translit.sampled.dev.tsv   hi.translit.sampled.train.tsv
hi.translit.sampled.test.tsv


In [None]:
# Paths of the Hindi transliteration lexicon files (train / dev / test splits).
train_dir, dev_dir, test_dir = (
    "./dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv",
    "./dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv",
    "./dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv",
)

In [None]:
def read(f):
    """Read a tab-separated lexicon file into two parallel word lists.

    Every usable line must contain at least two tab-separated fields after
    trailing whitespace is stripped. The first field is stored in the ``hindi``
    list and the second in the ``latin`` list; any further fields are ignored.
    Lines that do not provide two fields (blank lines, lines without a tab,
    lines whose only tab is trailing) are skipped instead of raising.

    Args:
        f: Path of the UTF-8 encoded text file to read.

    Returns:
        A tuple ``(latin, hindi)`` of two equally long lists of strings, in
        file order.
    """
    hindi = []
    latin = []
    # A separate name for the handle keeps the path argument ``f`` intact.
    with io.open(f, encoding='utf-8') as handle:
        for line in handle:
            tokens = line.rstrip().split("\t")
            # rstrip() may remove a trailing tab, leaving a single field.
            if len(tokens) < 2:
                continue
            latin.append(tokens[1])
            hindi.append(tokens[0])
    return latin, hindi

In [None]:
# Load the (source, target) word lists of the train, dev and test splits, in that order.
(tr_src, tr_tar), (val_src, val_tar), (tst_src, tst_tar) = (
    read(split_path) for split_path in (train_dir, dev_dir, test_dir)
)

In [None]:
# Report how many word pairs each split contains.
for _label, _words in (
    ("Number of training samples: ", tr_src),
    ("Number of validation samples: ", val_src),
    ("Number of testing samples: ", tst_src),
):
    print(_label, len(_words))

Number of training samples:  44204
Number of validation samples:  4358
Number of testing samples:  4502


In [None]:
# Shuffled index orders for the training (A) and validation (A1) samples.
# A dedicated, seeded generator makes the order repeatable after a kernel
# restart without touching NumPy's global random state.
shuffle_rng = np.random.default_rng(42)
A = shuffle_rng.permutation(len(tr_src))
A1 = shuffle_rng.permutation(len(val_src))

In [None]:
# Character vocabularies of the source and target side (filled by the scan below).
char_inp, char_tar = set(), set()
# Source / target strings in file order, for the training split ...
ns_txt_inp, ns_txt_tar = [], []
# ... and for the validation split.
ns_txt_inp_val, ns_txt_tar_val = [], []

In [None]:
# Wrap every target word in sentinel characters:
#   "B" : start-of-sequence marker
#   "E" : end-of-sequence marker
# The training and validation splits get identical treatment: the strings are
# stored in file order and every character seen is added to the vocabularies.
for src_words, tar_words, inp_store, tar_store in (
    (tr_src, tr_tar, ns_txt_inp, ns_txt_tar),
    (val_src, val_tar, ns_txt_inp_val, ns_txt_tar_val),
):
    for txt_inp, txt_tar in zip(src_words, tar_words):
        txt_tar = "B" + txt_tar + "E"
        inp_store.append(txt_inp)
        tar_store.append(txt_tar)
        # set.update adds each character at most once, so no membership test is needed.
        char_inp.update(txt_inp)
        char_tar.update(txt_tar)

In [None]:
# Training strings reordered according to the shuffled index array A.
inps_txt = [ns_txt_inp[A[pos]] for pos in range(len(tr_src))]
tars_txt = [ns_txt_tar[A[pos]] for pos in range(len(tr_src))]

In [None]:
# Validation strings reordered according to the shuffled index array A1.
txt_inp_vals = [ns_txt_inp_val[A1[pos]] for pos in range(len(val_src))]
txt_tar_vals = [ns_txt_tar_val[A1[pos]] for pos in range(len(val_src))]

In [None]:
# Add the padding character " " to both vocabularies and freeze them as sorted lists.
# Going through set() keeps this cell re-runnable: after the first run the names
# already hold lists, which have no .add() method.
char_inp = sorted(set(char_inp) | {" "})
char_tar = sorted(set(char_tar) | {" "})

In [None]:
# Vocabulary sizes; these are the one-hot widths on the encoder and decoder side.
enc_tok_num, dec_tok_num = len(char_inp), len(char_tar)

In [None]:
# Longest source / target string in the training split ...
len_max_enc = max(map(len, inps_txt))
len_max_dec = max(map(len, tars_txt))
# ... and in the validation split.
len_max_enc_val = max(map(len, txt_inp_vals))
len_max_dec_val = max(map(len, txt_tar_vals))

In [None]:
# Character -> integer index lookups, following the sorted vocabulary order.
tok_ind_inp = {ch: idx for idx, ch in enumerate(char_inp)}
tok_ind_tar = {ch: idx for idx, ch in enumerate(char_tar)}

In [None]:
# Show both lookup tables, one per line.
print(tok_ind_inp, tok_ind_tar, sep="\n")

{' ': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}
{' ': 0, 'B': 1, 'E': 2, 'ँ': 3, 'ं': 4, 'ः': 5, 'अ': 6, 'आ': 7, 'इ': 8, 'ई': 9, 'उ': 10, 'ऊ': 11, 'ऋ': 12, 'ए': 13, 'ऐ': 14, 'ऑ': 15, 'ओ': 16, 'औ': 17, 'क': 18, 'ख': 19, 'ग': 20, 'घ': 21, 'ङ': 22, 'च': 23, 'छ': 24, 'ज': 25, 'झ': 26, 'ञ': 27, 'ट': 28, 'ठ': 29, 'ड': 30, 'ढ': 31, 'ण': 32, 'त': 33, 'थ': 34, 'द': 35, 'ध': 36, 'न': 37, 'प': 38, 'फ': 39, 'ब': 40, 'भ': 41, 'म': 42, 'य': 43, 'र': 44, 'ल': 45, 'व': 46, 'श': 47, 'ष': 48, 'स': 49, 'ह': 50, '़': 51, 'ा': 52, 'ि': 53, 'ी': 54, 'ु': 55, 'ू': 56, 'ृ': 57, 'ॅ': 58, 'े': 59, 'ै': 60, 'ॉ': 61, 'ो': 62, 'ौ': 63, '्': 64, 'ॐ': 65}


In [None]:
# The model builds its first decoder input with a fixed batch dimension, so
# every training batch has to be full. Keep the largest prefix whose length is
# a multiple of the biggest batch size used in this notebook (128); such a
# length is also divisible by 64 and 32. For 44204 training samples this keeps
# 44160 of them.
FULL_BATCH_MULTIPLE = 128
n_train_kept = (len(inps_txt) // FULL_BATCH_MULTIPLE) * FULL_BATCH_MULTIPLE
inp_txt_trnc = inps_txt[:n_train_kept]
tar_txt_trnc = tars_txt[:n_train_kept]

In [None]:
# One-hot training tensors, laid out as (sample, time step, token index).
enc_inp = np.zeros(shape=(len(inp_txt_trnc), len_max_enc, enc_tok_num), dtype="float64")
dec_tar = np.zeros(shape=(len(inp_txt_trnc), len_max_dec, dec_tok_num), dtype="float64")

In [None]:
# One-hot encode every training pair; positions after the end of a word are
# filled with the padding character " ".
pad_inp = tok_ind_inp[" "]
pad_tar = tok_ind_tar[" "]
for i, (txt_inp, txt_tar) in enumerate(zip(inp_txt_trnc, tar_txt_trnc)):
    for m, n in enumerate(txt_inp):
        enc_inp[i, m, tok_ind_inp[n]] = 1.0
    # len() gives the padding start directly; the loop index would be stale
    # (or undefined) for an empty string.
    enc_inp[i, len(txt_inp):, pad_inp] = 1.0
    for m, n in enumerate(txt_tar):
        dec_tar[i, m, tok_ind_tar[n]] = 1.0
    dec_tar[i, len(txt_tar):, pad_tar] = 1.0

In [None]:
# One-hot validation tensors; they reuse the sequence lengths of the training tensors.
inp_val_enc_dt = np.zeros(shape=(len(txt_inp_vals), len_max_enc, enc_tok_num), dtype="float64")
tar_val_dec_dt = np.zeros(shape=(len(txt_tar_vals), len_max_dec, dec_tok_num), dtype="float64")

In [None]:
# One-hot encode every validation pair; positions after the end of a word are
# filled with the padding character " ".
# NOTE(review): the arrays are sized with the training maxima, so a validation
# word longer than every training word would raise IndexError here.
pad_inp_val = tok_ind_inp[" "]
pad_tar_val = tok_ind_tar[" "]
for i, (txt_inp, txt_tar) in enumerate(zip(txt_inp_vals, txt_tar_vals)):

    for t, n in enumerate(txt_inp):
        inp_val_enc_dt[i, t, tok_ind_inp[n]] = 1.0
    # len() gives the padding start directly; the loop index would be stale
    # (or undefined) for an empty string.
    inp_val_enc_dt[i, len(txt_inp):, pad_inp_val] = 1.0

    for t, n in enumerate(txt_tar):
        tar_val_dec_dt[i, t, tok_ind_tar[n]] = 1.0
    tar_val_dec_dt[i, len(txt_tar):, pad_tar_val] = 1.0

In [None]:
class BahdanauAttention(tf.keras.layers.Layer):
  """Additive attention: scores each entry of ``values`` against ``query``.

  The score is ``V(tanh(W1(query) + W2(values)))`` where W1, W2 and V are
  Dense layers; the scores are normalised with a softmax over axis 1.
  """

  def __init__(self, units):
    """Create the three Dense projections; W1 and W2 have ``units`` outputs, V has one."""
    super().__init__()
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, query, values):
    """Return ``(context, weights)`` for one query.

    Args:
      query: tensor that gets an extra axis inserted at position 1 before
        scoring (presumably (batch, hidden) -- TODO confirm against callers).
      values: tensor whose axis 1 is attended over.

    Returns:
      context: ``values`` weighted by the attention weights and summed over axis 1.
      weights: softmax-normalised scores (softmax taken over axis 1).
    """
    # Insert an axis at position 1 so the query broadcasts against values.
    query_step = tf.expand_dims(query, 1)

    energy = tf.nn.tanh(self.W1(query_step) + self.W2(values))
    scores = self.V(energy)

    weights = tf.nn.softmax(scores, axis=1)
    context = tf.reduce_sum(weights * values, axis=1)

    return context, weights


In [None]:
class model_with_attention(object):
  """Encoder-decoder network with attention, trained and scored in one call.

  The class only stores hyper-parameters; ``fit`` builds the Keras model,
  trains it, and logs an exact-match accuracy on the validation arrays to W&B.

  It relies on names defined elsewhere in the notebook: ``len_max_enc``,
  ``len_max_dec``, ``enc_tok_num``, ``dec_tok_num``, ``inp_val_enc_dt``,
  ``tar_val_dec_dt``, ``BahdanauAttention`` and an active ``wandb`` run.

  Args:
    Type: recurrent cell to use, one of 'RNN', 'GRU' or 'LSTM'.
    hid_layer_size: number of units of the encoder, decoder and attention layers.
    l_r: learning rate passed to Adam.
    drop_prob: ``dropout`` argument of the recurrent layers.
    number_of_epochs: number of training epochs.
    batch_size: batch size for training and prediction. The graph contains a
      constant with this batch dimension, so the number of samples fed to the
      model must be a multiple of it.
    attn: attention mechanism; only 'bahdanau' is implemented.
  """

  # Values accepted for ``Type`` and ``attn`` (checked at the start of fit).
  _CELL_TYPES = ('LSTM', 'GRU', 'RNN')
  _ATTENTION_TYPES = ('bahdanau',)

  def __init__(self, Type = 'RNN', hid_layer_size=32, l_r= 1e-3, drop_prob = 0.3, number_of_epochs = 10, batch_size = 32, attn = 'bahdanau'):

    self.Type = Type
    self.hid_layer_size = hid_layer_size
    self.l_r = l_r
    self.drop_prob = drop_prob
    self.number_of_epochs = number_of_epochs
    self.batch_size = batch_size
    self.attn = attn

  def fit(self, enc_inp, dec_tar):
    """Build, train and evaluate the model.

    Args:
      enc_inp: one-hot encoder inputs, fed to ``model.fit`` as features.
      dec_tar: one-hot decoder targets, fed to ``model.fit`` as labels.

    Raises:
      ValueError: if ``Type`` or ``attn`` is not a supported value, or if the
        validation set holds fewer samples than one batch.
    """
    # Fail early with a clear message instead of an unbound-name error later on.
    self._check_config()

    model = self._build_model()

    model.fit(enc_inp, dec_tar,
              batch_size=self.batch_size,
              epochs=self.number_of_epochs,
              #callbacks = [WandbCallback()]
              )

    self._log_validation_accuracy(model)

  def _check_config(self):
    """Raise ValueError when Type or attn names something that is not implemented."""
    if self.Type not in self._CELL_TYPES:
      raise ValueError(
          "Unsupported Type %r; expected one of %s" % (self.Type, ', '.join(self._CELL_TYPES)))
    if self.attn not in self._ATTENTION_TYPES:
      raise ValueError(
          "Unsupported attn %r; expected one of %s" % (self.attn, ', '.join(self._ATTENTION_TYPES)))

  def _build_model(self):
    """Assemble the attention encoder-decoder graph and return the compiled model."""
    cell_cls = {'LSTM': LSTM, 'GRU': GRU, 'RNN': SimpleRNN}[self.Type]
    layer_suffix = self.Type.lower()   # yields encoder_lstm / encoder_gru / encoder_rnn, etc.

    # Encoder: returns the full output sequence plus its final state(s)
    # (two state tensors for LSTM, one for GRU / SimpleRNN).
    enc_inps = Input(shape=(len_max_enc, enc_tok_num), name='encoder_inputs')
    encoder = cell_cls(self.hid_layer_size, return_sequences=True, return_state=True,
                       dropout=self.drop_prob, name='encoder_' + layer_suffix)
    enc_outs, *states = encoder(enc_inps)

    # Attention layer
    attn = BahdanauAttention(self.hid_layer_size)

    # Decoder layers: one recurrent layer and one softmax projection, both
    # reused at every decoding step.
    decoder = cell_cls(self.hid_layer_size, dropout=self.drop_prob, return_state=True,
                       name='decoder_' + layer_suffix)
    dec_den = Dense(dec_tok_num, activation='softmax', name='decoder_dense')

    # First decoder input: a one-hot vector on token index 0 for every sample.
    # Its batch dimension is fixed, which is why all batches must be full.
    ip = np.zeros((self.batch_size, 1, dec_tok_num))
    ip[:, 0, 0] = 1

    dec_outs = states[0]   # the first attention query is the encoder's final hidden state
    oa = []

    for _ in range(len_max_dec):

      vec_cxt, _attn_wgt = attn(dec_outs, enc_outs)
      vec_cxt = tf.expand_dims(vec_cxt, 1)

      # Decoder input = attention context followed by the previous prediction.
      ip = tf.concat([vec_cxt, ip], axis=-1)

      dec_outs, *states = decoder(ip, initial_state=states)

      op = tf.expand_dims(dec_den(dec_outs), 1)
      oa.append(op)
      ip = op   # feed the predicted distribution back in at the next step

    # Stack the per-step distributions along the time axis.
    dec_outs = Lambda(lambda x: K.concatenate(x, axis=1))(oa)
    model = Model(enc_inps, dec_outs, name='model_encoder_decoder')

    # ``learning_rate`` replaces the deprecated ``lr`` keyword.
    optimizer = Adam(learning_rate=self.l_r, beta_1=0.9, beta_2=0.999)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

  def _log_validation_accuracy(self, model):
    """Log exact-match accuracy on the validation arrays to W&B.

    A prediction counts as correct only if its arg-max token equals the
    target's arg-max token at every time step.
    """
    # Use whole batches only (4352 samples for the 4358-sample validation set
    # with batch sizes 32, 64 and 128).
    v_t = (len(inp_val_enc_dt) // self.batch_size) * self.batch_size
    if v_t == 0:
      raise ValueError("Validation set is smaller than one batch of %d samples" % self.batch_size)

    p = model.predict(inp_val_enc_dt[:v_t], batch_size=self.batch_size)

    pred_ids = np.argmax(p, axis=-1)
    true_ids = np.argmax(tar_val_dec_dt[:v_t], axis=-1)
    word_ok = np.all(pred_ids == true_ids, axis=1)
    running_hits = np.cumsum(word_ok)

    # Running accuracy after every 50 evaluated samples.
    for seen in range(50, v_t + 1, 50):
      wandb.log({'epoch_accuracy' : float(running_hits[seen - 1]) / seen})

    val_accuracy = float(running_hits[-1]) / v_t

    wandb.log({'val_accuracy' : val_accuracy})

In [None]:
# Candidate values of every hyper-parameter explored by the sweep.
_sweep_values = {
    'drop_prob': [0.0, 0.1, 0.2],
    'l_r': [1e-3, 1e-4],
    'batch_size': [64, 128],
    'hid_layer_size': [32, 64, 128],
    'Type': ['RNN', 'GRU', 'LSTM'],
    'attn': ['bahdanau'],
}

# Bayesian search that maximises the logged 'val_accuracy' metric.
sweep_config = {
    'method': 'bayes',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {name: {'values': choices} for name, choices in _sweep_values.items()},
}

In [None]:
# Register the sweep configuration with W&B and keep the returned sweep ID.
sweep_id = wandb.sweep(sweep_config, entity="shubham-argha", project="Assignment 3")

Create sweep with ID: umcym8nf
Sweep URL: https://wandb.ai/shubham-argha/Assignment%203/sweeps/umcym8nf


In [None]:
def train():
  """Run one sweep trial: start a W&B run, build the model from its config, train it.

  The defaults below apply to every hyper-parameter the sweep does not set.
  The run is named after the cell type, attention type and batch size.
  Training uses the notebook-level ``enc_inp`` / ``dec_tar`` arrays.
  """

  config_defaults = {
        'drop_prob': 0.3,
        'l_r': 1e-3,
        'batch_size': 128,
        'number_of_epochs' : 15,
        'hid_layer_size': 128,
        'Type': 'LSTM',
        'attn': 'bahdanau'
        }

  wandb.init(config = config_defaults)

  config = wandb.config

  wandb.run.name = str(config.Type)+ '_' + config.attn +'_bs_'+str(config.batch_size)

  # The constructor's keyword is ``number_of_epochs``; passing ``epochs`` raises TypeError.
  model_rnn = model_with_attention(
      Type = config.Type,
      hid_layer_size = config.hid_layer_size,
      l_r = config.l_r,
      drop_prob = config.drop_prob,
      number_of_epochs = config.number_of_epochs,
      batch_size = config.batch_size,
      attn = config.attn,
  )

  model_rnn.fit(enc_inp, dec_tar)

In [None]:
# Run 10 trials of the sweep registered earlier in this notebook. Using the
# returned sweep_id (instead of a pasted ID) guarantees the agent draws its
# configurations from the sweep_config defined here.
wandb.agent(sweep_id, entity="shubham-argha", project="Assignment 3", function=train, count=10)

[34m[1mwandb[0m: Agent Starting Run: mh7obv5a with config:
[34m[1mwandb[0m: 	Type: RNN
[34m[1mwandb[0m: 	attention: bahdanau
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hid_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001


  super(Adam, self).__init__(name, **kwargs)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch_accuracy,██▄▂▁▁▃▃▁▂▄▄▅▅▆▆▇▇▆▆▆▆▆▇▇▇▇▇▇▇█▇▆▆▆▅▅▆▅▅
val_accuracy,▁

0,1
epoch_accuracy,0.43185
val_accuracy,0.43131


[34m[1mwandb[0m: Agent Starting Run: de5ox89m with config:
[34m[1mwandb[0m: 	Type: GRU
[34m[1mwandb[0m: 	attention: bahdanau
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hid_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15