Essentials

In [1]:
!nvidia-smi

Thu May  6 17:04:22 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [25]:
import io
import numpy as np
import tensorflow 
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding, GRU, Dropout, SimpleRNN
from keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils.vis_utils import plot_model
from math import log
from numpy import array
from numpy import argmax
from keras.optimizers import Adam

In [3]:
%pip install wandb -q
import wandb
from wandb.keras import WandbCallback

**Fetching the dataset** 

Lexicons for Latin-Tamil are taken from Google's Dakshina dataset. The necessary datasets have been uploaded to github, cloned and used for the reminder of the code.

In [4]:
!git clone https://github.com/borate267/lexicon-dataset.git

fatal: destination path 'lexicon-dataset' already exists and is not an empty directory.


In [5]:
# GLOBAL VARIABLES

print_data = True

Reading the dataset


In [6]:
train_dir = "lexicon-dataset/ta.translit.sampled.train.tsv"
dev_dir = "lexicon-dataset/ta.translit.sampled.dev.tsv"
test_dir = "lexicon-dataset/ta.translit.sampled.test.tsv"

# The following function reads the raw text document and returns a list of lists comprising the romanized and native versions of the words

def read_corpus(corpus_file):
  tamil_words = []
  latin_words = []
  with io.open(corpus_file, encoding ='utf-8') as f:
    for line in f:
      if '\t' not in line:
        continue
      tokens = line.rstrip().split("\t")
      latin_words.append(tokens[1])
      tamil_words.append(tokens[0])
  return latin_words, tamil_words

train_source, train_target = read_corpus(train_dir)
valid_source, valid_target = read_corpus(dev_dir)
test_source, test_target = read_corpus(test_dir)


In [7]:
# Pre-processing data

go = 'G'
stop = 'S'

# The following lists contain the source and target words that are to
# be used for training and validation

train_source = []
train_target = []
val_source = []
val_target = []

# The following entities hold the vocabulary for Tamil and latin languages
vocab_source = set()
vocab_target = set()

# Procuring training data
with io.open(train_dir, encoding ='utf-8') as f:
  for line in f:
    if '\t' not in line:
      continue
    tokens = line.rstrip().split("\t")
    input_text = tokens[1]
    target_text = tokens[0]
    train_source.append(input_text)
    train_target.append(go + target_text + stop)

    # Creating vocabulary
    for char in input_text:
        if char not in vocab_source:
            vocab_source.add(char)
    for char in target_text:
        if char not in vocab_target:
            vocab_target.add(char)

vocab_target.add(go)
vocab_target.add(stop)

# Procuring Validation data
with io.open(dev_dir, encoding ='utf-8') as f:
  for line in f:
    if '\t' not in line:
      continue
    tokens = line.rstrip().split("\t")
    input_text = tokens[1]
    target_text = tokens[0]
    val_source.append(input_text)
    val_target.append(go + target_text + stop)

    # Updating vocabulary
    for char in input_text:
        if char not in vocab_source:
            vocab_source.add(char)
    for char in target_text:
        if char not in vocab_target:
            vocab_target.add(char)

vocab_source = sorted(list(vocab_source))
vocab_target = sorted(list(vocab_target))
num_encoder_tokens = len(vocab_source)
num_decoder_tokens = len(vocab_target)
max_encoder_seq_length = max([len(txt) for txt in train_source])
max_decoder_seq_length = max([len(txt) for txt in train_target])

if (print_data):
    print("Number of training samples: ", len(train_source))
    print("Number of validation samples: ", len(valid_source))
    print("Number of testing samples: ", len(test_source))
    print("Number of unique input tokens:", num_encoder_tokens)
    print("Number of unique output tokens:", num_decoder_tokens)
    print("Max sequence length for inputs:", max_encoder_seq_length)
    print("Max sequence length for outputs:", max_decoder_seq_length)

# Creating tokens for vocabulary
input_token_index = dict([(char, i) for i, char in enumerate(vocab_source)])
target_token_index = dict([(char, i) for i, char in enumerate(vocab_target)])

#(TODO 1): Check if the start and end of word have tokens and check if they are in vocab
print(train_target[0])
print(target_token_index)

Number of training samples:  68218
Number of validation samples:  6827
Number of testing samples:  6864
Number of unique input tokens: 26
Number of unique output tokens: 48
Max sequence length for inputs: 30
Max sequence length for outputs: 28
Gஃபியட்S
{'G': 0, 'S': 1, 'ஃ': 2, 'அ': 3, 'ஆ': 4, 'இ': 5, 'ஈ': 6, 'உ': 7, 'ஊ': 8, 'எ': 9, 'ஏ': 10, 'ஐ': 11, 'ஒ': 12, 'ஓ': 13, 'க': 14, 'ங': 15, 'ச': 16, 'ஜ': 17, 'ஞ': 18, 'ட': 19, 'ண': 20, 'த': 21, 'ந': 22, 'ன': 23, 'ப': 24, 'ம': 25, 'ய': 26, 'ர': 27, 'ற': 28, 'ல': 29, 'ள': 30, 'ழ': 31, 'வ': 32, 'ஷ': 33, 'ஸ': 34, 'ஹ': 35, 'ா': 36, 'ி': 37, 'ீ': 38, 'ு': 39, 'ூ': 40, 'ெ': 41, 'ே': 42, 'ை': 43, 'ொ': 44, 'ோ': 45, 'ௌ': 46, '்': 47}


Character Embedding

**Encoder Input Sequences**: Padded to a maximum length of max_encSeqLen characters. 
**SHAPE: (len(train_source), max_encSeqLen)**

**Decoder Input Sequences**: Padded to a maximum length of max_encSeqLen characters. 
**SHAPE: (len(train_source), max_decSeqLen)**

**Decoder Target Sequences**: Padded to a maximum length of max_decSeqLen characters with a vocabulary of sizeofTamilVocab different characters. 
**SHAPE: (len(train_source), max_decSeqLen, sizeofTamilVocab)**

For training :

In [8]:
encoder_input_data = np.zeros((len(train_source), max_encoder_seq_length), dtype="float32")
decoder_input_data = np.zeros((len(train_source), max_decoder_seq_length), dtype="float32")
decoder_target_data = np.zeros((len(train_source), max_decoder_seq_length, num_decoder_tokens), dtype="float32")

for i, (input_text, target_text) in enumerate(zip(train_source, train_target)):

    for t, char in enumerate(input_text):
        encoder_input_data[i, t] = input_token_index[char]
  
    for t, char in enumerate(target_text):

        # decoder_target_data is ahead of decoder_input_data by one timestep
        if t < len(target_text) - 1: 
            # we only consider until before the end character. 
            decoder_input_data[i, t] = target_token_index[char]
        if t > 0:
            # decoder_target_data will be ahead by one timestep and will not include the start character.
            decoder_target_data[i, t - 1, target_token_index[char]] = 1.0


For Validation and testing:

In [9]:
val_max_encoder_seq_length = max([len(txt) for txt in val_source])
val_max_decoder_seq_length = max([len(txt) for txt in val_target])

val_encoder_input_data = np.zeros((len(val_source), val_max_encoder_seq_length), dtype="float32")
val_decoder_input_data = np.zeros((len(val_source), val_max_decoder_seq_length), dtype="float32")
val_decoder_target_data = np.zeros((len(val_source), val_max_decoder_seq_length), dtype="float32")

for i, (input_text, target_text) in enumerate(zip(val_source, val_target)):
    for t, char in enumerate(input_text):
        val_encoder_input_data[i, t] = input_token_index[char]
  
    for t, char in enumerate(target_text):
        # decoder_target_data is ahead of decoder_input_data by one timestep

        if t < len(target_text) - 1:
            # we only consider until before the end character.
            val_decoder_input_data[i, t] = target_token_index[char]
        if t > 0:
            # decoder_target_data will be ahead by one timestep and will not include the start character.
            val_decoder_target_data[i, t - 1] = target_token_index[char]



In [10]:
#(TODO 2): Verify decoder input and target have an offset of one
print(val_decoder_input_data[26])
print(val_decoder_target_data[26])

[ 0.  3. 16. 37. 23. 47.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.]
[ 3. 16. 37. 23. 47.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.]


In [32]:
x_test = [val_encoder_input_data, val_decoder_input_data]
y_test = val_decoder_target_data

Configuring the Sweep Hyperparameter dictionary

Defining the model

In [29]:
class MyRNN(object):
  def __init__(self,cell_type = 'RNN',in_emb = 32, hidden_size=32, learning_rate= 1e-3, 
               dropout=0.4,pred_type ='greedy',epochs = 10, batch_size = 32,beam_width = 5,
               num_enc_dec =2):
    
    self.cell_type = cell_type
    self.in_emb = in_emb
    self.hidden_size = hidden_size
    self.learning_rate = learning_rate
    self.dropout = dropout
    self.pred_type = pred_type
    self.epochs = epochs
    self.batch_size = batch_size
    self.beam_width = beam_width
    self.num_enc_dec = num_enc_dec

  def build_fit(self,encoder_input_data,decoder_input_data,decoder_target_data,x_test, y_test):
    # Define an input sequence and process it.
    encoder_inputs = Input(shape=(None, ))

    # Add an Embedding layer expecting input vocab of size num_encoder_tokens, and
    # output embedding dimension of size in_enc.
    enc_emb =  Embedding(num_encoder_tokens, self.in_emb , mask_zero = True)(encoder_inputs)

    encoder_outputs = enc_emb
    if self.cell_type == 'LSTM':
        # Add a LSTM layer with hidden_size internal units.
        if self.num_enc_dec == 1:
          encoder_lstm = LSTM(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_LSTM_1")
          encoder_outputs, state_h, state_c = encoder_lstm(encoder_outputs)
          encoder_states = [state_h, state_c]
        
        elif self.num_enc_dec == 2:
          encoder_lstm_1 = LSTM(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_LSTM_1")
          encoder_outputs_1, state_h_1, state_c_1 = encoder_lstm_1(encoder_outputs)
          encoder_states_1 = [state_h_1, state_c_1]

          encoder_lstm = LSTM(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_LSTM_2")
          encoder_outputs, state_h, state_c = encoder_lstm(encoder_outputs_1)
          encoder_states = [state_h, state_c]
          
        elif self.num_enc_dec == 3:
          encoder_lstm_1 = LSTM(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_LSTM_1")
          encoder_outputs_1, state_h_1, state_c_1 = encoder_lstm_1(encoder_outputs)
          encoder_states_1 = [state_h_1, state_c_1]
          
          encoder_lstm_2 = LSTM(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_LSTM_2")
          encoder_outputs_2, state_h_2, state_c_2 = encoder_lstm_2(encoder_outputs_1)
          encoder_states_2 = [state_h_2, state_c_2]
          
          encoder_lstm = LSTM(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_LSTM_3")
          encoder_outputs, state_h, state_c = encoder_lstm(encoder_outputs_2)
          encoder_states = [state_h, state_c]
    
    elif self.cell_type == 'GRU':
        if self.num_enc_dec == 1:
          encoder_gru = GRU(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_GRU_1")
          encoder_outputs, state_h = encoder_gru(encoder_outputs)
          encoder_states = [state_h]
        
        elif self.num_enc_dec == 2:
          encoder_gru_1 = GRU(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_GRU_")
          encoder_outputs_1, state_h_1 = encoder_gru_1(encoder_outputs)
          encoder_states_1 = [state_h_1]

          encoder_gru = GRU(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_GRU_2")
          encoder_outputs, state_h = encoder_gru(encoder_outputs_1)
          encoder_states = [state_h]
          
        elif self.num_enc_dec == 3:
          encoder_gru_1 = GRU(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_GRU_1")
          encoder_outputs_1,state_h_1 = encoder_gru_1(encoder_outputs)
          encoder_states_1 = [state_h_1]
          
          encoder_gru_2 = GRU(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_GRU_2")
          encoder_outputs_2, state_h_2 = encoder_gru_2(encoder_outputs_1)
          encoder_states_2 = [state_h_2]
          
          encoder_gru = GRU(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_GRU_3")
          encoder_outputs, state_h = encoder_gru(encoder_outputs_2)
          encoder_states = [state_h]

    elif self.cell_type == 'RNN':
        if self.num_enc_dec == 1:
          encoder_rnn = SimpleRNN(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_RNN_1")
          encoder_outputs, state_h = encoder_rnn(encoder_outputs)
          encoder_states = [state_h]
        
        elif self.num_enc_dec == 2:
          encoder_rnn_1 = SimpleRNN(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_RNN_")
          encoder_outputs_1, state_h_1 = encoder_rnn_1(encoder_outputs)
          encoder_states_1 = [state_h_1]

          encoder_rnn = SimpleRNN(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_RNN_2")
          encoder_outputs, state_h = encoder_rnn(encoder_outputs_1)
          encoder_states = [state_h]
          
        elif self.num_enc_dec == 3:
          encoder_rnn_1 = SimpleRNN(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_RNN_1")
          encoder_outputs_1,state_h_1 = encoder_rnn_1(encoder_outputs)
          encoder_states_1 = [state_h_1]
          
          encoder_rnn_2 = SimpleRNN(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_RNN_2")
          encoder_outputs_2, state_h_2 = encoder_rnn_2(encoder_outputs_1)
          encoder_states_2 = [state_h_2]
          
          encoder_rnn = SimpleRNN(self.hidden_size, return_state=True,dropout = self.dropout, return_sequences=True, name="Enc_RNN_3")
          encoder_outputs, state_h = encoder_rnn(encoder_outputs_2)
          encoder_states = [state_h]


    # Set up the decoder, using `encoder_states` as initial state.
    decoder_inputs = Input(shape=(None,))
    dec_emb_layer = Embedding(num_decoder_tokens, self.hidden_size, mask_zero = True)
    dec_emb = dec_emb_layer(decoder_inputs)
    # We set up our decoder to return full output sequences,
    # and to return internal states as well. We don't use the
    # return states in the training model, but we will use them in inference.
    decoder_outputs = dec_emb
    if self.cell_type == 'LSTM':
    
      if self.num_enc_dec == 1:
        decoder_lstm = LSTM(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_LSTM_1")
        decoder_outputs, _, _ = decoder_lstm(decoder_outputs, initial_state = encoder_states)

      elif self.num_enc_dec == 2:
        decoder_lstm_1 = LSTM(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_LSTM_1")
        decoder_outputs_1, _, _ = decoder_lstm_1(decoder_outputs, initial_state = encoder_states_1)

        decoder_lstm = LSTM(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_LSTM_2")
        decoder_outputs, _, _ = decoder_lstm(decoder_outputs_1, initial_state = encoder_states)
      
      elif self.num_enc_dec == 3:
        decoder_lstm_1 = LSTM(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_LSTM_1")
        decoder_outputs_1, _, _ = decoder_lstm_1(decoder_outputs, initial_state = encoder_states_1)

        decoder_lstm_2 = LSTM(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_LSTM_2")
        decoder_outputs_2, _, _ = decoder_lstm_2(decoder_outputs, initial_state = encoder_states_2)

        decoder_lstm = LSTM(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_LSTM_3")
        decoder_outputs, _, _ = decoder_lstm(decoder_outputs_2, initial_state = encoder_states)

    elif self.cell_type == 'GRU':

      if self.num_enc_dec == 1:
        decoder_gru = GRU(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_GRU_1")
        decoder_outputs, _ = decoder_gru(decoder_outputs, initial_state = encoder_states)

      elif self.num_enc_dec == 2:
        decoder_gru_1 = GRU(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_GRU_1")
        decoder_outputs_1, _ = decoder_gru_1(decoder_outputs, initial_state = encoder_states_1)

        decoder_gru = GRU(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_GRU_2")
        decoder_outputs, _ = decoder_gru(decoder_outputs_1, initial_state = encoder_states)
      
      elif self.num_enc_dec == 3:
        decoder_gru_1 = GRU(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_GRU_1")
        decoder_outputs_1, _ = decoder_gru_1(decoder_outputs, initial_state = encoder_states_1)

        decoder_gru_2 = GRU(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_GRU_2")
        decoder_outputs_2, _ = decoder_gru_2(decoder_outputs, initial_state = encoder_states_2)

        decoder_gru = GRU(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_GRU_3")
        decoder_outputs, _ = decoder_gru(decoder_outputs_2, initial_state = encoder_states)

    elif self.cell_type == 'RNN':

      if self.num_enc_dec == 1:
        decoder_rnn = SimpleRNN(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_RNN_1")
        decoder_outputs, _ = decoder_rnn(decoder_outputs, initial_state = encoder_states)

      elif self.num_enc_dec == 2:
        decoder_rnn_1 = SimpleRNN(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_RNN_1")
        decoder_outputs_1, _ = decoder_rnn_1(decoder_outputs, initial_state = encoder_states_1)

        decoder_rnn = SimpleRNN(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_RNN_2")
        decoder_outputs, _ = decoder_rnn(decoder_outputs_1, initial_state = encoder_states)
      
      elif self.num_enc_dec == 3:
        decoder_rnn_1 = SimpleRNN(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_RNN_1")
        decoder_outputs_1, _ = decoder_rnn_1(decoder_outputs, initial_state = encoder_states_1)

        decoder_rnn_2 = SimpleRNN(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_RNN_2")
        decoder_outputs_2, _ = decoder_rnn_2(decoder_outputs, initial_state = encoder_states_2)

        decoder_rnn = SimpleRNN(self.hidden_size, return_sequences=True, return_state=True,dropout = self.dropout, name="Dec_RNN_3")
        decoder_outputs, _ = decoder_rnn(decoder_outputs_2, initial_state = encoder_states)

    hidden = Dense(128, activation="relu")
    hidden_outputs = hidden(decoder_outputs)
    drop = Dropout(self.dropout)
    dropout_out = drop(hidden_outputs)
    decoder_dense = Dense(num_decoder_tokens, activation='softmax')
    decoder_outputs = decoder_dense(dropout_out)

    # Define the model that takes encoder and decoder input 
    # to output decoder_outputs
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

    model.summary()

    plot_model(model, to_file='model.png', show_shapes=True)
    
    # Define the optimizer
    optimizer = Adam(lr=self.learning_rate, beta_1=0.9, beta_2=0.999)
    model.compile(loss = "categorical_crossentropy", optimizer = optimizer, metrics=['accuracy'])
  
    model.fit(
        [encoder_input_data, decoder_input_data],
        decoder_target_data,
        batch_size=self.batch_size,
        epochs=self.epochs,
        callbacks = [WandbCallback()]
        )
    
    model.save("seq2seq")
    
    output = model.predict(x_test, batch_size = self.batch_size)
    
    global_total = 0
    global_correct = 0
    
    for i in range(len(y_test)):
      local_correct = 0
      arr = y_test[i]
      true_arr = arr[np.nonzero(arr)]
      true_length = len(true_arr)
      
      if self.pred_type == 'greedy':
        beam_width = 1
        result = self.beam_search_decoder(output[i,:,:], beam_width)
        result = result[0][0]

      elif self.pred_type == 'beam_search':  
        beam_width = self.beam_width  
        result = self.beam_search_decoder(output[i,:,:], beam_width)
        result = result[self.beam_width-1][0]  
      
      pred_arr = result[:true_length-1]
      true_arr = true_arr[:true_length-1]

      for i in range(len(pred_arr)):
        if true_arr[i] == pred_arr[i]:
          local_correct = local_correct + 1
          global_total = global_total + 1
        else:
          global_total = global_total + 1
      
      global_correct = global_correct + local_correct
      
    val_accuracy = global_correct/global_total
    print(val_accuracy)

    wandb.log({'val_accuracy' : val_accuracy})

  def beam_search_decoder(self,data, k):
    sequences = [[list(), 0.0]]
    # walk over each step in sequence
    for row in data:
      all_candidates = list()
      # expand each current candidate
      for i in range(len(sequences)):
        seq, score = sequences[i]
        for j in range(len(row)):
          candidate = [seq + [j], score - log(row[j])]
          all_candidates.append(candidate)
      # order all candidates by score
      ordered = sorted(all_candidates, key=lambda tup:tup[1])
      # select k best
      sequences = ordered[:k]
    return sequences

Sweep

In [30]:
sweep_config = {
    'method': 'bayes', 
    'metric': {
      'name': 'val_accuracy',
      'goal': 'maximize'   
    },
    'parameters': {

        'dropout': {
            'values': [0.0, 0.1, 0.2]
        },
        'learning_rate': {
            'values': [1e-3, 1e-4]
        },
        'batch_size': {
            'values': [32, 64, 128]
        },
        'in_emb': {
            'values': [32, 64, 128]
        },
        'num_enc_dec': {
            'values': [1, 2, 3]
        },
        'hidden_size':{
            'values': [32, 64, 128]
        },
        'cell_type': {
            'values': ['RNN', 'GRU', 'LSTM']
        },
        'dec_search': {
            'values': ['beam_search', 'greedy']
        },
        'beam_width':{
            'values': [5,10]
        }
    }
}


In [31]:
# Initialize a new sweep
sweep_id = wandb.sweep(sweep_config, entity="cs6910assignment3", project="RNN")

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: 4lok3n9a
Sweep URL: https://wandb.ai/cs6910assignment3/RNN/sweeps/4lok3n9a


In [39]:
def train_sweep():
  config_defaults = {
        'dropout': 0.4,
        'learning_rate': 1e-3,
        'batch_size': 32,
        'epochs' : 10,
        'in_emb': 32,
        'num_enc_dec': 2,
        'hidden_size': 32,
        'cell_type': 'RNN',
        'dec_search': 'beam_search',
        'beam_width': 5
        }

  # Initialize a new wandb run
  wandb.init(config = config_defaults)
  
  # Config is a variable that holds and saves hyperparameters and inputs
  config = wandb.config
  wandb.run.name = 'cell_type_'+ str(config.cell_type)+'_dec_search_'+ config.dec_search+'_bs_'+str(config.batch_size)
  
  model_rnn = MyRNN(cell_type = config.cell_type, in_emb = config.in_emb, hidden_size=config.hidden_size,
                learning_rate= config.learning_rate, dropout=config.dropout,pred_type = config.dec_search,epochs = config.epochs,
                batch_size = config.batch_size, beam_width = config.beam_width, num_enc_dec = config.num_enc_dec)
  
  model_rnn.build_fit(encoder_input_data,decoder_input_data,decoder_target_data,x_test, y_test)

In [None]:
wandb.agent(sweep_id, train_sweep,count=100)
#wandb.agent("4lok3n9a", entity="cs6910assignment3",project="RNN", function =train,count=100)

[34m[1mwandb[0m: Agent Starting Run: mu3wqjjc with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_search: beam_search
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	in_emb: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_enc_dec: 1


Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, None, 64)     1664        input_1[0][0]                    
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, None, 64)     3072        input_2[0][0]                    
______________________________________________________________________________________________



INFO:tensorflow:Assets written to: seq2seq/assets


INFO:tensorflow:Assets written to: seq2seq/assets


0.35883778102397834


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,9.0
loss,0.59036
accuracy,0.44563
_runtime,646.0
_timestamp,1620323255.0
_step,10.0
val_accuracy,0.35884


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▃▃▂▂▂▁▁▁
accuracy,▁▅▅▆▆▇▇▇██
_runtime,▁▂▂▃▄▅▅▆▇██
_timestamp,▁▂▂▃▄▅▅▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█
val_accuracy,▁


[34m[1mwandb[0m: Agent Starting Run: 9nfo375n with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_search: beam_search
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	in_emb: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_enc_dec: 2


Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, None, 128)    3328        input_1[0][0]                    
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, None, 64)     3072        input_2[0][0]                    
______________________________________________________________________________________________