In [29]:
import numpy as np
import pandas as pd
import os

import tensorflow 
import keras
from keras.layers import Input, LSTM, Dense, Embedding, GRU, Dropout, SimpleRNN
from keras.models import Model
from tensorflow.keras.optimizers import Adam
from math import log

In [30]:
%pip install wandb -q
import wandb
from wandb.keras import WandbCallback

In [31]:
# Download the dataset
if not os.path.exists('/content/dakshina_dataset_v1.0.tar'):
    !wget "https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar"

In [32]:
!tar -xvf /content/dakshina_dataset_v1.0.tar

dakshina_dataset_v1.0/bn/
dakshina_dataset_v1.0/bn/lexicons/
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.test.tsv
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.train.tsv
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.dev.tsv
dakshina_dataset_v1.0/bn/native_script_wikipedia/
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.valid.text.shuf.txt.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-full.info.sorted.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.train.info.sorted.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.train.text.sorted.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.train.text.shuf.txt.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-full.nonblock.sections.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-full.omit_pages.txt.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-full.text.sorted.tsv.gz
dakshina_dataset_v1.0/bn/na

In [33]:
train_data_path = '/content/dakshina_dataset_v1.0/te/lexicons/te.translit.sampled.train.tsv'
validation_data_path = '/content/dakshina_dataset_v1.0/te/lexicons/te.translit.sampled.dev.tsv'
test_data_path = '/content/dakshina_dataset_v1.0/te/lexicons/te.translit.sampled.test.tsv'

df_train = pd.read_csv(train_data_path, sep='\t', header=None)
df_val = pd.read_csv(validation_data_path, sep='\t', header=None)
df_test = pd.read_csv(test_data_path, sep='\t', header=None)

In [34]:
predictions_path = '/content/predictions.tsv'

In [35]:
def load_data(df, input_texts, target_texts, input_characters=None, target_characters=None, is_test_data=False):
    
    for  _, row in df.iterrows():
        input_text, target_text = str(row[0]), str(row[1])
        input_texts.append(input_text)
        target_text = '\t' + target_text + '\n'
        target_texts.append(target_text)
        
        if not is_test_data:
            for char in input_text:
                if char not in input_characters:
                    input_characters.add(char)

            for char in target_text:
                if char not in target_characters:
                    target_characters.add(char)

    max_encoder_seq_length = max([len(txt) for txt in input_texts])
    max_decoder_seq_length = max([len(txt) for txt in target_texts])

    if not is_test_data:
        input_characters = sorted(list(input_characters))
        target_characters = sorted(list(target_characters))
        num_encoder_tokens = len(input_characters)
        num_decoder_tokens = len(target_characters)

        return max_encoder_seq_length, max_decoder_seq_length, num_encoder_tokens, num_decoder_tokens
    
    return max_encoder_seq_length, max_decoder_seq_length

In [36]:
def get_vectors(input_texts, target_texts, input_token_index, target_token_index,
                max_encoder_seq_length, num_encoder_tokens, 
                max_decoder_seq_length=None, num_decoder_tokens=None,
                is_test_data=False):
      
    encoder_input_data = np.zeros((len(input_texts), max_encoder_seq_length), dtype="float32")

    if not is_test_data:
        decoder_input_data = np.zeros((len(input_texts), max_decoder_seq_length), dtype="float32")
        decoder_target_data = np.zeros((len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype="float32")

    for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
        for t, char in enumerate(input_text):
            encoder_input_data[i, t] = input_token_index[char]
        encoder_input_data[i, t + 1 :] = input_token_index[" "]


        if not is_test_data:
            for t, char in enumerate(target_text):
                decoder_input_data[i, t] = target_token_index[char]
                if t > 0:
                    # decoder_target_data will be ahead by one timestep and will not include the start character.
                    # decoder_target_data[i, t - 1, target_token_index[char]] = 1.0
                    decoder_target_data[i, t - 1, target_token_index[char]] = 1.0
            decoder_input_data[i, t + 1: ] = target_token_index[" "]
            decoder_target_data[i, t:, target_token_index[" "]] = 1.0    

    if is_test_data:
        return encoder_input_data

    return encoder_input_data, decoder_input_data, decoder_target_data

### Load Train, Validation and Test data

In [37]:
input_texts = []
target_texts = []

input_characters = set(' ')
target_characters = set(' ')

max_encoder_seq_length, max_decoder_seq_length, num_encoder_tokens, num_decoder_tokens = load_data(df_train, input_texts, target_texts, input_characters, target_characters)

print("Number of samples:", len(input_texts))
print("Number of unique input tokens:", num_encoder_tokens)
print("Number of unique output tokens:", num_decoder_tokens)
print("Max sequence length for inputs:", max_encoder_seq_length)
print("Max sequence length for outputs:", max_decoder_seq_length)

Number of samples: 58550
Number of unique input tokens: 64
Number of unique output tokens: 29
Max sequence length for inputs: 20
Max sequence length for outputs: 27


In [38]:
val_input_texts = []
val_target_texts = []

val_max_encoder_seq_length, val_max_decoder_seq_length, val_num_encoder_tokens, val_num_decoder_tokens = load_data(
    df_val, val_input_texts, val_target_texts, input_characters, target_characters)

print("Number of samples:", len(val_input_texts))
print("Number of unique input tokens:", val_num_encoder_tokens)
print("Number of unique output tokens:", val_num_decoder_tokens)
print("Max sequence length for inputs:", val_max_encoder_seq_length)
print("Max sequence length for outputs:", val_max_decoder_seq_length)

Number of samples: 5683
Number of unique input tokens: 64
Number of unique output tokens: 29
Max sequence length for inputs: 19
Max sequence length for outputs: 23


In [39]:
test_input_texts = []
test_target_texts = []

test_max_encoder_seq_length, test_max_decoder_seq_length = load_data(df_test, test_input_texts, test_target_texts, is_test_data=True)

print("Number of Test samples:", len(test_input_texts))
print("Test Max sequence length for inputs:", test_max_encoder_seq_length)
print("Test Max sequence length for outputs:", test_max_decoder_seq_length)

Number of Test samples: 5747
Test Max sequence length for inputs: 18
Test Max sequence length for outputs: 25


In [40]:
input_token_index = dict([(char, i) for i, char in enumerate(input_characters)])
target_token_index = dict([(char, i) for i, char in enumerate(target_characters)])

In [41]:
reverse_input_char_index = dict((i, char) for char, i in input_token_index.items())
reverse_target_char_index = dict((i, char) for char, i in target_token_index.items())

In [42]:
# encoder_input_data, decoder_input_data, decoder_target_data = get_vectors(
#     input_texts, target_texts, input_token_index, target_token_index, 
#     max_encoder_seq_length, num_encoder_tokens, max_decoder_seq_length,
#     num_decoder_tokens)

In [43]:
# val_encoder_input_data, val_decoder_input_data, val_decoder_target_data = get_vectors(
#     val_input_texts, val_target_texts, input_token_index, target_token_index,
#     val_max_encoder_seq_length, val_num_encoder_tokens, 
#     val_max_decoder_seq_length, val_num_decoder_tokens)

In [44]:
# test_encoder_input_data = get_vectors(
#     test_input_texts, test_target_texts, input_token_index, target_token_index,
#     test_max_encoder_seq_length, num_encoder_tokens, is_test_data=True)

In [45]:
encoder_input_data = np.zeros(
    (len(input_texts), max_encoder_seq_length), dtype="float32"
)
decoder_input_data = np.zeros(
    (len(input_texts), max_decoder_seq_length), dtype="float32"
)
decoder_target_data = np.zeros(
    (len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype="float32"
)

for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t] = input_token_index[char]
    #encoder_input_data[i, t + 1 :] = input_token_index["P"]
    encoder_input_data[i, t + 1 :] = input_token_index[" "]

    for t, char in enumerate(target_text):
        # decoder_target_data is ahead of decoder_input_data by one timestep
        decoder_input_data[i, t] = target_token_index[char]
        if t > 0:
            # decoder_target_data will be ahead by one timestep
            # and will not include the start character.
            decoder_target_data[i, t - 1, target_token_index[char]] = 1.0
    #decoder_input_data[i, t + 1: ] = target_token_index["P"]
    decoder_input_data[i, t + 1: ] = target_token_index[" "]
    #decoder_target_data[i, t:, target_token_index["P"]] = 1.0
    decoder_target_data[i, t:, target_token_index[" "]] = 1.0


val_encoder_input_data = np.zeros(
    (len(input_texts), val_max_encoder_seq_length), dtype="float32"
)
val_decoder_input_data = np.zeros(
    (len(input_texts), val_max_decoder_seq_length), dtype="float32"
)
val_decoder_target_data = np.zeros(
    (len(input_texts), val_max_decoder_seq_length, num_decoder_tokens), dtype="float32"
)

for i, (input_text, target_text) in enumerate(zip(val_input_texts, val_target_texts)):
    for t, char in enumerate(input_text):
        val_encoder_input_data[i, t] = input_token_index[char]
    #encoder_input_data[i, t + 1 :] = input_token_index["P"]
    val_encoder_input_data[i, t + 1 :] = input_token_index[" "]

    for t, char in enumerate(target_text):
        # decoder_target_data is ahead of decoder_input_data by one timestep
        val_decoder_input_data[i, t] = target_token_index[char]
        if t > 0:
            # decoder_target_data will be ahead by one timestep
            # and will not include the start character.
            val_decoder_target_data[i, t - 1, target_token_index[char]] = 1.0
    #decoder_input_data[i, t + 1: ] = target_token_index["P"]
    val_decoder_input_data[i, t + 1: ] = target_token_index[" "]
    #decoder_target_data[i, t:, target_token_index["P"]] = 1.0
    val_decoder_target_data[i, t:, target_token_index[" "]] = 1.0

In [46]:
print(input_token_index)
print(target_token_index)
print(reverse_input_char_index)
print(reverse_target_char_index)

{'ం': 0, 'ౌ': 1, 'వ': 2, 'ఐ': 3, 'ఏ': 4, 'జ': 5, 'డ': 6, 'ో': 7, 'ణ': 8, 'ఉ': 9, 'ఢ': 10, 'ీ': 11, 'ద': 12, 'ఎ': 13, 'స': 14, 'ె': 15, 'శ': 16, 'ళ': 17, 'ై': 18, 'ా': 19, 'ఔ': 20, 'ః': 21, '\u200c': 22, 'ల': 23, 'ర': 24, 'భ': 25, 'ఠ': 26, 'క': 27, 'ఒ': 28, 'ృ': 29, 'థ': 30, 'ొ': 31, 'హ': 32, 'య': 33, 'ఖ': 34, 'మ': 35, 'న': 36, 'గ': 37, 'ష': 38, 'ట': 39, 'ఫ': 40, 'ఛ': 41, 'బ': 42, 'త': 43, 'ఞ': 44, 'ఋ': 45, ' ': 46, 'ి': 47, 'ఘ': 48, 'ఆ': 49, 'ఊ': 50, 'ఇ': 51, 'ే': 52, 'అ': 53, 'చ': 54, 'ప': 55, 'ధ': 56, 'ఱ': 57, 'ూ': 58, '్': 59, 'ు': 60, 'ఝ': 61, 'ఈ': 62, 'ఓ': 63}
{'x': 0, 'w': 1, 'a': 2, 'm': 3, 'p': 4, 'v': 5, 'h': 6, 't': 7, 'i': 8, 'n': 9, 'l': 10, 'r': 11, 'y': 12, 'z': 13, 'c': 14, 'q': 15, 'u': 16, 'j': 17, ' ': 18, '\n': 19, '\t': 20, 'b': 21, 's': 22, 'o': 23, 'f': 24, 'e': 25, 'k': 26, 'g': 27, 'd': 28}
{0: 'ం', 1: 'ౌ', 2: 'వ', 3: 'ఐ', 4: 'ఏ', 5: 'జ', 6: 'డ', 7: 'ో', 8: 'ణ', 9: 'ఉ', 10: 'ఢ', 11: 'ీ', 12: 'ద', 13: 'ఎ', 14: 'స', 15: 'ె', 16: 'శ', 17: 'ళ', 18: 'ై', 19: 'ా', 20:

In [47]:
x_test, y_test = val_encoder_input_data, val_target_texts

In [48]:
with open(predictions_path, 'w') as f:
    f.write('Telugu\tPredicted\tActual\n')

# Build Model

In [49]:
class CustomRNN(object):

    def __init__(self,cell_type = 'GRU',in_emb = 32, hidden_size=32, learning_rate= 1e-3, 
                dropout=0.4,pred_type ='greedy',epochs = 10, batch_size = 32,beam_width = 5,
                num_enc = 1,num_dec = 1):
      
      self.cell_type = cell_type
      self.in_emb = in_emb
      self.hidden_size = hidden_size
      self.learning_rate = learning_rate
      self.dropout = dropout
      self.pred_type = pred_type
      self.epochs = epochs
      self.batch_size = batch_size
      self.beam_width = beam_width
      self.num_enc = num_enc
      self.num_dec = num_dec

    def build_fit(self, encoder_input_data, decoder_input_data, decoder_target_data, x_test, y_test):
      
      # Define an input sequence and process it.
      encoder_inputs = Input(shape=(None, ), name='Enc_inputs')

      # Add an Embedding layer expecting input vocab of size num_encoder_tokens, and output embedding dimension of size in_enc.
      enc_emb = Embedding(num_encoder_tokens, self.in_emb, mask_zero=True, name='Enc_emb')(encoder_inputs)
      encoder_outputs = enc_emb

      customFunction = None
      if self.cell_type == 'LSTM':
          customFunction = LSTM
      elif self.cell_type == 'GRU':
          customFunction = GRU
      elif self.cell_type == 'RNN':
          customFunction = SimpleRNN
      
      encoder_lstm = customFunction(self.hidden_size, return_state=True, dropout = self.dropout, return_sequences=True, name="Enc_hidden_1")
      if self.cell_type == 'LSTM':
        encoder_outputs, state_h, state_c = encoder_lstm(encoder_outputs)
        encoder_states = [state_h, state_c]
      else:
        encoder_outputs, state_h = encoder_lstm(encoder_outputs)
        encoder_states = [state_h]

      # Add a LSTM layer with hidden_size internal units.
      for i in range(2, self.num_enc + 1):
        layer_name = ('Enc_hidden_%d') %i
        encoder_lstm = customFunction(self.hidden_size, return_state=True, dropout = self.dropout, return_sequences=True, name=layer_name)
        if self.cell_type == 'LSTM':
          encoder_outputs, state_h, state_c = encoder_lstm(encoder_outputs, initial_state=encoder_states)
          encoder_states = [state_h, state_c]
        else:
          encoder_outputs, state_h = encoder_lstm(encoder_outputs, initial_state=encoder_states)
          encoder_states = [state_h]

      # Set up the decoder, using `encoder_states` as initial state.
      decoder_inputs = Input(shape=(None,), name='Dec_inputs')
      dec_emb_layer = Embedding(num_decoder_tokens, self.hidden_size, mask_zero=True, name='Dec_emb')
      dec_emb = dec_emb_layer(decoder_inputs)

      # We set up our decoder to return full output sequences, and to return internal states as well. 
      # We don't use the return states in the training model, but we will use them in inference.
      decoder_outputs = dec_emb

      decoder_lstm = customFunction(self.hidden_size, return_sequences=True, return_state=True, dropout = self.dropout, name="Dec_hidden_1")
      if self.cell_type == 'LSTM':
        decoder_outputs, _, _ = decoder_lstm(decoder_outputs, initial_state=encoder_states)
      else:
        decoder_outputs, _ = decoder_lstm(decoder_outputs, initial_state=encoder_states)

      for i in range(2, self.num_dec + 1):
        layer_name = ('Dec_hidden_%d') %i
        decoder_lstm = customFunction(self.hidden_size, return_sequences=True, return_state=True, dropout=self.dropout, name=layer_name)
        if self.cell_type == 'LSTM':
          decoder_outputs, _, _ = decoder_lstm(decoder_outputs, initial_state=encoder_states)
        else:
          decoder_outputs, _ = decoder_lstm(decoder_outputs, initial_state=encoder_states)

      decoder_dense = Dense(num_decoder_tokens, activation='softmax', name='dense')
      decoder_outputs = decoder_dense(decoder_outputs)

      # Define the model that takes encoder and decoder input to output decoder_outputs
      model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

      model.summary()

      #plot_model(model, to_file='model.png', show_shapes=True)
      
      # Define the optimizer
      optimizer = Adam(learning_rate=self.learning_rate, beta_1=0.9, beta_2=0.999)
      model.compile(loss = "categorical_crossentropy", optimizer = optimizer, metrics=['accuracy'])
    
      print('fitting the model....')

      model.fit(
          [encoder_input_data, decoder_input_data],
          decoder_target_data,
          batch_size=self.batch_size,
          epochs=self.epochs,
          callbacks = [WandbCallback()]
          )
      
      #model.save("s2s")
      
      #model = keras.models.load_model("s2s")

      print('before the inference of the model....')
      
      encoder_model, decoder_model = self.inference_model(model)

      print('getting the inference of the model....')

      global_total = 0
      global_correct = 0
      for i in range(len(val_input_texts)):
        #input_seq = val_encoder_input_data[i : i + 1]
        input_seq = x_test[i : i + 1]
        result = self.decode_sequence(encoder_model,decoder_model,input_seq)
        #target = val_target_texts[i]
        target = y_test[i]
        target = target[1:len(target)-1]
        result = result[0:len(result)-1]
        with open(predictions_path, 'a') as f:
            f.write('{}\t{}\t{}\n'.format(df_val[0][i], result.strip(), target.strip()))
        #print("Target: %s \n Result: %s" % (target, result))

        if result.strip() == target.strip():
          global_correct = global_correct + 1
        
        global_total = global_total + 1
        accuracy_epoch = global_correct/global_total
        if global_total % 50 == 0:
          wandb.log({'epoch_accuracy' : accuracy_epoch})
        #print("Accuracy: %s" % (accuracy_epoch))
      
      val_accuracy = global_correct/global_total
      #print(val_accuracy)


      wandb.log({'val_accuracy' : val_accuracy})
      
    def inference_model(self,model):
      encoder_inputs = model.input[0]  # input_1
      if self.cell_type == 'RNN' or self.cell_type == 'GRU':
        encoder_outputs, state_h_enc = model.get_layer('Enc_hidden_'+ str(self.num_enc)).output
        encoder_states = [state_h_enc]
        encoder_model = Model(encoder_inputs, encoder_states)

        decoder_inputs = model.input[1]  # input_1
        decoder_outputs = model.get_layer('Dec_emb')(decoder_inputs)
        decoder_states_inputs = []
        decoder_states = []

        for i in range(1,self.num_dec +1):
          decoder_state_input_h = keras.Input(shape=(self.hidden_size,))
          curr_states_inputs = [decoder_state_input_h]
          decoder = model.get_layer('Dec_hidden_'+ str(i))
          decoder_outputs, state_h_dec = decoder(decoder_outputs, initial_state=curr_states_inputs)

          decoder_states += [state_h_dec]
          decoder_states_inputs += curr_states_inputs

      elif self.cell_type == 'LSTM':
        encoder_outputs, state_h_enc, state_c_enc = model.get_layer('Enc_hidden_'+ str(self.num_enc)).output  # lstm_1
        encoder_states = [state_h_enc, state_c_enc]
        encoder_model = Model(encoder_inputs, encoder_states)

        decoder_inputs = model.input[1]  # input_1
        decoder_outputs = model.get_layer('Dec_emb')(decoder_inputs)
        decoder_states_inputs = []
        decoder_states = []

        for i in range(1,self.num_dec +1):
          decoder_state_input_h = keras.Input(shape=(self.hidden_size,))
          decoder_state_input_c = keras.Input(shape=(self.hidden_size,))
          curr_states_inputs = [decoder_state_input_h, decoder_state_input_c]
          decoder = model.get_layer('Dec_hidden_'+ str(i))
          decoder_outputs, state_h_dec, state_c_dec = decoder(decoder_outputs, initial_state=curr_states_inputs)

          decoder_states += [state_h_dec, state_c_dec]
          decoder_states_inputs += curr_states_inputs


      decoder_dense = model.get_layer('dense')
      decoder_outputs = decoder_dense(decoder_outputs)
      decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

      return encoder_model,decoder_model

    def decode_sequence(self,encoder_model,decoder_model,input_seq):
      # Encode the input as state vectors.
      states_value = [encoder_model.predict(input_seq)] * self.num_dec
      
      # Generate empty target sequence of length 1.
      target_seq = np.zeros((1, 1))
      # Populate the first character of target sequence with the start character.
      target_seq[0, 0] = target_token_index['b']

      # Sampling loop for a batch of sequences
      # (to simplify, here we assume a batch of size 1).
      stop_condition = False
      decoded_sentence = ""

      while not stop_condition:
          if self.cell_type == 'RNN' or self.cell_type == 'GRU':
            dummy = decoder_model.predict([target_seq] + [states_value])
            output_tokens, states_value = dummy[0],dummy[1:]
            
          elif self.cell_type == 'LSTM':  
            dummy = decoder_model.predict([target_seq] + states_value)
            output_tokens, states_value = dummy[0],dummy[1:]
          
          #print(output_tokens[0,:,:])
          if self.pred_type == 'greedy':
            beam_w = 1
          elif self.pred_type == 'beam_search':
            beam_w = self.beam_width
          sampled_token_index = self.beam_search_decoder(output_tokens[0,:,:], beam_w)
          sampled_token_index = sampled_token_index[beam_w-1][0]

          # Sample a token
          sampled_token_index = np.argmax(output_tokens[0, -1, :])
          sampled_char = reverse_target_char_index[sampled_token_index]
          decoded_sentence += sampled_char

          # Exit condition: either hit max length
          # or find stop character.
          if sampled_char == 'E' or len(decoded_sentence) > max_decoder_seq_length:
              stop_condition = True

          # Update the target sequence (of length 1).
          target_seq = np.zeros((1, 1))
          target_seq[0, 0] = sampled_token_index

          # Update state

      return decoded_sentence
    
    def beam_search_decoder(self,data, k):
      sequences = [[list(), 0.0]]
      # walk over each step in sequence
      for row in data:
        all_candidates = list()
        # expand each current candidate
        for i in range(len(sequences)):
          seq, score = sequences[i]
          for j in range(len(row)):
            candidate = [seq + [j], score - log(row[j])]
            #candidate = [seq + [j], score - log1p(row[j])]
            all_candidates.append(candidate)
        # order all candidates by score
        ordered = sorted(all_candidates, key=lambda tup:tup[1])
        # select k best
        sequences = ordered[:k]
      return sequences

# Sweep Config

In [50]:
sweep_config = {
    'method': 'bayes', 
    'metric': {
      'name': 'val_accuracy',
      'goal': 'maximize'   
    },
    'parameters': {

        'dropout': {
            'values': [0.0, 0.1, 0.2]
        },
        'learning_rate': {
            'values': [1e-3, 1e-4]
        },
        'batch_size': {
            'values': [64, 128]
        },
        'in_emb': {
            'values': [32, 64, 128]
        },
        'num_enc': {
            'values': [1]
        },
        'num_dec': {
            'values': [1]
        },
        'hidden_size':{
            'values': [32]
        },
        'cell_type': {
            # 'values': ['RNN', 'GRU', 'LSTM']
            'values': ['GRU']
        },
        'dec_search': {
              'values': ['greedy']
            # 'values': ['beam_search', 'greedy']
        },
        'beam_width':{
            'values': [1]
        },
        'epochs': {
            'values': [10]
        }
    }
}

In [51]:
sweep_id = wandb.sweep(sweep_config, entity="cs21m010-cs21m041", project="DL_Assignment_3_a")

Create sweep with ID: xakmj1sd
Sweep URL: https://wandb.ai/cs21m010-cs21m041/DL_Assignment_3_a/sweeps/xakmj1sd


In [52]:
def train():

    # Create a new WandB run
    wandb.init(config=sweep_config)
    
    # Construct the run name
    config = wandb.config
    wandb.run.name = str(config.cell_type) + '_' + config.dec_search + '_bs_' + str(config.batch_size)
    
    model_rnn = CustomRNN(cell_type = config.cell_type, in_emb = config.in_emb, hidden_size=config.hidden_size,
                  learning_rate= config.learning_rate, dropout=config.dropout,pred_type = config.dec_search,epochs = config.epochs,
                  batch_size = config.batch_size, beam_width = config.beam_width, num_enc = config.num_enc,num_dec = config.num_dec)
    
    model_rnn.build_fit(encoder_input_data,decoder_input_data,decoder_target_data,x_test, y_test)

In [None]:
wandb.agent(sweep_id, train, count = 1)

[34m[1mwandb[0m: Agent Starting Run: kq4m1a2s with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_search: greedy
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	in_emb: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_dec: 1
[34m[1mwandb[0m: 	num_enc: 1


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Enc_inputs (InputLayer)        [(None, None)]       0           []                               
                                                                                                  
 Dec_inputs (InputLayer)        [(None, None)]       0           []                               
                                                                                                  
 Enc_emb (Embedding)            (None, None, 64)     4096        ['Enc_inputs[0][0]']             
                                                                                                  
 Dec_emb (Embedding)            (None, None, 32)     928         ['Dec_inputs[0][0]']             
                                                                                              