In [1]:
!pip install wandb

Collecting wandb
  Downloading wandb-0.12.16-py2.py3-none-any.whl (1.8 MB)
[?25l[K     |▏                               | 10 kB 30.6 MB/s eta 0:00:01[K     |▍                               | 20 kB 24.1 MB/s eta 0:00:01[K     |▌                               | 30 kB 12.7 MB/s eta 0:00:01[K     |▊                               | 40 kB 10.0 MB/s eta 0:00:01[K     |█                               | 51 kB 5.9 MB/s eta 0:00:01[K     |█                               | 61 kB 7.0 MB/s eta 0:00:01[K     |█▎                              | 71 kB 7.5 MB/s eta 0:00:01[K     |█▌                              | 81 kB 7.0 MB/s eta 0:00:01[K     |█▋                              | 92 kB 7.8 MB/s eta 0:00:01[K     |█▉                              | 102 kB 6.8 MB/s eta 0:00:01[K     |██                              | 112 kB 6.8 MB/s eta 0:00:01[K     |██▏                             | 122 kB 6.8 MB/s eta 0:00:01[K     |██▍                             | 133 kB 6.8 MB/s eta 0:00:01

In [2]:
from tqdm import tqdm
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import tensorflow as tf
from keras.models import Model
from keras.layers import Layer
from keras.layers import SimpleRNN, LSTM, GRU, Dense
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from tensorflow.keras.optimizers import Adam
from keras.losses import SparseCategoricalCrossentropy

### Load Data

In [None]:
TRAIN_PATH = "hi.translit.sampled.train.tsv"
VAL_PATH = "hi.translit.sampled.dev.tsv"
TEST_PATH = "hi.translit.sampled.test.tsv"

df = pd.read_csv(TRAIN_PATH, sep="\t", header=None, error_bad_lines=False).dropna()
hindi_words = [list(f'\t{word}\n') for word in df[0].tolist()]
english_words = [list(f'\t{word}\n') for word in df[1].tolist()]
# Creating sorted vocabulary of source and target language
english_characters = sorted(list(set([char for word in english_words for char in word])))
hindi_characters = sorted(list(set([char for word in hindi_words for char in word])))
# Creating essential parameters
max_encoder_seq_length = (np.max([len(i) for i in english_words]))
max_decoder_seq_length = (np.max([len(i) for i in hindi_words]))
num_encoder_tokens = len(english_characters)
num_decoder_tokens = len(hindi_characters)
# Mapping each character of vocabulary to index and vice versa
input_token_index = dict([(char, i) for i, char in enumerate(english_characters)])
inverse_input_token_index = {v: k for k, v in input_token_index.items()}
target_token_index = dict([(char, i) for i, char in enumerate(hindi_characters)])
inverse_target_token_index = {v: k for k, v in target_token_index.items()}

def load_encoder_decoder_data(filepath, sep='\t', header=None):
  df = pd.read_csv(filepath, sep=sep, header=header, error_bad_lines=False).dropna()
  decoder_target_data = np.zeros((df.shape[0],max_decoder_seq_length,num_decoder_tokens), dtype="float32")
  for i, hindi_text in enumerate(df[0].tolist()):
      hindi_text = f'\t{hindi_text}\n'
      for t, char in enumerate(hindi_text):
          if t > 0:
              decoder_target_data[i, t - 1, target_token_index[char]] = 1.0
      decoder_target_data[i, t:, target_token_index["\n"]] = 1.0
  encoder_input_data = []
  for word in df[1].tolist():
    encoder_input_data.append([input_token_index[char] for char in f'\t{word}\n'])
  encoder_input_data = sequence.pad_sequences(encoder_input_data, maxlen=max_encoder_seq_length, padding="post")
  decoder_input_data = []
  for word in df[0].tolist():
    decoder_input_data.append([target_token_index[char] for char in f'\t{word}\n'])
  decoder_input_data = sequence.pad_sequences(decoder_input_data, maxlen=max_decoder_seq_length, padding="post")
  return encoder_input_data, decoder_input_data, decoder_target_data

# Train split
train_encoder_input_data, train_decoder_input_data, train_decoder_target = load_encoder_decoder_data(TRAIN_PATH)
# Validation split
val_encoder_input_data, val_decoder_input_data, val_decoder_target = load_encoder_decoder_data(VAL_PATH)
# Test split
test_encoder_input_data, test_decoder_input_data, test_decoder_target = load_encoder_decoder_data(TEST_PATH)

buffer_size = 100000

### Define Model

In [4]:
class BahdanauAttention(Layer):
  def __init__(self, units):
    super().__init__()
    self.W_decoder = Dense(units)
    self.W_encoder = Dense(units)
    self.W_aggregate = Dense(1)

  def call(self, query, values):
    query_across_time = tf.expand_dims(query, 1)
    decoder_transform = self.W_decoder(query_across_time)
    encoder_transform = self.W_encoder(values)
    score = tf.nn.tanh(encoder_transform + decoder_transform)
    score = self.W_aggregate(score)
    attention_weights = tf.nn.softmax(score, axis=1)
    context = tf.reduce_sum((attention_weights * values), axis=1)
    return context, attention_weights


class Encoder(Model):
  def __init__(self, config):
    super(Encoder, self).__init__()
    self.config = config
    
    # Embedding layer: (num_encoder_tokens, input_embedding_size)
    self.encoder_embedding = Embedding(num_encoder_tokens, self.config.input_embedding_size, name='Encoder_embeddings')
    
    # Adding encoder layer
    self.get_cell = {
        'rnn': SimpleRNN(self.config.hidden_units, dropout=self.config.dropout, 
                   return_sequences=True, return_state=True,
                   recurrent_initializer=self.config.initializer, name=f"Encoder_0"),
        'gru': GRU(self.config.hidden_units, dropout=self.config.dropout, 
                   return_sequences=True, return_state=True,
                   recurrent_initializer=self.config.initializer, name=f"Encoder_0"),
        'lstm': LSTM(self.config.hidden_units, dropout=self.config.dropout, 
                   return_sequences=True, return_state=True,
                   recurrent_initializer=self.config.initializer, name=f"Encoder_0"),
    }
    self.encoder_cell = self.get_cell[self.config.cell_type]


  def call(self, encoder_input, hidden):
    encoder_embedded = self.encoder_embedding(encoder_input)
    if self.config.cell_type in ['rnn', 'gru']:
        output, state = self.get_cell[self.config.cell_type](encoder_embedded, initial_state=hidden)
    elif self.config.cell_type == "lstm":
        output, state, context= self.get_cell[self.config.cell_type](encoder_embedded, initial_state=hidden)
    return output, state

  def init_hidden_states(self):
      if self.config.cell_type in ['rnn', 'gru']:
          return tf.zeros((self.config.batch_size, self.config.hidden_units))
      else:
        return (tf.zeros((self.config.batch_size, self.config.hidden_units)),
                  tf.zeros((self.config.batch_size, self.config.hidden_units)))
        
class Decoder(Model):
  def __init__(self, config):
    super(Decoder, self).__init__()
    self.config = config
    # Attention Layer
    self.attention = BahdanauAttention(self.config.hidden_units)
    
    # Embedding layer: (num_decoder_tokens, input_embedding_size)
    self.decoder_embedding = Embedding(num_decoder_tokens, self.config.input_embedding_size)
    
    # Output dense layer: (num_decoder_tokens)
    self.decoder_dense = Dense(num_decoder_tokens)
    
    # Adding decoder layer
    self.get_cell = {
        'rnn': SimpleRNN(self.config.hidden_units, dropout=self.config.dropout, 
                   return_sequences=True, return_state=True,
                   recurrent_initializer=self.config.initializer, name=f"Encoder_0"),
        'gru': GRU(self.config.hidden_units, dropout=self.config.dropout, 
                   return_sequences=True, return_state=True,
                   recurrent_initializer=self.config.initializer, name=f"Encoder_0"),
        'lstm': LSTM(self.config.hidden_units, dropout=self.config.dropout, 
                   return_sequences=True, return_state=True,
                   recurrent_initializer=self.config.initializer, name=f"Encoder_0"),
    }
    self.decoder_cell = self.get_cell[self.config.cell_type]

  def call(self, decoder_inputs, hidden, encoder_output):
    context, attention_weights = self.attention(hidden, encoder_output)
    decoder_embedded = self.decoder_embedding(decoder_inputs)
    concatenated = tf.concat([tf.expand_dims(context, 1), decoder_embedded], axis=-1)
    if self.config.cell_type in ['rnn', 'gru']:
        output, state = self.get_cell[self.config.cell_type](concatenated)
    elif self.config.cell_type == "lstm":
        output, state, context= self.get_cell[self.config.cell_type](concatenated)
    output = self.decoder_dense(tf.reshape(output, (-1, output.shape[2])))
    return output, state, attention_weights

In [12]:
class TransliterationAttentionModel:
    def __init__(self, config):
        self.config = config
        self.encoder = Encoder(self.config)
        self.decoder = Decoder(self.config)
        self.optimizer = Adam(learning_rate=self.config.learning_rate)
        self.loss_func = SparseCategoricalCrossentropy(from_logits=True, reduction='none')

    @tf.function()    
    def train_one_batch(self, encoder_inputs, decoder_targets):
        loss = 0
        # Initialize hidden states to zeroes
        encoder_hidden = self.encoder.init_hidden_states()
        with tf.GradientTape() as tape:
            encoder_output, decoder_hidden = self.encoder(encoder_inputs, encoder_hidden)
            decoder_input = tf.expand_dims([target_token_index['\t']] * self.config.batch_size, 1)
            for t in range(1, decoder_targets.shape[1]):
                decoder_output, decoder_hidden, attention_weights = self.decoder(decoder_input, decoder_hidden, encoder_output)
                batch_loss = self.loss_func(decoder_targets[:, t], decoder_output)
                # Ignore pad index
                batch_loss *= tf.cast(tf.math.logical_not(tf.math.equal(decoder_targets[:, t], 0)), dtype=batch_loss.dtype)
                batch_loss = tf.reduce_mean(batch_loss)
                dec_input = tf.expand_dims(decoder_targets[:, t], 1)
                loss += batch_loss
        gradients = tape.gradient(loss, self.encoder.trainable_variables + self.decoder.attention.trainable_variables + self.decoder.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.encoder.trainable_variables + self.decoder.attention.trainable_variables + self.decoder.trainable_variables))
        return (loss / decoder_targets.shape[1])

    def train(self, dataset, batch_size, num_batches):
        for epoch in range(1, self.config.epochs+1):
            loss = 0
            # Iterate batches over dataset and train 
            for (batch, (encoder_inputs, decoder_targets)) in tqdm(enumerate(dataset.take(num_batches))):
                batch_loss = self.train_one_batch(encoder_inputs, decoder_targets)
                loss += batch_loss 
            print(f'Epoch {epoch} Loss {loss/num_batches:.4f}')        
    
    def calculate_accuracy(self, split='val'):
        if split == 'val':
            total,correct = 0, 0
            for i in range(len(val_decoder_input_data)):
                gt = ""
                for token in val_decoder_input_data[i][1:]:
                    if inverse_target_token_index[token]=="\n":
                        break
                    gt += inverse_target_token_index[token]
                pred = self.get_predicted_word(val_encoder_input_data[i])
                if gt == pred:
                    correct += 1
                total += 1
                if i % 100 == 0:
                    print(" gt: ",gt," pred: ",pred)
                    validation_word_accuracy=correct/total
                    wandb.log({"running_validation_word_accuracy": validation_word_accuracy})
            word_val_accuracy = correct / total
            print("validation accuracy: ",word_val_accuracy)
            wandb.log({'word_val_acc' : word_val_accuracy})
        else:
            total,correct = 0, 0
            for i in range(len(test_decoder_input_data)):
                gt = ""
                for token in test_decoder_input_data[i][1:]:
                    if inverse_target_token_index[token]=="\n":
                        break
                    gt += inverse_target_token_index[token]
                pred = self.get_predicted_word(test_encoder_input_data[i])
                if gt == pred:
                    correct += 1
                total += 1
                if i % 100 == 0:
                    print(" gt: ",gt," pred: ",pred)
            word_test_accuracy = correct / total
            print("test accuracy : " ,word_test_accuracy )
            wandb.log({'word_test_acc' : word_test_accuracy})
    
    def get_predicted_word(self, inputs):
        if self.config.cell_type in ['rnn', 'gru']:
            hidden = [tf.zeros((1, self.config.hidden_units))]
        else:
            hidden = [tf.zeros((1, self.config.hidden_units)),
                      tf.zeros((1, self.config.hidden_units))]          
        encoder_input = tf.expand_dims(tf.convert_to_tensor(inputs), 0)
        encoder_output, decoder_hidden = self.encoder(encoder_input, hidden)
        decoder_input = tf.expand_dims([target_token_index['\t']], 0)  # append start token
        predicted_word = ''
        for t in range(max_decoder_seq_length):
            decoder_output, decoder_hidden, attention_weights = self.decoder(decoder_input, decoder_hidden, encoder_output)
            token = np.argmax(decoder_output[0].numpy())
            if inverse_target_token_index[token] != "\n":  # if not end token
                predicted_word += inverse_target_token_index[token]
            else:
                return predicted_word
            decoder_input = tf.expand_dims([token], 0) 
        return predicted_word


In [13]:
import wandb
wandb.login()
sweep_config={
    'method': 'bayes',
    'metric': {
        'name': 'word_val_acc',
        'goal': 'maximize'
    },
    'parameters':{
        'dropout': {
            'values': [0, 0.3]
        },
        'learning_rate': {
            'values': [0.0006,0.001, 0.002]
        },
        'batch_size': {
            'values': [128,256]
        },
        'input_embedding_size': {
            'values': [128, 256, 512]
        },
        'hidden_units':{
            'values': [256, 512,768]
        },
        'cell_type': {
            'values': ['gru','lstm']
        },
        'epochs':{
            'values': [15,20]
        },
       'initializer':{
            'values':["glorot_uniform","orthogonal"]
        }
    }
}

sweep_id = wandb.sweep(sweep_config, project="cs6910_assigment3_attention", entity="cs21s006_cs21s043")
print('sweep_id: ', sweep_id)

def spawn_fn():
      with wandb.init(project="cs6910_assigment3_attention", entity="cs21s006_cs21s043") as run:
        config = wandb.config
        print(config)
        wandb.run.name = 'ep-'+str(config.epochs)+'-dr-'+str(config.dropout)+'-lr-'+str(config.learning_rate)+'-bs'+str(config.batch_size)+'-es-'+str(config.input_embedding_size)\
        +'-hs-'+str(config.hidden_units)+'-cell-'+str(config.cell_type)+'-init-'+str(config.initializer)
        num_batches = len(train_encoder_input_data)//config.batch_size
        dataset = tf.data.Dataset.from_tensor_slices((train_encoder_input_data, train_decoder_input_data)).shuffle(buffer_size)
        dataset = dataset.batch(config.batch_size, drop_remainder=True)
        model = TransliterationAttentionModel(config)
        model.train(dataset, config.batch_size, num_batches)
        print("Calculating validation scores:")
        model.calculate_accuracy(split='val')
        print("Calculating test scores:")
        model.calculate_accuracy(split='test')

Create sweep with ID: u0qyvmkv
Sweep URL: https://wandb.ai/cs21s006_cs21s043/cs6910_assigment3_attention/sweeps/u0qyvmkv
sweep_id:  u0qyvmkv


In [14]:
wandb.agent("h6lw7vkh", spawn_fn,count=5)

[34m[1mwandb[0m: Agent Starting Run: qve0573e with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_units: 768
[34m[1mwandb[0m: 	initializer: orthogonal
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.0006


{'batch_size': 128, 'cell_type': 'gru', 'dropout': 0.3, 'epochs': 20, 'hidden_units': 768, 'initializer': 'orthogonal', 'input_embedding_size': 256, 'learning_rate': 0.0006}










345it [01:07,  5.14it/s]


Epoch 1 Loss 0.9865   


345it [00:43,  7.89it/s]


Epoch 2 Loss 0.6817   


345it [00:44,  7.77it/s]


Epoch 3 Loss 0.2528   


345it [00:44,  7.68it/s]


Epoch 4 Loss 0.1756   


345it [00:45,  7.63it/s]


Epoch 5 Loss 0.1442   


345it [00:45,  7.59it/s]


Epoch 6 Loss 0.1318   


345it [00:45,  7.58it/s]


Epoch 7 Loss 0.1156   


345it [00:45,  7.57it/s]


Epoch 8 Loss 0.0993   


345it [00:45,  7.57it/s]


Epoch 9 Loss 0.1017   


345it [00:45,  7.57it/s]


Epoch 10 Loss 0.0801   


345it [00:45,  7.57it/s]


Epoch 11 Loss 0.0702   


345it [00:45,  7.57it/s]


Epoch 12 Loss 0.0630   


345it [00:45,  7.57it/s]


Epoch 13 Loss 0.0630   


345it [00:45,  7.58it/s]


Epoch 14 Loss 0.0665   


345it [00:45,  7.57it/s]


Epoch 15 Loss 0.0670   


345it [00:45,  7.57it/s]


Epoch 16 Loss 0.0567   


345it [00:45,  7.57it/s]


Epoch 17 Loss 0.0417   


345it [00:45,  7.57it/s]


Epoch 18 Loss 0.0366   


345it [00:45,  7.57it/s]


Epoch 19 Loss 0.0338   


345it [00:44,  7.79it/s]


Epoch 20 Loss 0.0445   
Calculating validation scores:
 gt:  अंकन  pred:  आंकन
 gt:  अनुसूया  pred:  अनुसूय
 gt:  असमानताएं  pred:  असमानतयान
 gt:  आश्चर्यजनक  pred:  आश्चर्यमणक
 gt:  उतराई  pred:  उतराई
 gt:  एड  pred:  एडी
 gt:  कंट्रोल्ड  pred:  कंट्रोल्ड
 gt:  काउंटर  pred:  कंटर
 gt:  कैरेक्टर्स  pred:  चैराक्टर्स
 gt:  खट्टा  pred:  खट्टा
 gt:  गीले  pred:  गिले
 gt:  चचेरी  pred:  चाचेरी
 gt:  छाछ  pred:  छाछ
 gt:  जनसूचना  pred:  जनसूचना
 gt:  जियान  pred:  जीएं
 gt:  झोंकने  pred:  झोंकने
 gt:  डायनासोरों  pred:  डायनसोरो
 gt:  तर  pred:  तार
 gt:  थैला  pred:  थैला
 gt:  दिलायी  pred:  दिलायी
 gt:  नक्सलवादियों  pred:  नकसलवादियों
 gt:  निरक्षरता  pred:  निरक्षरता
 gt:  पप्पी  pred:  पापी
 gt:  पिछड़ी  pred:  पिछड़ी
 gt:  प्रतिनिधत्व  pred:  प्रतिनिधि
 gt:  फालना  pred:  फलना
 gt:  बचाएं  pred:  बचाएं
 gt:  बाइंडर  pred:  बिंदर
 gt:  बैनेट  pred:  बैनेट
 gt:  भुगतता  pred:  भूगता
 gt:  मनाया  pred:  मानाया
 gt:  मिलिंद  pred:  मिलिंड
 gt:  मैनी  pred:  मान्य
 gt:  रफीक  pred:

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
running_validation_word_accuracy,▁▇▆▇▇▇▇▇▇▇▇▇▇▇█▇▇▇▇█████████████████████
word_test_acc,▁
word_val_acc,▁

0,1
running_validation_word_accuracy,0.39642
word_test_acc,0.39627
word_val_acc,0.39514


[34m[1mwandb[0m: Agent Starting Run: le6zqho9 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_units: 768
[34m[1mwandb[0m: 	initializer: glorot_uniform
[34m[1mwandb[0m: 	input_embedding_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.002


{'batch_size': 256, 'cell_type': 'lstm', 'dropout': 0.3, 'epochs': 20, 'hidden_units': 768, 'initializer': 'glorot_uniform', 'input_embedding_size': 128, 'learning_rate': 0.002}


172it [01:03,  2.69it/s]


Epoch 1 Loss 1.0036   


172it [00:38,  4.44it/s]


Epoch 2 Loss 0.5061   


172it [00:39,  4.34it/s]


Epoch 3 Loss 0.2049   


172it [00:40,  4.29it/s]


Epoch 4 Loss 0.1585   


172it [00:40,  4.25it/s]


Epoch 5 Loss 0.1331   


172it [00:40,  4.22it/s]


Epoch 6 Loss 0.1140   


172it [00:40,  4.21it/s]


Epoch 7 Loss 0.1135   


172it [00:40,  4.20it/s]


Epoch 8 Loss 0.0931   


172it [00:39,  4.32it/s]


Epoch 9 Loss 0.0758   


172it [00:40,  4.26it/s]


Epoch 10 Loss 0.0797   


172it [00:40,  4.23it/s]


Epoch 11 Loss 0.0653   


172it [00:40,  4.22it/s]


Epoch 12 Loss 0.0852   


172it [00:40,  4.20it/s]


Epoch 13 Loss 0.0759   


172it [00:41,  4.19it/s]


Epoch 14 Loss 0.0559   


172it [00:41,  4.18it/s]


Epoch 15 Loss 0.0457   


172it [00:41,  4.18it/s]


Epoch 16 Loss 0.0393   


172it [00:41,  4.17it/s]


Epoch 17 Loss 0.0347   


172it [00:39,  4.32it/s]


Epoch 18 Loss 0.0452   


172it [00:40,  4.27it/s]


Epoch 19 Loss 0.0572   


172it [00:40,  4.23it/s]


Epoch 20 Loss 0.0402   
Calculating validation scores:
 gt:  अंकन  pred:  आंकन
 gt:  अनुसूया  pred:  अनुसूया
 gt:  असमानताएं  pred:  असमानतयें
 gt:  आश्चर्यजनक  pred:  आश्चर्यजनक
 gt:  उतराई  pred:  उतराई
 gt:  एड  pred:  एडी
 gt:  कंट्रोल्ड  pred:  कॉन्ट्रोलर्ड
 gt:  काउंटर  pred:  कंटर
 gt:  कैरेक्टर्स  pred:  चार्स्टर्स
 gt:  खट्टा  pred:  खट्टा
 gt:  गीले  pred:  गीले
 gt:  चचेरी  pred:  चचेरी
 gt:  छाछ  pred:  छाछ
 gt:  जनसूचना  pred:  जनसूचना
 gt:  जियान  pred:  जिएं
 gt:  झोंकने  pred:  झोंकने
 gt:  डायनासोरों  pred:  डायनेसोरो
 gt:  तर  pred:  तर
 gt:  थैला  pred:  थाईला
 gt:  दिलायी  pred:  दिलाई
 gt:  नक्सलवादियों  pred:  नकसालवादियों
 gt:  निरक्षरता  pred:  निराक्षरता
 gt:  पप्पी  pred:  पापी
 gt:  पिछड़ी  pred:  पिछड़ी
 gt:  प्रतिनिधत्व  pred:  प्रतिनिधित्व
 gt:  फालना  pred:  फलना
 gt:  बचाएं  pred:  बचाएं
 gt:  बाइंडर  pred:  बिंदर
 gt:  बैनेट  pred:  बैनेट
 gt:  भुगतता  pred:  भुगताता
 gt:  मनाया  pred:  मानाया
 gt:  मिलिंद  pred:  मिलींड
 gt:  मैनी  pred:  मान्य
 gt:  र

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
running_validation_word_accuracy,▁▇▇▇▇▇▇▇▇▇▇▇████████████████████████████
word_test_acc,▁
word_val_acc,▁

0,1
running_validation_word_accuracy,0.41339
word_test_acc,0.40515
word_val_acc,0.41097


[34m[1mwandb[0m: Agent Starting Run: 9x3k0ij9 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_units: 768
[34m[1mwandb[0m: 	initializer: orthogonal
[34m[1mwandb[0m: 	input_embedding_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001


{'batch_size': 256, 'cell_type': 'lstm', 'dropout': 0.3, 'epochs': 20, 'hidden_units': 768, 'initializer': 'orthogonal', 'input_embedding_size': 128, 'learning_rate': 0.001}


172it [01:04,  2.68it/s]


Epoch 1 Loss 1.0435   


172it [00:38,  4.42it/s]


Epoch 2 Loss 0.8352   


172it [00:39,  4.34it/s]


Epoch 3 Loss 0.4315   


172it [00:40,  4.28it/s]


Epoch 4 Loss 0.2209   


172it [00:40,  4.23it/s]


Epoch 5 Loss 0.1735   


172it [00:40,  4.21it/s]


Epoch 6 Loss 0.1493   


172it [00:41,  4.19it/s]


Epoch 7 Loss 0.1375   


172it [00:39,  4.31it/s]


Epoch 8 Loss 0.1173   


172it [00:40,  4.26it/s]


Epoch 9 Loss 0.1060   


172it [00:40,  4.22it/s]


Epoch 10 Loss 0.1456   


172it [00:40,  4.20it/s]


Epoch 11 Loss 0.1361   


172it [00:41,  4.19it/s]


Epoch 12 Loss 0.0987   


172it [00:39,  4.32it/s]


Epoch 13 Loss 0.0845   


172it [00:40,  4.26it/s]


Epoch 14 Loss 0.0772   


172it [00:40,  4.22it/s]


Epoch 15 Loss 0.0717   


172it [00:40,  4.20it/s]


Epoch 16 Loss 0.0711   


172it [00:40,  4.20it/s]


Epoch 17 Loss 0.0688   


172it [00:39,  4.33it/s]


Epoch 18 Loss 0.0556   


172it [00:40,  4.26it/s]


Epoch 19 Loss 0.0526   


172it [00:40,  4.22it/s]


Epoch 20 Loss 0.0501   
Calculating validation scores:
 gt:  अंकन  pred:  अंकन
 gt:  अनुसूया  pred:  अनुसूया
 gt:  असमानताएं  pred:  असमानतयें
 gt:  आश्चर्यजनक  pred:  अश्चर्यदनक
 gt:  उतराई  pred:  उतरे
 gt:  एड  pred:  एड
 gt:  कंट्रोल्ड  pred:  कॉन्ट्रोल्ड
 gt:  काउंटर  pred:  काउंटर
 gt:  कैरेक्टर्स  pred:  चारकतर्स
 gt:  खट्टा  pred:  खट्टा
 gt:  गीले  pred:  गिले
 gt:  चचेरी  pred:  चाचरी
 gt:  छाछ  pred:  छाच
 gt:  जनसूचना  pred:  जनसूच्णा
 gt:  जियान  pred:  जीयान
 gt:  झोंकने  pred:  झोंकाने
 gt:  डायनासोरों  pred:  डायनेनरो
 gt:  तर  pred:  तर्र
 gt:  थैला  pred:  थैला
 gt:  दिलायी  pred:  दिलायी
 gt:  नक्सलवादियों  pred:  नकसलवादियों
 gt:  निरक्षरता  pred:  निरक्षणता
 gt:  पप्पी  pred:  पेपी
 gt:  पिछड़ी  pred:  पिछड़ी
 gt:  प्रतिनिधत्व  pred:  प्रतिनिधित्व
 gt:  फालना  pred:  फलना
 gt:  बचाएं  pred:  बचाएं
 gt:  बाइंडर  pred:  बिंदर
 gt:  बैनेट  pred:  बैनेट
 gt:  भुगतता  pred:  भूगताता
 gt:  मनाया  pred:  मानाया
 gt:  मिलिंद  pred:  मिलींड
 gt:  मैनी  pred:  मान्य
 gt:  रफ

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
running_validation_word_accuracy,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▂▂
word_test_acc,▁
word_val_acc,▁

0,1
running_validation_word_accuracy,0.40153
word_test_acc,0.4036
word_val_acc,0.39972


[34m[1mwandb[0m: Agent Starting Run: r005d4oo with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_units: 768
[34m[1mwandb[0m: 	initializer: glorot_uniform
[34m[1mwandb[0m: 	input_embedding_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001


{'batch_size': 256, 'cell_type': 'lstm', 'dropout': 0.3, 'epochs': 20, 'hidden_units': 768, 'initializer': 'glorot_uniform', 'input_embedding_size': 128, 'learning_rate': 0.001}


172it [01:03,  2.69it/s]


Epoch 1 Loss 1.0407   


172it [00:39,  4.40it/s]


Epoch 2 Loss 0.7794   


172it [00:39,  4.32it/s]


Epoch 3 Loss 0.3809   


172it [00:40,  4.28it/s]


Epoch 4 Loss 0.2298   


172it [00:40,  4.24it/s]


Epoch 5 Loss 0.1788   


172it [00:40,  4.20it/s]


Epoch 6 Loss 0.1538   


172it [00:41,  4.19it/s]


Epoch 7 Loss 0.1374   


172it [00:41,  4.18it/s]


Epoch 8 Loss 0.1230   


172it [00:39,  4.32it/s]


Epoch 9 Loss 0.1083   


172it [00:40,  4.25it/s]


Epoch 10 Loss 0.0981   


172it [00:40,  4.21it/s]


Epoch 11 Loss 0.1079   


172it [00:41,  4.19it/s]


Epoch 12 Loss 0.0814   


172it [00:41,  4.19it/s]


Epoch 13 Loss 0.0764   


172it [00:39,  4.33it/s]


Epoch 14 Loss 0.0720   


172it [00:40,  4.27it/s]


Epoch 15 Loss 0.0870   


172it [00:40,  4.22it/s]


Epoch 16 Loss 0.0637   


172it [00:40,  4.21it/s]


Epoch 17 Loss 0.0617   


172it [00:41,  4.19it/s]


Epoch 18 Loss 0.0757   


172it [00:39,  4.32it/s]


Epoch 19 Loss 0.0728   


172it [00:40,  4.25it/s]


Epoch 20 Loss 0.0581   
Calculating validation scores:
 gt:  अंकन  pred:  अंकन
 gt:  अनुसूया  pred:  अनुसूया
 gt:  असमानताएं  pred:  असमानतयें
 gt:  आश्चर्यजनक  pred:  अश्चर्यजनक
 gt:  उतराई  pred:  उतरे
 gt:  एड  pred:  एडी
 gt:  कंट्रोल्ड  pred:  कंट्रोलेड
 gt:  काउंटर  pred:  कंटर
 gt:  कैरेक्टर्स  pred:  चारास्टर्स
 gt:  खट्टा  pred:  खत्टा
 gt:  गीले  pred:  गिले
 gt:  चचेरी  pred:  चचेरी
 gt:  छाछ  pred:  छच
 gt:  जनसूचना  pred:  जनसूचना
 gt:  जियान  pred:  जिएन
 gt:  झोंकने  pred:  झोंकने
 gt:  डायनासोरों  pred:  डायनसरो
 gt:  तर  pred:  तर्र
 gt:  थैला  pred:  थैला
 gt:  दिलायी  pred:  दिलाई
 gt:  नक्सलवादियों  pred:  नक्सलवादियों
 gt:  निरक्षरता  pred:  निराक्षरता
 gt:  पप्पी  pred:  पप्पी
 gt:  पिछड़ी  pred:  पिछड़ी
 gt:  प्रतिनिधत्व  pred:  प्रतिनिधित्व
 gt:  फालना  pred:  फलना
 gt:  बचाएं  pred:  बचाएं
 gt:  बाइंडर  pred:  बिंदर
 gt:  बैनेट  pred:  बैनेट
 gt:  भुगतता  pred:  भुगताता
 gt:  मनाया  pred:  मानाया
 gt:  मिलिंद  pred:  मिलींड
 gt:  मैनी  pred:  मान्य
 gt:  रफीक  

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
running_validation_word_accuracy,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂
word_test_acc,▁
word_val_acc,▁

0,1
running_validation_word_accuracy,0.39874
word_test_acc,0.39338
word_val_acc,0.39789


[34m[1mwandb[0m: Agent Starting Run: 1wgodbc6 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_units: 768
[34m[1mwandb[0m: 	initializer: orthogonal
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.0006


{'batch_size': 256, 'cell_type': 'gru', 'dropout': 0.3, 'epochs': 15, 'hidden_units': 768, 'initializer': 'orthogonal', 'input_embedding_size': 256, 'learning_rate': 0.0006}


172it [00:59,  2.89it/s]


Epoch 1 Loss 1.0507   


172it [00:36,  4.76it/s]


Epoch 2 Loss 0.9032   


172it [00:36,  4.69it/s]


Epoch 3 Loss 0.7836   


172it [00:37,  4.63it/s]


Epoch 4 Loss 0.5107   


172it [00:37,  4.60it/s]


Epoch 5 Loss 0.2613   


172it [00:37,  4.58it/s]


Epoch 6 Loss 0.1886   


172it [00:37,  4.57it/s]


Epoch 7 Loss 0.1567   


172it [00:37,  4.55it/s]


Epoch 8 Loss 0.1457   


172it [00:37,  4.54it/s]


Epoch 9 Loss 0.1295   


172it [00:37,  4.54it/s]


Epoch 10 Loss 0.1162   


172it [00:37,  4.53it/s]


Epoch 11 Loss 0.1195   


172it [00:37,  4.54it/s]


Epoch 12 Loss 0.1077   


172it [00:37,  4.54it/s]


Epoch 13 Loss 0.0889   


172it [00:37,  4.54it/s]


Epoch 14 Loss 0.0793   


172it [00:37,  4.54it/s]


Epoch 15 Loss 0.0819   
Calculating validation scores:
 gt:  अंकन  pred:  एंकन
 gt:  अनुसूया  pred:  अनुस्य
 gt:  असमानताएं  pred:  असमांताएं
 gt:  आश्चर्यजनक  pred:  अश्चर्यजनक
 gt:  उतराई  pred:  उत्रे
 gt:  एड  pred:  एडी
 gt:  कंट्रोल्ड  pred:  कंट्रॉल्ड
 gt:  काउंटर  pred:  काउंटर
 gt:  कैरेक्टर्स  pred:  चराक्टर्स
 gt:  खट्टा  pred:  खत्ता
 gt:  गीले  pred:  गिले
 gt:  चचेरी  pred:  चचरी
 gt:  छाछ  pred:  छाछ
 gt:  जनसूचना  pred:  जनसूचना
 gt:  जियान  pred:  जियन
 gt:  झोंकने  pred:  झोंकने
 gt:  डायनासोरों  pred:  डायूमासोरो
 gt:  तर  pred:  तर्र
 gt:  थैला  pred:  ठैला
 gt:  दिलायी  pred:  दिलायी
 gt:  नक्सलवादियों  pred:  नकसाल्वादियों
 gt:  निरक्षरता  pred:  निरक्षरता
 gt:  पप्पी  pred:  पाप्पी
 gt:  पिछड़ी  pred:  पिछड़ी
 gt:  प्रतिनिधत्व  pred:  प्रतिनिधित्व
 gt:  फालना  pred:  फलना
 gt:  बचाएं  pred:  बचाएं
 gt:  बाइंडर  pred:  बिंदर
 gt:  बैनेट  pred:  बैनेट
 gt:  भुगतता  pred:  भूगताता
 gt:  मनाया  pred:  मानाया
 gt:  मिलिंद  pred:  मिलिंड
 gt:  मैनी  pred:  मान्य
 gt:  

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
running_validation_word_accuracy,▁██▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
word_test_acc,▁
word_val_acc,▁

0,1
running_validation_word_accuracy,0.39363
word_test_acc,0.39627
word_val_acc,0.39261
