In [1]:
!pip install wandb
!pip install wordcloud
!pip install colour

Collecting wandb
  Downloading wandb-0.12.16-py2.py3-none-any.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 11.3 MB/s 
[?25hCollecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.5.12-py2.py3-none-any.whl (145 kB)
[K     |████████████████████████████████| 145 kB 11.0 MB/s 
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting setproctitle
  Downloading setproctitle-1.2.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (29 kB)
Collecting shortuuid>=0.5.0
  Downloading shortuuid-1.0.9-py3-none-any.whl (9.4 kB)
Collecting GitPython>=1.0.0
  Downloading GitPython-3.1.27-py3-none-any.whl (181 kB)
[K     |████████████████████████████████| 181 kB 52.4 MB/s 
[?25hCollecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
Collecting gitdb<5,>=4.0.1
  Downloading gitdb-4.0.9-py3-none-any.whl (63 kB)
[K     |████████████████████████████████| 63 kB 1.8 MB/s 
[?25hC

In [2]:
## Installing font for Hindi for matplotlib ##
!apt-get install -y fonts-lohit-deva
!fc-list :lang=hi family

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following packages were automatically installed and are no longer required:
  libnvidia-common-460 nsight-compute-2020.2.0
Use 'apt autoremove' to remove them.
The following NEW packages will be installed:
  fonts-lohit-deva
0 upgraded, 1 newly installed, 0 to remove and 42 not upgraded.
Need to get 78.2 kB of archives.
After this operation, 196 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/main amd64 fonts-lohit-deva all 2.95.4-2 [78.2 kB]
Fetched 78.2 kB in 1s (84.3 kB/s)
Selecting previously unselected package fonts-lohit-deva.
(Reading database ... 155203 files and directories currently installed.)
Preparing to unpack .../fonts-lohit-deva_2.95.4-2_all.deb ...
Unpacking fonts-lohit-deva (2.95.4-2) ...
Setting up fonts-lohit-deva (2.95.4-2) ...
Processing triggers for fontconfig (2.12.6-0ubuntu2) ...
Lohit Devanagari


In [3]:
import os
import random
import time
import wandb
import re, string
import numpy as np
import pandas as pd 
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from wordcloud import WordCloud, STOPWORDS
from collections import Counter
from colour import Color
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

import tensorflow as tf
from tensorflow.keras import layers
import tensorflow.keras.backend as K
from tensorflow.keras.preprocessing.text import Tokenizer

In [4]:
# Mount Google Drive so the dataset paths under /content/drive used by later cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#Data Loading

In [5]:
## Download the dataset ##
import requests
import tarfile

def download_data(save_path):
    """Download the Dakshina dataset archive and extract it into ``save_path``.

    The archive is streamed to ``data_assignment3.tar`` in the current working
    directory and then unpacked.

    Args:
        save_path: Directory the tar archive is extracted into.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """

    data_url = r"https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar"
    tar_path = "data_assignment3.tar"

    # Stream the body in chunks so the whole archive never has to sit in memory.
    with requests.get(data_url, allow_redirects=True, stream=True) as r:
        # Fail loudly here instead of falling through to tarfile.open() on a
        # file that was never written.
        r.raise_for_status()
        with open(tar_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1 << 20):
                f.write(chunk)

    # The context manager closes the archive even if extraction fails.
    with tarfile.open(tar_path) as tar_file:
        tar_file.extractall(save_path)



# uncomment the line below if downloading data for the 1st time
# download_data("/content/drive/MyDrive/DakshinaDataset")

#Preprocessing the Data

In [6]:
def get_data_files(language):
    """Build the lexicon file paths for one language of the Dakshina dataset.

    Returns a ``(train, validation, test)`` tuple of ``.tsv`` paths; the
    validation path points at the "dev" split.
    """
    path_pattern = "/content/drive/MyDrive/DakshinaDataset/dakshina_dataset_v1.0/{}/lexicons/{}.translit.sampled.{}.tsv"
    splits = ("train", "dev", "test")

    return tuple(path_pattern.format(language, language, split) for split in splits)


def add_start_end_tokens(df, cols, sos="\t", eos="\n"):
    """Wrap every value of the given columns in start/end markers, in place.

    Each value is converted to ``str`` and rewritten as ``sos + value + eos``
    (tab and newline by default). ``df`` is modified; nothing is returned.
    """
    for col in cols:
        df[col] = df[col].map(lambda value: sos + str(value) + eos)
    
def tokenize(lang, tokenizer=None):
    """Convert words into post-padded sequences of character ids.

    When no tokenizer is supplied, a character-level Keras ``Tokenizer`` is
    created and fitted on ``lang``; otherwise the supplied one is reused
    unchanged. Returns ``(padded_sequences, tokenizer)``.
    """
    if tokenizer is None:
        tokenizer = Tokenizer(char_level=True)
        tokenizer.fit_on_texts(lang)

    # Encoding and padding are the same whether the tokenizer is new or reused.
    sequences = tokenizer.texts_to_sequences(lang)
    padded = tf.keras.preprocessing.sequence.pad_sequences(sequences,
                                                           padding='post')

    return padded, tokenizer

def preprocess_data(fpath, input_lang_tokenizer=None, targ_lang_tokenizer=None):
    """Read a lexicon TSV and turn it into a shuffled dataset of id sequences.

    Column 1 of the file is tokenized as the model input and column 0 as the
    target; both get start/end markers added first.

    Args:
        fpath: Path to a tab-separated file without a header row.
        input_lang_tokenizer: Tokenizer to reuse for the input column; a new
            one is fitted when None.
        targ_lang_tokenizer: Tokenizer to reuse for the target column; a new
            one is fitted when None.

    Returns:
        ``(dataset, input_tokenizer, targ_tokenizer)`` where ``dataset`` yields
        ``(input_ids, target_ids)`` pairs.
    """

    # keep_default_na=False: by default pandas parses words such as "null",
    # "nan" or "na" as missing values, which str() would then turn into the
    # literal text "nan". Here every field is real text.
    df = pd.read_csv(fpath, sep="\t", header=None, keep_default_na=False)

    # Addition of start and end tokens
    add_start_end_tokens(df, [0, 1])

    input_lang_tensor, input_tokenizer = tokenize(df[1].astype(str).tolist(),
                                                  tokenizer=input_lang_tokenizer)

    targ_lang_tensor, targ_tokenizer = tokenize(df[0].astype(str).tolist(),
                                                tokenizer=targ_lang_tokenizer)

    dataset = tf.data.Dataset.from_tensor_slices((input_lang_tensor, targ_lang_tensor))
    # A buffer as large as the dataset shuffles over all examples.
    dataset = dataset.shuffle(len(dataset))

    return dataset, input_tokenizer, targ_tokenizer

#Building the Model

In [7]:
def get_layer(name, units, dropout, return_state=False, return_sequences=False):
    """Create a Keras recurrent layer of the requested kind.

    Args:
        name: One of "rnn" (SimpleRNN), "gru" or "lstm".
        units: Number of units of the layer.
        dropout: Value passed as the layer's ``dropout`` argument.
        return_state: Forwarded to the layer constructor.
        return_sequences: Forwarded to the layer constructor.

    Returns:
        The constructed layer.

    Raises:
        ValueError: If ``name`` is not a supported layer type. Previously this
            returned None, which only failed later when the "layer" was used.
    """
    supported = ("rnn", "gru", "lstm")
    if name not in supported:
        raise ValueError(f"Unknown layer type {name!r}; expected one of {supported}")

    layer_classes = {"rnn": layers.SimpleRNN,
                     "gru": layers.GRU,
                     "lstm": layers.LSTM}

    return layer_classes[name](units=units, dropout=dropout,
                               return_state=return_state,
                               return_sequences=return_sequences)

#Attention
class BahdanauAttention(tf.keras.layers.Layer):
    """Additive attention over encoder outputs.

    Scores each encoder position with ``V(tanh(W1(state) + W2(enc_out)))`` and
    returns the softmax-weighted sum of the encoder outputs.
    """

    def __init__(self, units):
        super().__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, enc_state, enc_out):
        """Return ``(context_vector, attention_weights)`` for one decoding step.

        ``enc_state`` is a tensor or list of tensors; they are concatenated on
        axis 1 to form the query.
        assumes enc_out is (batch, time, features) - TODO confirm
        """
        # Join all state tensors and add an axis at position 1 so the query
        # broadcasts against enc_out.
        query = tf.expand_dims(tf.concat(enc_state, 1), 1)

        energies = self.V(tf.nn.tanh(self.W1(query) + self.W2(enc_out)))

        # Normalise over axis 1 of the score tensor.
        attention_weights = tf.nn.softmax(energies, axis=1)

        context_vector = tf.reduce_sum(attention_weights * enc_out, axis=1)

        return context_vector, attention_weights


class Encoder(tf.keras.Model):
    """Embedding followed by a stack of recurrent layers of one type."""

    def __init__(self, layer_type, n_layers, units, encoder_vocab_size, embedding_dim, dropout):
        super().__init__()
        self.layer_type = layer_type
        self.n_layers = n_layers
        self.units = units
        self.dropout = dropout
        self.embedding = tf.keras.layers.Embedding(encoder_vocab_size, embedding_dim)
        self.create_rnn_layers()

    def call(self, x, hidden):
        """Embed ``x`` and run it through the stack; return ``(output, state)``.

        ``hidden`` is the initial state of the first layer. ``state`` is every
        element after the first of the last layer's return value.
        """
        stacked = self.rnn_layers[0](self.embedding(x), initial_state=hidden)

        # Each upper layer is called with the whole [output, *states] list of
        # the layer below.
        # NOTE(review): relies on Keras RNN layers treating a list input as
        # [inputs, *initial_state] - confirm for the TF version in use.
        for rnn in self.rnn_layers[1:]:
            stacked = rnn(stacked)

        return stacked[0], stacked[1:]

    def create_rnn_layers(self):
        """Instantiate ``n_layers`` recurrent layers that return sequences and state."""
        self.rnn_layers = [
            get_layer(self.layer_type, self.units, self.dropout,
                      return_sequences=True,
                      return_state=True)
            for _ in range(self.n_layers)
        ]

    def initialize_hidden_state(self, batch_size):
        """Return an all-zero initial state: two tensors for "lstm", one otherwise."""
        n_states = 2 if self.layer_type == "lstm" else 1
        return [tf.zeros((batch_size, self.units))] * n_states

class Decoder(tf.keras.Model):
    """Single-step decoder: embedding, optional attention, recurrent stack, softmax layer."""

    def __init__(self, layer_type, n_layers, units, decoder_vocab_size, embedding_dim, dropout, attention=False):
        super().__init__()

        self.layer_type = layer_type
        self.n_layers = n_layers
        self.units = units
        self.dropout = dropout
        self.attention = attention
        self.embedding_layer = layers.Embedding(input_dim=decoder_vocab_size,
                                                output_dim=embedding_dim)

        self.dense = layers.Dense(decoder_vocab_size, activation="softmax")
        self.flatten = layers.Flatten()
        # The attention sub-layer only exists when attention is enabled.
        if self.attention:
            self.attention_layer = BahdanauAttention(self.units)
        self.create_rnn_layers()

    def call(self, x, hidden, enc_out=None):
        """Run one decoding step.

        Returns ``(output, state, attention_weights)``; ``attention_weights``
        is None when attention is disabled. ``enc_out`` is only read when
        attention is enabled.
        """
        x = self.embedding_layer(x)

        attention_weights = None
        if self.attention:
            context_vector, attention_weights = self.attention_layer(hidden, enc_out)
            # Put the context vector in front of the embedded token on the last axis.
            x = tf.concat([tf.expand_dims(context_vector, 1), x], -1)

        stacked = self.rnn_layers[0](x, initial_state=hidden)

        # Upper layers receive the whole return value of the layer below.
        for rnn in self.rnn_layers[1:]:
            stacked = rnn(stacked)

        probabilities = self.dense(self.flatten(stacked[0]))

        return probabilities, stacked[1:], attention_weights

    def create_rnn_layers(self):
        """Build the stack: ``n_layers - 1`` sequence-returning layers plus a final one that does not."""
        stack = [
            get_layer(self.layer_type, self.units, self.dropout,
                      return_sequences=True,
                      return_state=True)
            for _ in range(self.n_layers - 1)
        ]

        stack.append(get_layer(self.layer_type, self.units, self.dropout,
                               return_sequences=False,
                               return_state=True))

        self.rnn_layers = stack

#Core Sequence to Sequence Model

In [8]:
class Seq2SeqModel():
    """Encoder-decoder transliteration model with a hand-written training loop.

    Typical use: construct, call ``set_vocabulary`` (which builds the encoder
    and decoder), call ``build`` to attach loss/optimizer/metric, then ``fit``.
    ``translate`` needs the sequence lengths that ``fit`` records.
    """

    def __init__(self, embedding_dim, encoder_layers, decoder_layers, layer_type, units, dropout, attention=False):
        self.embedding_dim = embedding_dim
        self.encoder_layers = encoder_layers
        self.decoder_layers = decoder_layers
        self.layer_type = layer_type
        self.units = units
        self.dropout = dropout
        self.attention = attention
        # One dict of metrics per finished epoch, appended by fit().
        self.stats = []
        # Overwritten by fit() and, when given, by evaluate().
        self.batch_size = 128

    def build(self, loss, optimizer, metric):
        """Attach the loss function, optimizer and accuracy metric used by the step functions."""
        self.loss = loss
        self.optimizer = optimizer
        self.metric = metric

    def set_vocabulary(self, input_tokenizer, targ_tokenizer):
        """Store both tokenizers and create the encoder/decoder sized to their vocabularies."""
        self.input_tokenizer = input_tokenizer
        self.targ_tokenizer = targ_tokenizer
        self.create_model()

    def create_model(self):
        """Instantiate the encoder and decoder from the stored hyper-parameters."""

        # +1 because tokenizer ids start at 1; id 0 is what padding uses.
        encoder_vocab_size = len(self.input_tokenizer.word_index) + 1
        decoder_vocab_size = len(self.targ_tokenizer.word_index) + 1

        self.encoder = Encoder(self.layer_type, self.encoder_layers, self.units, encoder_vocab_size,
                               self.embedding_dim, self.dropout)

        self.decoder = Decoder(self.layer_type, self.decoder_layers, self.units, decoder_vocab_size,
                               self.embedding_dim,  self.dropout, self.attention)

    @tf.function
    def train_step(self, input, target, enc_state):
        """Run one optimisation step on a batch.

        Returns ``(batch_loss, metric_value)``. ``batch_loss`` is the summed
        per-step loss divided by the target length; ``metric_value`` is the
        metric's running value since its last reset.
        """

        loss = 0

        with tf.GradientTape() as tape:

            # training=True switches the layers' dropout on; without it the
            # recurrent layers run in inference mode and `dropout` is ignored.
            enc_out, enc_state = self.encoder(input, enc_state, training=True)

            dec_state = enc_state
            dec_input = tf.expand_dims([self.targ_tokenizer.word_index["\t"]]*self.batch_size, 1)

            # Teacher forcing is decided once per batch. The draw is a tensor
            # op so it is re-sampled on every call; Python's random.random()
            # inside a tf.function would only run while tracing and freeze
            # the choice.
            use_teacher_forcing = tf.random.uniform(()) < self.teacher_forcing_ratio

            for t in range(1, target.shape[1]):

                preds, dec_state, _ = self.decoder(dec_input, dec_state, enc_out, training=True)
                loss += self.loss(target[:, t], preds)
                self.metric.update_state(target[:, t], preds)

                # Next input: the ground-truth character when teacher forcing,
                # otherwise the model's own prediction.
                predicted = tf.cast(tf.argmax(preds, 1), target.dtype)
                next_tokens = tf.where(use_teacher_forcing, target[:, t], predicted)
                dec_input = tf.expand_dims(next_tokens, 1)

        batch_loss = loss / target.shape[1]

        # Gradients are taken after leaving the tape context so the backward
        # pass itself is not recorded.
        variables = self.encoder.trainable_variables + self.decoder.trainable_variables
        gradients = tape.gradient(loss, variables)

        self.optimizer.apply_gradients(zip(gradients, variables))

        return batch_loss, self.metric.result()

    @tf.function
    def validation_step(self, input, target, enc_state):
        """Score one batch without updating weights; the decoder feeds on its own predictions.

        Returns ``(batch_loss, metric_value)`` like ``train_step``.
        """

        loss = 0

        enc_out, enc_state = self.encoder(input, enc_state, training=False)

        dec_state = enc_state
        dec_input = tf.expand_dims([self.targ_tokenizer.word_index["\t"]]*self.batch_size, 1)

        for t in range(1, target.shape[1]):

            preds, dec_state, _ = self.decoder(dec_input, dec_state, enc_out, training=False)
            loss += self.loss(target[:, t], preds)
            self.metric.update_state(target[:, t], preds)

            preds = tf.argmax(preds, 1)
            dec_input = tf.expand_dims(preds, 1)

        batch_loss = loss / target.shape[1]

        return batch_loss, self.metric.result()


    def fit(self, dataset, val_dataset, batch_size=128, epochs=10, use_wandb=False, teacher_forcing_ratio=1.0):
        """Train for ``epochs`` epochs, validating after each one.

        Args:
            dataset: Unbatched training dataset of (input, target) pairs.
            val_dataset: Unbatched validation dataset of the same form.
            batch_size: Batch size; incomplete final batches are dropped.
            epochs: Number of passes over ``dataset``.
            use_wandb: When True, each epoch's stats dict is sent to ``wandb.log``.
            teacher_forcing_ratio: Probability that a training batch is decoded
                with ground-truth inputs.

        Per-epoch results are appended to ``self.stats``.
        """

        self.batch_size = batch_size
        self.teacher_forcing_ratio = teacher_forcing_ratio

        steps_per_epoch = len(dataset) // self.batch_size
        steps_per_epoch_val = len(val_dataset) // self.batch_size

        dataset = dataset.batch(self.batch_size, drop_remainder=True)
        val_dataset = val_dataset.batch(self.batch_size, drop_remainder=True)

        # Record the padded sequence lengths; translate() needs them.
        sample_inp, sample_targ = next(iter(dataset))
        self.max_target_len = sample_targ.shape[1]
        self.max_input_len = sample_inp.shape[1]

        template = "\nTrain Loss: {0:.4f} Train Accuracy: {1:.4f} Validation Loss: {2:.4f} Validation Accuracy: {3:.4f}"

        print("-"*100)
        for epoch in range(1, epochs+1):
            print(f"EPOCH {epoch}\n")

            total_loss = 0
            total_acc = 0
            self.metric.reset_states()

            starting_time = time.time()
            enc_state = self.encoder.initialize_hidden_state(self.batch_size)

            print("Training ...\n")
            for batch, (input, target) in enumerate(dataset.take(steps_per_epoch)):
                batch_loss, acc = self.train_step(input, target, enc_state)
                total_loss += batch_loss
                # NOTE: `acc` is the metric's running value since the reset
                # above, so the epoch figure is a mean of running accuracies.
                total_acc += acc

                if batch == 0 or ((batch + 1) % 100 == 0):
                    print(f"Batch {batch+1} Loss {batch_loss:.4f}")

            avg_acc = total_acc / steps_per_epoch
            avg_loss = total_loss / steps_per_epoch

            # Validation
            total_val_loss = 0
            total_val_acc = 0
            self.metric.reset_states()

            enc_state = self.encoder.initialize_hidden_state(self.batch_size)

            print("\nValidating ...")
            for batch, (input, target) in enumerate(val_dataset.take(steps_per_epoch_val)):
                batch_loss, acc = self.validation_step(input, target, enc_state)
                total_val_loss += batch_loss
                total_val_acc += acc

            avg_val_acc = total_val_acc / steps_per_epoch_val
            avg_val_loss = total_val_loss / steps_per_epoch_val

            print(template.format(avg_loss, avg_acc*100, avg_val_loss, avg_val_acc*100))

            time_taken = time.time() - starting_time
            self.stats.append({"epoch": epoch,
                            "train loss": avg_loss,
                            "val loss": avg_val_loss,
                            "train acc": avg_acc*100,
                            "val acc": avg_val_acc*100,
                            "training time": time_taken})

            if use_wandb:
                wandb.log(self.stats[-1])

            print(f"\nTime taken for the epoch {time_taken:.4f}")
            print("-"*100)

        print("\nModel trained successfully !!")

    def evaluate(self, test_dataset, batch_size=None):
        """Compute average loss and accuracy over a test dataset.

        Args:
            test_dataset: Unbatched dataset of (input, target) pairs.
            batch_size: Batch size to use; when None the current
                ``self.batch_size`` is kept.

        Returns:
            ``(avg_test_loss, avg_test_acc)``.
        """

        if batch_size is not None:
            self.batch_size = batch_size

        # Always go through self.batch_size so the default batch_size=None
        # works instead of failing on `len(...) // None`.
        steps_per_epoch_test = len(test_dataset) // self.batch_size
        test_dataset = test_dataset.batch(self.batch_size, drop_remainder=True)

        total_test_loss = 0
        total_test_acc = 0
        self.metric.reset_states()

        enc_state = self.encoder.initialize_hidden_state(self.batch_size)

        print("\nRunning test dataset through the model...\n")
        for batch, (input, target) in enumerate(test_dataset.take(steps_per_epoch_test)):
            batch_loss, acc = self.validation_step(input, target, enc_state)
            total_test_loss += batch_loss
            total_test_acc += acc

        avg_test_acc = total_test_acc / steps_per_epoch_test
        avg_test_loss = total_test_loss / steps_per_epoch_test

        print(f"Test Loss: {avg_test_loss:.4f} Test Accuracy: {avg_test_acc:.4f}")

        return avg_test_loss, avg_test_acc


    def translate(self, word, get_heatmap=False):
        """Greedily decode the transliteration of a single word.

        Args:
            word: Input word without start/end markers.
            get_heatmap: When True, the attention weights of every emitted
                character are collected.

        Returns:
            ``(result, att_wts)``: the decoded string and the list of attention
            weights (empty unless ``get_heatmap`` is True). Decoding stops at
            the end marker, at the padding id, or after ``max_target_len - 1``
            steps.
        """

        word = "\t" + word + "\n"

        inputs = self.input_tokenizer.texts_to_sequences([word])
        inputs = tf.keras.preprocessing.sequence.pad_sequences(inputs,
                                                               maxlen=self.max_input_len,
                                                               padding="post")

        result = ""
        att_wts = []

        enc_state = self.encoder.initialize_hidden_state(1)
        enc_out, enc_state = self.encoder(inputs, enc_state)

        dec_state = enc_state
        dec_input = tf.expand_dims([self.targ_tokenizer.word_index["\t"]]*1, 1)

        for t in range(1, self.max_target_len):

            preds, dec_state, attention_weights = self.decoder(dec_input, dec_state, enc_out)

            token_id = tf.argmax(preds, 1)
            # .get(): id 0 (padding) has no entry in index_word and used to
            # raise KeyError; treat it like the end marker.
            next_char = self.targ_tokenizer.index_word.get(token_id.numpy().item())

            if next_char is None or next_char == "\n":
                break

            # Only real characters reach this point, so nothing has to be
            # trimmed afterwards. The old unconditional [:-1] dropped a valid
            # character when the length limit was hit without an end marker.
            result += next_char
            if get_heatmap:
                att_wts.append(attention_weights)

            dec_input = tf.expand_dims(token_id, 1)

        return result, att_wts


In [9]:
def randomly_evaluate(model, test_file=get_data_files("hi")[2], n=10):
    """Print the model's transliteration for ``n`` randomly sampled test words.

    Args:
        model: Object with a ``translate(word)`` method whose first return
            element is the predicted string.
        test_file: Tab-separated lexicon file without header; column 1 is used
            as the input word and column 0 as the reference.
        n: Number of rows to sample.
    """

    # keep_default_na=False keeps words such as "null" or "nan" as text
    # instead of letting pandas parse them as missing values.
    df = pd.read_csv(test_file, sep="\t", header=None, keep_default_na=False)
    df = df.sample(n=n).reset_index(drop=True)

    print(f"Randomly evaluating the model on {n} words\n")

    for i in range(n):
        word = str(df[1][i])

        print(f"Input word: {word}")
        print(f"Actual translation: {str(df[0][i])}")
        print(f"Model translation: {model.translate(word)[0]}\n")


def test_on_dataset(language, embedding_dim, encoder_layers, decoder_layers, layer_type, units, dropout, attention, teacher_forcing_ratio=1.0, save_outputs=None):
    """Train a model on one language, evaluate it on the test split and report word accuracy.

    Args:
        language: Language code passed to ``get_data_files``.
        embedding_dim, encoder_layers, decoder_layers, layer_type, units,
        dropout, attention: Forwarded to ``Seq2SeqModel``.
        teacher_forcing_ratio: Forwarded to ``Seq2SeqModel.fit``.
        save_outputs: Optional CSV path; when given, the inputs, targets and
            predictions for the test split are written there.

    Returns:
        The trained ``Seq2SeqModel``.
    """

    TRAIN_TSV, VAL_TSV, TEST_TSV = get_data_files(language)

    model = Seq2SeqModel(embedding_dim,
                         encoder_layers,
                         decoder_layers,
                         layer_type,
                         units,
                         dropout,
                         attention)

    dataset, input_tokenizer, targ_tokenizer = preprocess_data(TRAIN_TSV)
    # Validation data must be encoded with the tokenizers fitted on the training split.
    val_dataset, _, _ = preprocess_data(VAL_TSV, input_tokenizer, targ_tokenizer)

    model.set_vocabulary(input_tokenizer, targ_tokenizer)
    model.build(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                optimizer = tf.keras.optimizers.Adam(),
                metric = tf.keras.metrics.SparseCategoricalAccuracy())

    model.fit(dataset, val_dataset, epochs=20, use_wandb=False, teacher_forcing_ratio=teacher_forcing_ratio)

    # Character-level loss/accuracy; evaluate() prints the figures itself.
    test_dataset, _, _ = preprocess_data(TEST_TSV, model.input_tokenizer, model.targ_tokenizer)
    model.evaluate(test_dataset, batch_size=100)

    ## Word accuracy
    # keep_default_na=False: otherwise words such as "null" or "nan" are read
    # as missing values and compared as the literal text "nan".
    test_tsv = pd.read_csv(TEST_TSV, sep="\t", header=None, keep_default_na=False)
    inputs = test_tsv[1].astype(str).tolist()
    targets = test_tsv[0].astype(str).tolist()

    outputs = [model.translate(word)[0] for word in inputs]

    def word_level_acc(outputs, targets):
        # Fraction of words whose prediction matches the reference exactly.
        return np.sum(np.asarray(outputs) == np.array(targets)) / len(outputs)

    print(f"Word level accuracy: {word_level_acc(outputs, targets)}")

    if save_outputs is not None:
        df = pd.DataFrame()
        df["inputs"] = inputs
        df["targets"] = targets
        df["outputs"] = outputs
        df.to_csv(save_outputs)

    return model


#Model Connectivity Visualization

In [10]:
def get_lstm_output(decoder, x, hidden, enc_out=None):
    """Run one decoder step up to, but not including, its dense output layer.

    Applies the decoder's embedding, the optional attention step and the
    recurrent stack. Returns ``(output, state, attention_weights)``, where
    ``attention_weights`` is None when the decoder has attention disabled.
    """
    embedded = decoder.embedding_layer(x)

    attention_weights = None
    if decoder.attention:
        context_vector, attention_weights = decoder.attention_layer(hidden, enc_out)
        embedded = tf.concat([tf.expand_dims(context_vector, 1), embedded], -1)

    stacked = decoder.rnn_layers[0](embedded, initial_state=hidden)

    # Upper layers are fed the complete return value of the layer below.
    for rnn in decoder.rnn_layers[1:]:
        stacked = rnn(stacked)

    return stacked[0], stacked[1:], attention_weights

def get_output_from_embedding(encoder, x, hidden):
    """Run the encoder's recurrent stack on an already embedded input.

    This skips the encoder's embedding layer. Returns ``(output, state)``:
    the first element and the remaining elements of the last layer's result.
    """
    stacked = encoder.rnn_layers[0](x, initial_state=hidden)

    # Upper layers are fed the complete return value of the layer below.
    for rnn in encoder.rnn_layers[1:]:
        stacked = rnn(stacked)

    return stacked[0], stacked[1:]


def get_connectivity(model, word):
    """Decode ``word`` greedily and collect input gradients for each output character.

    For every emitted character, the gradient of that step's recurrent output
    with respect to the embedded input sequence is computed.

    Args:
        model: Trained ``Seq2SeqModel`` (needs ``max_input_len`` and
            ``max_target_len`` set by ``fit``).
        word: Input word without start/end markers.

    Returns:
        ``(result, gradient_list)``: the decoded string and one gradient tensor
        per decoded character, indexed by input position. Position 0 of each
        tensor belongs to the start marker added below.
    """

    word = "\t" + word + "\n"

    inputs = model.input_tokenizer.texts_to_sequences([word])
    inputs = tf.keras.preprocessing.sequence.pad_sequences(inputs,
                                                            maxlen=model.max_input_len,
                                                            padding="post")

    result = ""
    step_outputs = []

    enc_state = model.encoder.initialize_hidden_state(1)
    embedded_in = model.encoder.embedding(inputs)

    # persistent=True because one gradient is taken per decoding step.
    with tf.GradientTape(persistent=True, watch_accessed_variables=False) as tape:
        # embedded_in is a plain tensor, so it has to be watched explicitly.
        tape.watch(embedded_in)

        enc_out, enc_state = get_output_from_embedding(model.encoder, embedded_in, enc_state)

        dec_state = enc_state
        dec_input = tf.expand_dims([model.targ_tokenizer.word_index["\t"]]*1, 1)

        for t in range(1, model.max_target_len):

            lstm_out, dec_state, _ = get_lstm_output(model.decoder, dec_input, dec_state, enc_out)

            preds = model.decoder.dense(model.decoder.flatten(lstm_out))

            token_id = tf.argmax(preds, 1)
            # .get(): id 0 (padding) has no entry in index_word and used to
            # raise KeyError; treat it like the end marker.
            next_char = model.targ_tokenizer.index_word.get(token_id.numpy().item())

            if next_char is None or next_char == "\n":
                break

            # Only real characters are recorded, so no trailing element has to
            # be trimmed. The old unconditional [:-1] dropped a valid entry
            # when the length limit was reached without an end marker.
            result += next_char
            step_outputs.append(lstm_out)

            dec_input = tf.expand_dims(token_id, 1)

    # Take the gradients outside the tape context so the backward passes are
    # not themselves recorded on the persistent tape.
    gradient_list = [tape.gradient(step_out, embedded_in)[0] for step_out in step_outputs]
    del tape  # release the resources held by the persistent tape

    return result, gradient_list

In [11]:
from sklearn.preprocessing import MinMaxScaler
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

from IPython.display import HTML as html_print
from IPython.display import display
import tensorflow.keras.backend as K

#html 
def cstr(s, color='black'):
    """Return an HTML ``<text>`` snippet showing ``s`` on a ``color`` background.

    A single space gets extra left padding in place of visible content.
    """
    space_markup = "<text style=color:#000;padding-left:10px;background-color:{}> </text>"
    char_markup = "<text style=color:#000;background-color:{}>{} </text>"

    markup = space_markup if s == ' ' else char_markup
    # str.format ignores the surplus positional argument for space_markup.
    return markup.format(color, s)
	
# print html
def print_color(t):
    """Display a sequence of ``(text, colour)`` pairs as one line of highlighted HTML."""
    fragments = []
    for text, colour in t:
        fragments.append(cstr(text, color=colour))
    display(html_print(''.join(fragments)))

# get color
def get_clr(value):
    """Map a score in [0, 1] to a hex colour on a 20-step blue-to-red palette.

    Args:
        value: Number, or size-1 array, expected in [0, 1]. Values outside
            that range are clamped to the nearest end of the palette.

    Returns:
        A ``'#rrggbb'`` string.
    """
    # The comma after '#a1d0e8' was missing, which fused two entries into the
    # invalid colour '#a1d0e8#b2d9ec' and shifted the rest of the palette.
    colors = ['#85c2e1', '#89c4e2', '#95cae5', '#99cce6', '#a1d0e8',
      '#b2d9ec', '#baddee', '#c2e1f0', '#eff7fb', '#f9e8e8',
      '#f9e8e8', '#f9d4d4', '#f9bdbd', '#f8a8a8', '#f68f8f',
      '#f47676', '#f45f5f', '#f34343', '#f33b3b', '#f42e2e']
    last = len(colors) - 1
    # Same scaling as before (value * 19); clamping replaces the old special
    # case that only existed because the list was one entry short.
    index = int(value * last)
    return colors[min(max(index, 0), last)]

def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def softmax(x):
    """Softmax over all elements of ``x``.

    Args:
        x: Array-like of scores.

    Returns:
        Array of the same shape whose entries sum to 1. An empty input gives
        an empty array.
    """
    x = np.asarray(x)
    if x.size == 0:
        return np.exp(x)
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps exp() from overflowing to inf, which would give NaN.
    v = np.exp(x - np.max(x))
    return v / np.sum(v)

def get_gradient_norms(grad_list, word, activation="sigmoid"):
    """Reduce per-step gradients to one scaled magnitude per character of ``word``.

    Args:
        grad_list: One gradient tensor per output character, indexed by input
            position along axis 0, as returned by ``get_connectivity``.
        word: The input word without start/end markers.
        activation: "softmax", "scaler" (min-max scaling) or anything else
            for sigmoid.

    Returns:
        A list with one array of scaled magnitudes per entry of ``grad_list``.
    """
    grad_norms = []
    for grad_tensor in grad_list:
        grad_mags = tf.norm(grad_tensor, axis=1)
        # get_connectivity encodes "\t" + word + "\n", so position 0 is the
        # start marker. Skip it so that entry j lines up with word[j]; the
        # previous [:len(word)] slice was shifted by one character.
        grad_mags = grad_mags[1:len(word) + 1]
        if activation == "softmax":
            grad_mags_scaled = softmax(grad_mags)
        elif activation == "scaler":
            scaler = MinMaxScaler()
            # MinMaxScaler expects a 2-D (n_samples, n_features) input.
            grad_mags = tf.reshape(grad_mags, (-1,1))
            grad_mags_scaled = scaler.fit_transform(grad_mags)
        else:
            grad_mags_scaled = sigmoid(grad_mags)
        grad_norms.append(grad_mags_scaled)
    return grad_norms

def visualize(grad_norms, word, translated_word):
    """Print, for every output character, the input word coloured by gradient magnitude.

    ``grad_norms[i][j]`` is used as the colour score of ``word[j]`` for the
    i-th character of ``translated_word``.
    """
    print("Original Word:", word)
    print("Transliterated Word:", translated_word)
    for i, out_char in enumerate(translated_word):
        print("Connectivity Visualization for", out_char,":")
        step_norms = grad_norms[i]
        text_colours = [(word[j], get_clr(step_norms[j]))
                        for j in range(len(step_norms))]
        print_color(text_colours)

def visualise_connectivity(model, word, activation="sigmoid"):
    """Transliterate ``word`` with ``model`` and display its connectivity visualization."""
    prediction, gradients = get_connectivity(model, word)
    visualize(get_gradient_norms(gradients, word, activation), word, prediction)

#Testing without attention

In [13]:
# Train and test on Hindi without attention, using the best hyper-parameters found by the sweeps.
# Test-set predictions are written to predictions_vanilla.csv.
model = test_on_dataset(language="hi",
                        embedding_dim=256,
                        encoder_layers=3,
                        decoder_layers=3,
                        layer_type="lstm",
                        units=256,
                        dropout=0.2,
                        attention=False, save_outputs='predictions_vanilla.csv')

----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9883
Batch 100 Loss 1.0728
Batch 200 Loss 0.9839
Batch 300 Loss 0.9155

Validating ...

Train Loss: 1.1216 Train Accuracy: 66.9055 Validation Loss: 2.8826 Validation Accuracy: 44.6775

Time taken for the epoch 120.6969
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.8923
Batch 100 Loss 0.8731
Batch 200 Loss 0.7926
Batch 300 Loss 0.7589

Validating ...

Train Loss: 0.8292 Train Accuracy: 74.8300 Validation Loss: 3.2128 Validation Accuracy: 45.6532

Time taken for the epoch 33.6026
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.7498
Batch 100 Loss 0.6860
Batch 200 Loss 0.5596
Batch 300 Loss 0.5896

Validating ...

Train Loss: 0.6396 Train Accuracy: 79.4184 Validation Loss

#Testing with attention

In [16]:
# Same configuration as the attention=False run, but with attention enabled.
# Test-set predictions are written to predictions_attention.csv.
model2 = test_on_dataset(language="hi",
                        embedding_dim=256,
                        encoder_layers=3,
                        decoder_layers=3,
                        layer_type="lstm",
                        units=256,
                        dropout=0.2,
                        attention=True, save_outputs='predictions_attention.csv')

----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9899
Batch 100 Loss 1.0848
Batch 200 Loss 0.9344
Batch 300 Loss 0.8751

Validating ...

Train Loss: 1.0525 Train Accuracy: 68.1665 Validation Loss: 2.2913 Validation Accuracy: 52.4259

Time taken for the epoch 136.9153
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.8645
Batch 100 Loss 0.8185
Batch 200 Loss 0.7586
Batch 300 Loss 0.6920

Validating ...

Train Loss: 0.7778 Train Accuracy: 75.8738 Validation Loss: 2.4905 Validation Accuracy: 52.4783

Time taken for the epoch 40.7995
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.6454
Batch 100 Loss 0.6030
Batch 200 Loss 0.5231
Batch 300 Loss 0.4283

Validating ...

Train Loss: 0.5481 Train Accuracy: 80.8016 Validation Loss

In [17]:
def get_test_words(n):
    """Return ``n`` randomly sampled input words from the Hindi test lexicon.

    Args:
        n: Number of words to sample.

    Returns:
        A list of ``n`` strings taken from column 1 of the test TSV.
    """
    # Parse the file as the header-less TSV it is. It used to be read as
    # comma-separated, which made the first data row the column name, dropped
    # that row, and forced the lookup to hard-code that row's text.
    # keep_default_na=False keeps words such as "null" or "nan" as text.
    test_df = pd.read_csv(get_data_files("hi")[2], sep="\t", header=None,
                          keep_default_na=False)
    return test_df[1].astype(str).sample(n).tolist()

# Draw five random test words; the sample is not seeded, so it differs between runs.
test_words = get_test_words(5)
print(test_words)

['teekamgarh', 'sandli', 'singer', 'brahmleen', 'sandrbh']


In [18]:
# Show the connectivity visualization of the attention model (model2) for each sampled word,
# with gradient magnitudes min-max scaled.
for word in test_words:
    visualise_connectivity(model2, word, activation="scaler")

Original Word: teekamgarh
Transliterated Word: तीकामगढ़
Connectivity Visualization for त :


Connectivity Visualization for ी :


Connectivity Visualization for क :


Connectivity Visualization for ा :


Connectivity Visualization for म :


Connectivity Visualization for ग :


Connectivity Visualization for ढ :


Connectivity Visualization for ़ :


Original Word: sandli
Transliterated Word: सांडली
Connectivity Visualization for स :


Connectivity Visualization for ा :


Connectivity Visualization for ं :


Connectivity Visualization for ड :


Connectivity Visualization for ल :


Connectivity Visualization for ी :


Original Word: singer
Transliterated Word: सिंगर
Connectivity Visualization for स :


Connectivity Visualization for ि :


Connectivity Visualization for ं :


Connectivity Visualization for ग :


Connectivity Visualization for र :


Original Word: brahmleen
Transliterated Word: ब्रह्मलीन
Connectivity Visualization for ब :


Connectivity Visualization for ् :


Connectivity Visualization for र :


Connectivity Visualization for ह :


Connectivity Visualization for ् :


Connectivity Visualization for म :


Connectivity Visualization for ल :


Connectivity Visualization for ी :


Connectivity Visualization for न :


Original Word: sandrbh
Transliterated Word: संदृभ
Connectivity Visualization for स :


Connectivity Visualization for ं :


Connectivity Visualization for द :


Connectivity Visualization for ृ :


Connectivity Visualization for भ :


In [14]:
# Spot-check five random test words with `model`, the result of the attention=False run.
randomly_evaluate(model, n=5)

Randomly evaluating the model on 5 words

Input word: nihaalchand
Actual translation: निहालचंद
Model translation: निहीलचंद

Input word: paheliyan
Actual translation: पहेलियां
Model translation: पहिलियां

Input word: then
Actual translation: थेन
Model translation: थें

Input word: batlaakar
Actual translation: बतलाकर
Model translation: बतलाकर

Input word: nabj
Actual translation: नब्ज़
Model translation: नबाज



In [16]:
# Log in to Weights & Biases before the sweep cells run; this run prompted for an API key.
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit: ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

#WandB training

In [17]:
def train_with_wandb(language, epochs=20, project="cs6910-assignment3"):
    """Train one Seq2SeqModel configuration and log the run to Weights & Biases.

    Meant to be used as the function of a ``wandb.agent``: hyperparameters are
    read from ``wandb.config`` after ``wandb.init``, so values chosen by a
    sweep replace the defaults declared below.

    Args:
        language: Language code passed to ``get_data_files`` to locate the
            train/validation/test TSV files (e.g. ``"hi"``).
        epochs: Number of epochs passed to ``model.fit``. Defaults to 20,
            the value that used to be hard-coded.
        project: W&B project the run is logged under. Defaults to
            ``"cs6910-assignment3"``, the value that used to be hard-coded.

    Returns:
        None. Results are reported through the training output and W&B.
    """
    # Fallback hyperparameters for every key a sweep does not set itself.
    config_defaults = {
        "embedding_dim": 64,
        "enc_dec_layers": 1,
        "layer_type": "lstm",
        "units": 128,
        "dropout": 0,
        "attention": False,
        "teacher_forcing_ratio": 1.0,
    }

    # NOTE(review): resume=True is kept from the original call; confirm that
    # resuming is actually wanted for sweep-launched runs.
    wandb.init(config=config_defaults, project=project, resume=True)
    config = wandb.config

    ## 1. SELECT LANGUAGE ##
    # The test split is not needed for training, so it is discarded here.
    train_tsv, val_tsv, _ = get_data_files(language)

    ## 2. DATA PREPROCESSING ##
    dataset, input_tokenizer, targ_tokenizer = preprocess_data(train_tsv)
    # Reuse the tokenizers obtained from the training data for validation.
    val_dataset, _, _ = preprocess_data(val_tsv, input_tokenizer, targ_tokenizer)

    ## 3. CREATING THE MODEL ##
    # Encoder and decoder share the same depth (enc_dec_layers).
    model = Seq2SeqModel(embedding_dim=config.embedding_dim,
                         encoder_layers=config.enc_dec_layers,
                         decoder_layers=config.enc_dec_layers,
                         layer_type=config.layer_type,
                         units=config.units,
                         dropout=config.dropout,
                         attention=config.attention)

    ## 4. COMPILING THE MODEL ##
    model.set_vocabulary(input_tokenizer, targ_tokenizer)
    model.build(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                optimizer=tf.keras.optimizers.Adam(),
                metric=tf.keras.metrics.SparseCategoricalAccuracy())

    ## 5. FITTING AND VALIDATING THE MODEL ##
    model.fit(dataset,
              val_dataset,
              epochs=epochs,
              use_wandb=True,
              teacher_forcing_ratio=config.teacher_forcing_ratio)

# First sweep

In [20]:
# Grid sweep over network depth, hidden size and recurrent cell type.
# Every hyperparameter is expanded into the {"values": [...]} form used in
# the sweep configuration; the pairs are listed in the order they should
# appear under "parameters".
sweep_config = {
    "name": "Sweep 1- Assignment3",
    "method": "grid",
    "parameters": {
        hyperparameter: {"values": choices}
        for hyperparameter, choices in (
            ("enc_dec_layers", [1, 2, 3]),
            ("units", [64, 128, 256]),
            ("layer_type", ["rnn", "gru", "lstm"]),
        )
    },
}

In [25]:
# Create the sweep described by sweep_config in the cs6910-assignment3
# project and keep its ID so an agent can be attached to it.
sweep_id = wandb.sweep(sweep_config, project="cs6910-assignment3")

Create sweep with ID: l64qp4dv
Sweep URL: https://wandb.ai/harshagudivada/cs6910-assignment3/sweeps/l64qp4dv


In [26]:
# Start a sweep agent for sweep_id; for each run it starts it calls
# train_with_wandb("hi"). The lambda binds the language argument because the
# agent is given a zero-argument function here.
wandb.agent(sweep_id, function=lambda: train_with_wandb("hi"))

[34m[1mwandb[0m: Agent Starting Run: s15251gu with config:
[34m[1mwandb[0m: 	enc_dec_layers: 1
[34m[1mwandb[0m: 	layer_type: rnn
[34m[1mwandb[0m: 	units: 64


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 4.0162
Batch 100 Loss 1.1844
Batch 200 Loss 1.0742
Batch 300 Loss 0.9797

Validating ...

Train Loss: 1.2067 Train Accuracy: 63.2089 Validation Loss: 2.3059 Validation Accuracy: 47.9538

Time taken for the epoch 27.4635
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9280
Batch 100 Loss 0.9105
Batch 200 Loss 0.8629
Batch 300 Loss 0.8310

Validating ...

Train Loss: 0.8789 Train Accuracy: 74.9216 Validation Loss: 2.6213 Validation Accuracy: 48.3824

Time taken for the epoch 12.8931
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8129
Batch 100 Loss 0.8283
Batch 200 Loss 0.7775
Batch 300 Loss 0.7671

Validating ...

Train Loss: 0.8017 Train Accuracy: 77.0058 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▅▆▆▇▇▇▇▇▇▇█████████
train loss,█▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▇▇▇█▃▃▂▁▃▃▅▃▂▂▃▃▂▄▂▃
val loss,▁▂▃▃▅▅▆▆▆▆▆▆▇▇▇▇█▇██

0,1
epoch,20.0
train acc,83.92747
train loss,0.51406
training time,13.00186
val acc,44.06693
val loss,4.47287


[34m[1mwandb[0m: Agent Starting Run: 2bxog4mc with config:
[34m[1mwandb[0m: 	enc_dec_layers: 1
[34m[1mwandb[0m: 	layer_type: rnn
[34m[1mwandb[0m: 	units: 128


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 4.0132
Batch 100 Loss 1.0891
Batch 200 Loss 1.0175
Batch 300 Loss 0.9905

Validating ...

Train Loss: 1.1449 Train Accuracy: 65.9362 Validation Loss: 3.4433 Validation Accuracy: 36.5478

Time taken for the epoch 29.1032
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9687
Batch 100 Loss 0.9336
Batch 200 Loss 0.9290
Batch 300 Loss 0.9605

Validating ...

Train Loss: 0.9431 Train Accuracy: 73.0482 Validation Loss: 3.5167 Validation Accuracy: 36.1969

Time taken for the epoch 14.8346
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.9328
Batch 100 Loss 0.8830
Batch 200 Loss 0.9686
Batch 300 Loss 0.9186

Validating ...

Train Loss: 0.9145 Train Accuracy: 73.4536 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▆▆▆▆▇▇▇▇▇▇▇████████
train loss,█▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▁▄▁▄▄▄▄█▁█▄▄█▃▄▃▄▁▃
val loss,▃▃▂▄▂▂▃▃▁▇▁▄▄▂▄▄▄▄█▅

0,1
epoch,20.0
train acc,76.47495
train loss,0.79476
training time,14.65854
val acc,41.70057
val loss,4.30977


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: bq5ffc8f with config:
[34m[1mwandb[0m: 	enc_dec_layers: 1
[34m[1mwandb[0m: 	layer_type: rnn
[34m[1mwandb[0m: 	units: 256


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9732
Batch 100 Loss 1.1061
Batch 200 Loss 0.9567
Batch 300 Loss 0.9664

Validating ...

Train Loss: 1.0976 Train Accuracy: 67.2981 Validation Loss: 2.2364 Validation Accuracy: 53.2103

Time taken for the epoch 30.8828
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9278
Batch 100 Loss 0.9633
Batch 200 Loss 0.8982
Batch 300 Loss 0.8816

Validating ...

Train Loss: 0.9320 Train Accuracy: 73.1107 Validation Loss: 2.4037 Validation Accuracy: 56.0731

Time taken for the epoch 15.5107
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8958
Batch 100 Loss 0.8967
Batch 200 Loss 0.9257
Batch 300 Loss 0.9080

Validating ...

Train Loss: 0.9027 Train Accuracy: 73.6261 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▅▅▅▆▆▆▆▇▇▇▇▇▇▇█████
train loss,█▅▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▇█▄▆█▄█▁█▄▁▇█▁▁▁█▆▁▇
val loss,▁▁▃▂▂▄▂▅▂▄▆▂▂▇▇▇▂▃█▂

0,1
epoch,20.0
train acc,78.37331
train loss,0.72196
training time,15.24405
val acc,53.05576
val loss,3.36873


[34m[1mwandb[0m: Agent Starting Run: gt9vf7n6 with config:
[34m[1mwandb[0m: 	enc_dec_layers: 1
[34m[1mwandb[0m: 	layer_type: gru
[34m[1mwandb[0m: 	units: 64


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9850
Batch 100 Loss 1.2311
Batch 200 Loss 1.1029
Batch 300 Loss 1.0380

Validating ...

Train Loss: 1.3463 Train Accuracy: 63.7597 Validation Loss: 2.7036 Validation Accuracy: 39.5280

Time taken for the epoch 38.5021
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 1.0102
Batch 100 Loss 0.9028
Batch 200 Loss 0.9406
Batch 300 Loss 0.9363

Validating ...

Train Loss: 0.9465 Train Accuracy: 72.9261 Validation Loss: 2.6283 Validation Accuracy: 45.9883

Time taken for the epoch 9.4675
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.9135
Batch 100 Loss 0.9136
Batch 200 Loss 0.8952
Batch 300 Loss 0.9126

Validating ...

Train Loss: 0.8933 Train Accuracy: 74.1297 Validation Loss: 

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇███
train loss,█▅▄▄▄▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▄▄▇▆▆▅▄▆▅▅▆▆▆▇▇▇███
val loss,▅▄▄▁▃▃▆▇▆██▇█████▇▆▇

0,1
epoch,20.0
train acc,85.31684
train loss,0.46682
training time,9.46548
val acc,56.48913
val loss,2.96542


[34m[1mwandb[0m: Agent Starting Run: zld7i4qy with config:
[34m[1mwandb[0m: 	enc_dec_layers: 1
[34m[1mwandb[0m: 	layer_type: gru
[34m[1mwandb[0m: 	units: 128


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9962
Batch 100 Loss 1.2283
Batch 200 Loss 1.0356
Batch 300 Loss 0.9941

Validating ...

Train Loss: 1.2663 Train Accuracy: 64.8998 Validation Loss: 2.9369 Validation Accuracy: 38.5786

Time taken for the epoch 40.9375
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9294
Batch 100 Loss 0.9142
Batch 200 Loss 0.8942
Batch 300 Loss 0.9247

Validating ...

Train Loss: 0.9104 Train Accuracy: 73.6393 Validation Loss: 2.3318 Validation Accuracy: 51.6168

Time taken for the epoch 11.0349
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.9365
Batch 100 Loss 0.8527
Batch 200 Loss 0.8397
Batch 300 Loss 0.8541

Validating ...

Train Loss: 0.8669 Train Accuracy: 74.8236 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇███
train loss,█▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▅▅▃▃▄▄▄▄▄▄▅▅▆▆▆▇▇██
val loss,▅▁▂▆▇▅▆▇▇▇█▇▇▆▆▆▅▅▅▄

0,1
epoch,20.0
train acc,86.32695
train loss,0.4288
training time,11.07876
val acc,59.51086
val loss,2.91664


[34m[1mwandb[0m: Agent Starting Run: zqhh7qaa with config:
[34m[1mwandb[0m: 	enc_dec_layers: 1
[34m[1mwandb[0m: 	layer_type: gru
[34m[1mwandb[0m: 	units: 256


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9928
Batch 100 Loss 1.1951
Batch 200 Loss 1.0673
Batch 300 Loss 0.9616

Validating ...

Train Loss: 1.2028 Train Accuracy: 65.4167 Validation Loss: 2.8277 Validation Accuracy: 40.2119

Time taken for the epoch 41.6211
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9629
Batch 100 Loss 0.9069
Batch 200 Loss 0.8798
Batch 300 Loss 0.8587

Validating ...

Train Loss: 0.8980 Train Accuracy: 73.7311 Validation Loss: 3.1640 Validation Accuracy: 42.4303

Time taken for the epoch 11.4064
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8129
Batch 100 Loss 0.8276
Batch 200 Loss 0.8080
Batch 300 Loss 0.8376

Validating ...

Train Loss: 0.8360 Train Accuracy: 75.2387 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▄▄▄▅▅▅▆▆▆▇▇▇▇▇███
train loss,█▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▁▂▂▃▄▃▄▄▅▄▅▆▆▆▇▇███
val loss,▅▇██▇▆█▇▇▆▇▆▅▅▄▂▂▁▂▁

0,1
epoch,20.0
train acc,94.16086
train loss,0.18406
training time,11.57168
val acc,72.32613
val loss,2.33345


[34m[1mwandb[0m: Agent Starting Run: n2639p88 with config:
[34m[1mwandb[0m: 	enc_dec_layers: 1
[34m[1mwandb[0m: 	layer_type: lstm
[34m[1mwandb[0m: 	units: 64


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9920
Batch 100 Loss 1.2367
Batch 200 Loss 1.1685
Batch 300 Loss 1.1241

Validating ...

Train Loss: 1.3329 Train Accuracy: 64.0875 Validation Loss: 2.0543 Validation Accuracy: 57.3196

Time taken for the epoch 42.7186
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 1.1191
Batch 100 Loss 1.0025
Batch 200 Loss 0.9366
Batch 300 Loss 0.9151

Validating ...

Train Loss: 0.9759 Train Accuracy: 72.1594 Validation Loss: 2.1039 Validation Accuracy: 50.6612

Time taken for the epoch 11.1753
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.9858
Batch 100 Loss 0.8990
Batch 200 Loss 0.8881
Batch 300 Loss 0.8505

Validating ...

Train Loss: 0.8994 Train Accuracy: 74.0192 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▄▄▄▅▆▆▆▇▇▇▇▇██████
train loss,█▆▅▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▃▁▂▂▃▃▄▅▅▆▆▆▇▇▇▇▇███
val loss,▇█▆███▆▅▅▄▄▃▂▂▂▁▁▁▁▁

0,1
epoch,20.0
train acc,91.67356
train loss,0.25482
training time,10.96658
val acc,75.35545
val loss,1.55171


[34m[1mwandb[0m: Agent Starting Run: 2x642vgv with config:
[34m[1mwandb[0m: 	enc_dec_layers: 1
[34m[1mwandb[0m: 	layer_type: lstm
[34m[1mwandb[0m: 	units: 128


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9972
Batch 100 Loss 1.2539
Batch 200 Loss 1.0488
Batch 300 Loss 1.0036

Validating ...

Train Loss: 1.2356 Train Accuracy: 65.1146 Validation Loss: 2.1991 Validation Accuracy: 57.0478

Time taken for the epoch 43.7406
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 1.0076
Batch 100 Loss 0.9841
Batch 200 Loss 0.9292
Batch 300 Loss 0.8646

Validating ...

Train Loss: 0.9474 Train Accuracy: 72.5931 Validation Loss: 2.0973 Validation Accuracy: 53.8695

Time taken for the epoch 11.5563
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8789
Batch 100 Loss 0.8987
Batch 200 Loss 0.9031
Batch 300 Loss 0.8738

Validating ...

Train Loss: 0.8707 Train Accuracy: 74.7350 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▄▄▅▆▆▇▇▇▇▇███████
train loss,█▆▆▅▄▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▂▁▂▁▂▄▅▅▆▆▇▇▇███████
val loss,▇▆▆█▆▅▄▃▂▂▂▁▂▁▁▁▁▁▁▁

0,1
epoch,20.0
train acc,94.6893
train loss,0.162
training time,11.4842
val acc,79.73342
val loss,1.41906


[34m[1mwandb[0m: Agent Starting Run: rpvlb8y6 with config:
[34m[1mwandb[0m: 	enc_dec_layers: 1
[34m[1mwandb[0m: 	layer_type: lstm
[34m[1mwandb[0m: 	units: 256


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9901
Batch 100 Loss 1.1048
Batch 200 Loss 1.0544
Batch 300 Loss 0.9992

Validating ...

Train Loss: 1.1506 Train Accuracy: 66.6555 Validation Loss: 3.0604 Validation Accuracy: 38.1253

Time taken for the epoch 44.1408
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9659
Batch 100 Loss 0.8997
Batch 200 Loss 0.8506
Batch 300 Loss 0.8496

Validating ...

Train Loss: 0.8770 Train Accuracy: 74.2281 Validation Loss: 3.1814 Validation Accuracy: 42.7370

Time taken for the epoch 11.9551
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8133
Batch 100 Loss 0.8596
Batch 200 Loss 0.7726
Batch 300 Loss 0.8030

Validating ...

Train Loss: 0.7909 Train Accuracy: 76.0395 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▄▄▅▅▆▆▆▇▇▇▇▇█████
train loss,█▆▆▅▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▂▂▁▃▃▄▄▅▅▆▆▆▇▇▇████
val loss,▅▅▅█▅▆▅▄▄▄▃▃▃▂▂▂▁▁▂▁

0,1
epoch,20.0
train acc,95.8823
train loss,0.12978
training time,11.82671
val acc,76.54502
val loss,1.82271


[34m[1mwandb[0m: Agent Starting Run: yj1mdsqv with config:
[34m[1mwandb[0m: 	enc_dec_layers: 2
[34m[1mwandb[0m: 	layer_type: rnn
[34m[1mwandb[0m: 	units: 64


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9490
Batch 100 Loss 1.2727
Batch 200 Loss 1.1047
Batch 300 Loss 0.9777

Validating ...

Train Loss: 1.2635 Train Accuracy: 64.8533 Validation Loss: 2.2320 Validation Accuracy: 50.1420

Time taken for the epoch 48.4924
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 1.0075
Batch 100 Loss 0.9838
Batch 200 Loss 0.8453
Batch 300 Loss 0.8654

Validating ...

Train Loss: 0.9011 Train Accuracy: 74.3059 Validation Loss: 2.0438 Validation Accuracy: 52.4832

Time taken for the epoch 22.2788
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8239
Batch 100 Loss 0.8387
Batch 200 Loss 0.7430
Batch 300 Loss 0.7499

Validating ...

Train Loss: 0.7784 Train Accuracy: 77.6760 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▄▅▅▆▆▇▇▇▇▇█████████
train loss,█▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▅█▅▂▂▁▅▃▆▂▄▃▇▅▃▇▄▆▅▃
val loss,▂▁▃▅▅▆▆▆▅▆▆▇▆▇█▆▇▇▇█

0,1
epoch,20.0
train acc,88.96101
train loss,0.34279
training time,22.25735
val acc,48.84153
val loss,3.39686


[34m[1mwandb[0m: Agent Starting Run: wc8l325r with config:
[34m[1mwandb[0m: 	enc_dec_layers: 2
[34m[1mwandb[0m: 	layer_type: rnn
[34m[1mwandb[0m: 	units: 128


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 4.1200
Batch 100 Loss 1.0875
Batch 200 Loss 0.9299
Batch 300 Loss 0.8412

Validating ...

Train Loss: 1.0635 Train Accuracy: 66.1752 Validation Loss: 2.4857 Validation Accuracy: 48.0324

Time taken for the epoch 53.2194
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.7540
Batch 100 Loss 0.7358
Batch 200 Loss 0.6613
Batch 300 Loss 0.6864

Validating ...

Train Loss: 0.7018 Train Accuracy: 79.1032 Validation Loss: 2.9735 Validation Accuracy: 46.6360

Time taken for the epoch 25.8745
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.6187
Batch 100 Loss 0.5482
Batch 200 Loss 0.5156
Batch 300 Loss 0.5029

Validating ...

Train Loss: 0.5580 Train Accuracy: 82.7982 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▅▆▆▇▇▇▇▇▇██████████
train loss,█▅▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▅▃█▅▇▄▆▅▅▇▇▅▃▆▇▁▅▄▅▄
val loss,▁▃▂▃▃▄▄▅▅▅▅▆▆▅▅█▆▇▇▇

0,1
epoch,20.0
train acc,91.76124
train loss,0.2505
training time,25.67732
val acc,47.61367
val loss,3.94109


[34m[1mwandb[0m: Agent Starting Run: 8zmmi4a0 with config:
[34m[1mwandb[0m: 	enc_dec_layers: 2
[34m[1mwandb[0m: 	layer_type: rnn
[34m[1mwandb[0m: 	units: 256


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 4.0853
Batch 100 Loss 1.0874
Batch 200 Loss 0.9336
Batch 300 Loss 0.9033

Validating ...

Train Loss: 1.1138 Train Accuracy: 66.2955 Validation Loss: 2.3488 Validation Accuracy: 54.3357

Time taken for the epoch 54.7784
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.8677
Batch 100 Loss 0.7909
Batch 200 Loss 0.9607
Batch 300 Loss 0.9215

Validating ...

Train Loss: 0.9124 Train Accuracy: 74.9730 Validation Loss: 2.4690 Validation Accuracy: 53.5057

Time taken for the epoch 27.8660
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.9099
Batch 100 Loss 0.8801
Batch 200 Loss 0.9032
Batch 300 Loss 0.9133

Validating ...

Train Loss: 0.9256 Train Accuracy: 73.2382 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▆▅▅▅▆▆▆▆▇▇▇▇▇▇█████
train loss,█▄▅▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,██▅▄▃▇▃▇▇▃▃▃██▄▃▆▆▁▅
val loss,▁▁▃▃▄▂▅▂▂▅▅▅▂▂▄▇▃▃█▅

0,1
epoch,20.0
train acc,78.6109
train loss,0.71182
training time,27.46749
val acc,42.12474
val loss,4.96834


[34m[1mwandb[0m: Agent Starting Run: zd602mqn with config:
[34m[1mwandb[0m: 	enc_dec_layers: 2
[34m[1mwandb[0m: 	layer_type: gru
[34m[1mwandb[0m: 	units: 64


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9852
Batch 100 Loss 1.3239
Batch 200 Loss 1.1436
Batch 300 Loss 1.0711

Validating ...

Train Loss: 1.3245 Train Accuracy: 64.5356 Validation Loss: 2.4451 Validation Accuracy: 43.4190

Time taken for the epoch 70.1218
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 1.0338
Batch 100 Loss 0.9644
Batch 200 Loss 0.9111
Batch 300 Loss 0.8876

Validating ...

Train Loss: 0.9446 Train Accuracy: 72.8931 Validation Loss: 2.3157 Validation Accuracy: 50.5322

Time taken for the epoch 15.4815
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.9708
Batch 100 Loss 0.9276
Batch 200 Loss 0.8521
Batch 300 Loss 0.8793

Validating ...

Train Loss: 0.8969 Train Accuracy: 73.9815 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇███
train loss,█▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▃▄▄▄▃▄▄▅▅▅▅▆▆▆▇▇▇▇█
val loss,▆▄▂▃▄█▆█▆▅▅▆▄▅▅▃▃▃▃▁

0,1
epoch,20.0
train acc,86.73315
train loss,0.42075
training time,15.68003
val acc,66.58375
val loss,2.15674


[34m[1mwandb[0m: Agent Starting Run: syfdmhk0 with config:
[34m[1mwandb[0m: 	enc_dec_layers: 2
[34m[1mwandb[0m: 	layer_type: gru
[34m[1mwandb[0m: 	units: 128


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9929
Batch 100 Loss 1.1907
Batch 200 Loss 1.1174
Batch 300 Loss 0.9287

Validating ...

Train Loss: 1.2240 Train Accuracy: 65.2279 Validation Loss: 2.7647 Validation Accuracy: 46.0675

Time taken for the epoch 73.5338
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9495
Batch 100 Loss 0.9466
Batch 200 Loss 0.8878
Batch 300 Loss 0.8383

Validating ...

Train Loss: 0.9096 Train Accuracy: 73.3202 Validation Loss: 2.7338 Validation Accuracy: 48.5973

Time taken for the epoch 18.8403
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8983
Batch 100 Loss 0.8078
Batch 200 Loss 0.8073
Batch 300 Loss 0.8471

Validating ...

Train Loss: 0.8307 Train Accuracy: 75.3665 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▄▄▄▅▅▅▆▆▆▆▇▇▇▇████
train loss,█▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▂▂▁▂▂▃▃▄▄▄▄▅▅▆▆▇▇▇█
val loss,▅▅▆██▇▆▅▅▆▆▆▅▅▄▄▃▃▂▁

0,1
epoch,20.0
train acc,92.60529
train loss,0.23055
training time,18.68277
val acc,73.09554
val loss,2.05504


[34m[1mwandb[0m: Agent Starting Run: hnqon662 with config:
[34m[1mwandb[0m: 	enc_dec_layers: 2
[34m[1mwandb[0m: 	layer_type: gru
[34m[1mwandb[0m: 	units: 256


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9953
Batch 100 Loss 1.1542
Batch 200 Loss 0.9977
Batch 300 Loss 0.9393

Validating ...

Train Loss: 1.1484 Train Accuracy: 65.9665 Validation Loss: 3.3233 Validation Accuracy: 41.9640

Time taken for the epoch 74.1896
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9055
Batch 100 Loss 0.8843
Batch 200 Loss 0.8845
Batch 300 Loss 0.8694

Validating ...

Train Loss: 0.8839 Train Accuracy: 73.6741 Validation Loss: 3.5037 Validation Accuracy: 45.1890

Time taken for the epoch 19.0633
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8106
Batch 100 Loss 0.8167
Batch 200 Loss 0.8142
Batch 300 Loss 0.7548

Validating ...

Train Loss: 0.7725 Train Accuracy: 76.2944 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▄▄▅▆▆▇▇▇▇▇███████
train loss,█▆▆▅▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▂▃▂▃▄▅▅▆▆▇▇▇█▇█████
val loss,▇█▆▇▆▅▅▅▂▂▁▂▂▁▂▁▂▂▂▂

0,1
epoch,20.0
train acc,97.72421
train loss,0.07223
training time,19.36542
val acc,78.47469
val loss,2.00761


[34m[1mwandb[0m: Agent Starting Run: 1duzczil with config:
[34m[1mwandb[0m: 	enc_dec_layers: 2
[34m[1mwandb[0m: 	layer_type: lstm
[34m[1mwandb[0m: 	units: 64


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9839
Batch 100 Loss 1.2482
Batch 200 Loss 1.1118
Batch 300 Loss 1.0634

Validating ...

Train Loss: 1.3114 Train Accuracy: 65.1497 Validation Loss: 1.7903 Validation Accuracy: 58.3882

Time taken for the epoch 78.3903
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 1.1211
Batch 100 Loss 1.0056
Batch 200 Loss 0.9013
Batch 300 Loss 0.9185

Validating ...

Train Loss: 0.9647 Train Accuracy: 72.2807 Validation Loss: 2.2554 Validation Accuracy: 48.7364

Time taken for the epoch 18.8984
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.9015
Batch 100 Loss 0.9062
Batch 200 Loss 0.8946
Batch 300 Loss 0.8742

Validating ...

Train Loss: 0.9002 Train Accuracy: 73.8577 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▄▄▄▅▅▆▆▇▇▇▇▇█████
train loss,█▆▅▅▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▄▁▂▁▂▂▃▄▅▅▅▆▆▇▆▇▇███
val loss,▂▆▆█▆▇▅▄▅▄▅▄▄▃▃▂▂▁▁▁

0,1
epoch,20.0
train acc,91.57571
train loss,0.25955
training time,18.95214
val acc,73.73288
val loss,1.67514


[34m[1mwandb[0m: Agent Starting Run: 2gwbyjjj with config:
[34m[1mwandb[0m: 	enc_dec_layers: 2
[34m[1mwandb[0m: 	layer_type: lstm
[34m[1mwandb[0m: 	units: 128


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9874
Batch 100 Loss 1.1465
Batch 200 Loss 1.0665
Batch 300 Loss 0.9817

Validating ...

Train Loss: 1.2087 Train Accuracy: 66.1157 Validation Loss: 2.7647 Validation Accuracy: 40.2059

Time taken for the epoch 79.6547
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9856
Batch 100 Loss 0.9836
Batch 200 Loss 0.9318
Batch 300 Loss 0.9826

Validating ...

Train Loss: 0.9269 Train Accuracy: 73.1228 Validation Loss: 2.8276 Validation Accuracy: 45.5293

Time taken for the epoch 20.2986
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.9293
Batch 100 Loss 0.8355
Batch 200 Loss 0.8480
Batch 300 Loss 0.8594

Validating ...

Train Loss: 0.8623 Train Accuracy: 74.4856 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▃▄▄▅▅▆▆▆▇▇▇▇█████
train loss,█▆▆▅▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▂▁▂▂▃▄▄▄▅▆▆▆▇▇▇▇███
val loss,▆▆███▆▅▅▅▄▃▃▂▂▂▂▂▁▂▁

0,1
epoch,20.0
train acc,94.24357
train loss,0.17845
training time,20.34908
val acc,77.79655
val loss,1.59883


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: t5um03cm with config:
[34m[1mwandb[0m: 	enc_dec_layers: 2
[34m[1mwandb[0m: 	layer_type: lstm
[34m[1mwandb[0m: 	units: 256


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9891
Batch 100 Loss 1.0826
Batch 200 Loss 1.0341
Batch 300 Loss 0.9466

Validating ...

Train Loss: 1.1343 Train Accuracy: 66.6739 Validation Loss: 2.8426 Validation Accuracy: 44.6464

Time taken for the epoch 80.9445
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9340
Batch 100 Loss 0.8437
Batch 200 Loss 0.8430
Batch 300 Loss 0.8243

Validating ...

Train Loss: 0.8605 Train Accuracy: 74.2406 Validation Loss: 2.4669 Validation Accuracy: 51.8638

Time taken for the epoch 20.7690
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.7948
Batch 100 Loss 0.7591
Batch 200 Loss 0.6787
Batch 300 Loss 0.6728

Validating ...

Train Loss: 0.7208 Train Accuracy: 77.1849 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▄▅▆▆▇▇▇▇▇████████
train loss,█▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▂▂▃▄▄▅▆▆▆▇▇████████
val loss,█▆█▇▆▅▅▃▃▃▁▁▁▁▁▁▁▁▂▂

0,1
epoch,20.0
train acc,98.10793
train loss,0.06126
training time,20.76782
val acc,80.27647
val loss,1.72303


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: k38jjh0h with config:
[34m[1mwandb[0m: 	enc_dec_layers: 3
[34m[1mwandb[0m: 	layer_type: rnn
[34m[1mwandb[0m: 	units: 64


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 4.1155
Batch 100 Loss 1.1351
Batch 200 Loss 1.0711
Batch 300 Loss 0.9122

Validating ...

Train Loss: 1.2012 Train Accuracy: 62.6334 Validation Loss: 2.0008 Validation Accuracy: 52.0232

Time taken for the epoch 70.9818
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9258
Batch 100 Loss 0.8325
Batch 200 Loss 0.9003
Batch 300 Loss 0.8573

Validating ...

Train Loss: 0.8807 Train Accuracy: 74.7541 Validation Loss: 2.1952 Validation Accuracy: 54.9448

Time taken for the epoch 31.3043
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.7810
Batch 100 Loss 0.7957
Batch 200 Loss 0.7865
Batch 300 Loss 0.7469

Validating ...

Train Loss: 0.7688 Train Accuracy: 77.8048 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▄▅▆▆▆▇▇▇▇▇▇▇███████
train loss,█▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▆▄▄▅▅▆▆▆▆▆███▆▇▇▇▇█
val loss,▁▂▄▅▆████▇▇▇█▇▇▇███▇

0,1
epoch,20.0
train acc,87.53851
train loss,0.38628
training time,31.78701
val acc,55.86272
val loss,3.59262


[34m[1mwandb[0m: Agent Starting Run: 5pdo2j28 with config:
[34m[1mwandb[0m: 	enc_dec_layers: 3
[34m[1mwandb[0m: 	layer_type: rnn
[34m[1mwandb[0m: 	units: 128


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 4.1751
Batch 100 Loss 1.2513
Batch 200 Loss 1.0926
Batch 300 Loss 1.0709

Validating ...

Train Loss: 1.2608 Train Accuracy: 61.6220 Validation Loss: 2.1539 Validation Accuracy: 57.0945

Time taken for the epoch 76.6919
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 1.0271
Batch 100 Loss 0.9403
Batch 200 Loss 0.9638
Batch 300 Loss 0.9437

Validating ...

Train Loss: 0.9862 Train Accuracy: 71.8184 Validation Loss: 2.2134 Validation Accuracy: 55.9151

Time taken for the epoch 37.8699
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.9702
Batch 100 Loss 0.8936
Batch 200 Loss 0.9234
Batch 300 Loss 0.8759

Validating ...

Train Loss: 0.9164 Train Accuracy: 73.2776 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▄▄▅▆▇▇▇▇▇▇▇████████
train loss,█▆▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,██▃▃▃▄▄▃▄▁▄▅▅▅▄▅▆▅▅▅
val loss,▁▁▃▄▅▅▅▆▆█▆▆▆▇▇▇▆▇▇▇

0,1
epoch,20.0
train acc,86.02562
train loss,0.44718
training time,37.47461
val acc,50.04595
val loss,4.15782


[34m[1mwandb[0m: Agent Starting Run: ayk0bkjy with config:
[34m[1mwandb[0m: 	enc_dec_layers: 3
[34m[1mwandb[0m: 	layer_type: rnn
[34m[1mwandb[0m: 	units: 256


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 4.2342
Batch 100 Loss 1.4428
Batch 200 Loss 1.0856
Batch 300 Loss 0.9962

Validating ...

Train Loss: 1.4354 Train Accuracy: 59.9843 Validation Loss: 2.1650 Validation Accuracy: 57.0908

Time taken for the epoch 79.0098
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 1.0592
Batch 100 Loss 1.0244
Batch 200 Loss 0.9489
Batch 300 Loss 0.9616

Validating ...

Train Loss: 0.9825 Train Accuracy: 71.8144 Validation Loss: 2.2995 Validation Accuracy: 53.6423

Time taken for the epoch 39.9929
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.9534
Batch 100 Loss 0.9441
Batch 200 Loss 0.9045
Batch 300 Loss 0.9414

Validating ...

Train Loss: 0.9403 Train Accuracy: 72.8768 Validation Loss:

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▆▆▆▆▇▇▇▇▇█████▇▆▇▇▇
train loss,█▃▃▃▃▂▂▂▂▂▁▁▁▁▁▃▃▂▂▂
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,█▇██▄▄▃▃▁▇▇█▃▆▃▇▆▇█▆
val loss,▁▁▂▂▄▄▅▆█▂▃▂▆▃▇▃▅▄▃▅

0,1
epoch,20.0
train acc,75.04474
train loss,0.84319
training time,40.10258
val acc,48.06845
val loss,4.10234


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zz10jl3n with config:
[34m[1mwandb[0m: 	enc_dec_layers: 3
[34m[1mwandb[0m: 	layer_type: gru
[34m[1mwandb[0m: 	units: 64


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9889
Batch 100 Loss 1.2949
Batch 200 Loss 1.1432
Batch 300 Loss 1.0017

Validating ...

Train Loss: 1.3063 Train Accuracy: 64.7683 Validation Loss: 2.3187 Validation Accuracy: 57.2296

Time taken for the epoch 101.9683
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9613
Batch 100 Loss 0.9969
Batch 200 Loss 0.9353
Batch 300 Loss 0.9082

Validating ...

Train Loss: 0.9546 Train Accuracy: 72.1390 Validation Loss: 2.2538 Validation Accuracy: 58.7825

Time taken for the epoch 22.1279
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.9522
Batch 100 Loss 0.8731
Batch 200 Loss 0.8605
Batch 300 Loss 0.8475

Validating ...

Train Loss: 0.8595 Train Accuracy: 75.0378 Validation Loss

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▄▅▅▅▅▆▆▆▇▇▇▇▇▇████
train loss,█▅▅▄▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▂▂▃▄▂▃▃▃▅▅▅▅▆▆▆▇▇██
val loss,▅▄▅▆▇█▃▄▄▁▁▄▅▄▃▄▄▄▃▄

0,1
epoch,20.0
train acc,88.2862
train loss,0.36477
training time,21.6911
val acc,68.14121
val loss,2.24336


[34m[1mwandb[0m: Agent Starting Run: 5rn45eh1 with config:
[34m[1mwandb[0m: 	enc_dec_layers: 3
[34m[1mwandb[0m: 	layer_type: gru
[34m[1mwandb[0m: 	units: 128


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9949
Batch 100 Loss 1.2102
Batch 200 Loss 1.1023
Batch 300 Loss 0.9215

Validating ...

Train Loss: 1.1987 Train Accuracy: 65.6425 Validation Loss: 2.2648 Validation Accuracy: 52.1021

Time taken for the epoch 107.5213
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.8842
Batch 100 Loss 0.9651
Batch 200 Loss 0.8830
Batch 300 Loss 0.8691

Validating ...

Train Loss: 0.9246 Train Accuracy: 72.9252 Validation Loss: 2.7304 Validation Accuracy: 48.2015

Time taken for the epoch 26.3330
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.9087
Batch 100 Loss 0.8880
Batch 200 Loss 0.9012
Batch 300 Loss 0.8167

Validating ...

Train Loss: 0.8731 Train Accuracy: 74.5821 Validation Loss

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▃▄▄▅▅▆▆▇▇▇▇██████
train loss,█▆▆▅▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▂▁▁▁▂▃▄▅▆▆▆▇▇▇▇█████
val loss,▄▇▇██▆▅▄▃▃▃▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
train acc,94.35474
train loss,0.17396
training time,26.29988
val acc,78.25209
val loss,1.65777


[34m[1mwandb[0m: Agent Starting Run: rl1qymhw with config:
[34m[1mwandb[0m: 	enc_dec_layers: 3
[34m[1mwandb[0m: 	layer_type: gru
[34m[1mwandb[0m: 	units: 256


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9865
Batch 100 Loss 1.1091
Batch 200 Loss 0.9682
Batch 300 Loss 0.9615

Validating ...

Train Loss: 1.1330 Train Accuracy: 66.1058 Validation Loss: 2.8572 Validation Accuracy: 48.0170

Time taken for the epoch 107.4903
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9502
Batch 100 Loss 0.8744
Batch 200 Loss 0.8918
Batch 300 Loss 0.8563

Validating ...

Train Loss: 0.8874 Train Accuracy: 73.7681 Validation Loss: 3.1902 Validation Accuracy: 45.7880

Time taken for the epoch 27.1190
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8234
Batch 100 Loss 0.8276
Batch 200 Loss 0.7868
Batch 300 Loss 0.7636

Validating ...

Train Loss: 0.7676 Train Accuracy: 76.5758 Validation Loss

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▄▄▅▆▆▇▇▇▇████████
train loss,█▆▆▅▄▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▁▂▂▃▄▅▅▆▆▇▇█▇██████
val loss,▆█▇█▇▅▅▄▃▃▂▁▁▂▁▁▂▂▂▂

0,1
epoch,20.0
train acc,98.02383
train loss,0.06257
training time,27.03717
val acc,78.83477
val loss,2.00159


[34m[1mwandb[0m: Agent Starting Run: cdnyr97k with config:
[34m[1mwandb[0m: 	enc_dec_layers: 3
[34m[1mwandb[0m: 	layer_type: lstm
[34m[1mwandb[0m: 	units: 64


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9953
Batch 100 Loss 1.2337
Batch 200 Loss 1.1391
Batch 300 Loss 1.0715

Validating ...

Train Loss: 1.2931 Train Accuracy: 64.2808 Validation Loss: 1.6400 Validation Accuracy: 56.6051

Time taken for the epoch 112.6811
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 1.0549
Batch 100 Loss 0.9028
Batch 200 Loss 0.9330
Batch 300 Loss 0.9410

Validating ...

Train Loss: 0.9445 Train Accuracy: 72.9693 Validation Loss: 2.2366 Validation Accuracy: 49.8124

Time taken for the epoch 27.2880
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.9219
Batch 100 Loss 0.9094
Batch 200 Loss 0.8796
Batch 300 Loss 0.8452

Validating ...

Train Loss: 0.8899 Train Accuracy: 74.0852 Validation Loss

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▄▄▅▅▅▆▆▆▆▇▇▇▇▇████
train loss,█▆▅▅▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▄▁▂▂▂▃▄▄▅▅▅▆▆▆▇▇████
val loss,▁▇▇▇█▆▆▆▅▇▆▆▆▆▅▆▄▄▅▄

0,1
epoch,20.0
train acc,90.17864
train loss,0.30403
training time,27.03615
val acc,68.16498
val loss,1.99747


[34m[1mwandb[0m: Agent Starting Run: 0lkdt324 with config:
[34m[1mwandb[0m: 	enc_dec_layers: 3
[34m[1mwandb[0m: 	layer_type: lstm
[34m[1mwandb[0m: 	units: 128


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9896
Batch 100 Loss 1.1585
Batch 200 Loss 1.1207
Batch 300 Loss 1.0218

Validating ...

Train Loss: 1.2122 Train Accuracy: 65.7029 Validation Loss: 2.4206 Validation Accuracy: 44.8425

Time taken for the epoch 116.2639
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9571
Batch 100 Loss 0.9368
Batch 200 Loss 0.8848
Batch 300 Loss 0.8460

Validating ...

Train Loss: 0.9108 Train Accuracy: 73.2250 Validation Loss: 2.7464 Validation Accuracy: 46.6261

Time taken for the epoch 28.9244
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8356
Batch 100 Loss 0.8821
Batch 200 Loss 0.8284
Batch 300 Loss 0.7864

Validating ...

Train Loss: 0.8300 Train Accuracy: 75.3022 Validation Loss

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▄▄▅▅▆▆▆▇▇▇▇██████
train loss,█▆▅▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▁▁▃▃▃▄▄▅▅▆▆▆▇▇▇▇▇██
val loss,▄▆█▅▆▆▅▅▄▄▃▃▃▂▂▂▂▂▁▁

0,1
epoch,20.0
train acc,95.09074
train loss,0.15074
training time,28.93486
val acc,77.23409
val loss,1.66196


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: v4epzckc with config:
[34m[1mwandb[0m: 	enc_dec_layers: 3
[34m[1mwandb[0m: 	layer_type: lstm
[34m[1mwandb[0m: 	units: 256


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9905
Batch 100 Loss 1.1456
Batch 200 Loss 1.0155
Batch 300 Loss 0.9185

Validating ...

Train Loss: 1.1417 Train Accuracy: 66.2528 Validation Loss: 2.6774 Validation Accuracy: 47.4621

Time taken for the epoch 118.1029
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9485
Batch 100 Loss 0.9184
Batch 200 Loss 0.8157
Batch 300 Loss 0.7488

Validating ...

Train Loss: 0.8504 Train Accuracy: 74.2592 Validation Loss: 3.4207 Validation Accuracy: 42.8704

Time taken for the epoch 29.4493
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8342
Batch 100 Loss 0.8083
Batch 200 Loss 0.6953
Batch 300 Loss 0.6319

Validating ...

Train Loss: 0.7101 Train Accuracy: 77.1758 Validation Loss

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▄▅▆▆▇▇▇▇▇████████
train loss,█▆▅▄▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▂▁▃▃▄▅▆▆▆▇▇█████████
val loss,▅█▅▅▄▄▃▃▂▁▁▁▁▁▁▂▁▂▂▂

0,1
epoch,20.0
train acc,98.34455
train loss,0.05104
training time,29.56824
val acc,80.36323
val loss,1.77141


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


# Second sweep: grid search over encoder/decoder layers, embedding dimension and dropout

In [18]:
# Definition of the second W&B sweep. With the "grid" method the value lists
# below span 2 x 3 x 2 = 12 hyperparameter combinations.
sweep_config2 = dict(
    name="Sweep 2- Assignment3",
    method="grid",
    parameters=dict(
        # Number of stacked layers; the name suggests it is shared by the
        # encoder and the decoder -- confirm against the model code.
        enc_dec_layers=dict(values=[2, 3]),
        # Candidate embedding sizes.
        embedding_dim=dict(values=[64, 128, 256]),
        # Candidate dropout rates.
        dropout=dict(values=[0.2, 0.3]),
    ),
)

In [19]:
# Register the second sweep definition with W&B under the assignment project;
# the returned sweep ID is what the agent is started with.
sweep_id2 = wandb.sweep(
    sweep_config2,
    project="cs6910-assignment3",
)

Create sweep with ID: p25mgmof
Sweep URL: https://wandb.ai/harshagudivada/cs6910-assignment3/sweeps/p25mgmof


In [20]:
# Start an agent that executes the runs of the second sweep. The training
# entry point is wrapped in a zero-argument lambda so that its "hi" argument
# (presumably the Hindi language code -- confirm against train_with_wandb)
# is bound before the agent invokes it.
wandb.agent(
    sweep_id2,
    function=lambda: train_with_wandb("hi"),
)

[34m[1mwandb[0m: Agent Starting Run: 7hnygblg with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	enc_dec_layers: 2
[34m[1mwandb[0m: Currently logged in as: [33mharshagudivada[0m. Use [1m`wandb login --relogin`[0m to force relogin


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9933
Batch 100 Loss 1.1702
Batch 200 Loss 1.1788
Batch 300 Loss 0.9235

Validating ...

Train Loss: 1.1972 Train Accuracy: 65.9775 Validation Loss: 2.3918 Validation Accuracy: 48.2350

Time taken for the epoch 200.6497
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9465
Batch 100 Loss 0.9016
Batch 200 Loss 0.8876
Batch 300 Loss 0.8749

Validating ...

Train Loss: 0.9144 Train Accuracy: 73.3141 Validation Loss: 2.2572 Validation Accuracy: 51.4902

Time taken for the epoch 115.5331
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8992
Batch 100 Loss 0.8472
Batch 200 Loss 0.8525
Batch 300 Loss 0.8062

Validating ...

Train Loss: 0.8476 Train Accuracy: 74.7927 Validation Los

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▃▄▄▅▅▆▆▇▇▇▇▇█████
train loss,█▆▆▅▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▂▁▂▂▂▃▃▄▄▄▅▆▆▇▇▇███
val loss,▆▅█▇▇▇▇▇▅▆▅▅▃▄▃▂▂▁▂▁

0,1
epoch,20.0
train acc,94.62084
train loss,0.16484
training time,114.22137
val acc,76.64292
val loss,1.64102


[34m[1mwandb[0m: Agent Starting Run: zvirp5si with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	enc_dec_layers: 3


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9898
Batch 100 Loss 1.2218
Batch 200 Loss 1.1181
Batch 300 Loss 1.0975

Validating ...

Train Loss: 1.2713 Train Accuracy: 64.4884 Validation Loss: 2.5254 Validation Accuracy: 42.7320

Time taken for the epoch 298.4471
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 1.0666
Batch 100 Loss 1.0023
Batch 200 Loss 0.9493
Batch 300 Loss 0.8662

Validating ...

Train Loss: 0.9667 Train Accuracy: 72.7097 Validation Loss: 3.1002 Validation Accuracy: 41.8566

Time taken for the epoch 177.4797
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.9235
Batch 100 Loss 0.8931
Batch 200 Loss 0.9026
Batch 300 Loss 0.8374

Validating ...

Train Loss: 0.8936 Train Accuracy: 73.8756 Validation Los

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▃▄▄▅▅▆▆▇▇▇▇▇█████
train loss,█▆▆▅▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▁▃▃▃▃▄▄▅▅▆▇▆▇▇▇████
val loss,▅█▅▅▇▅▅▅▄▅▄▃▃▂▂▂▁▁▁▁

0,1
epoch,20.0
train acc,94.39365
train loss,0.17007
training time,178.32952
val acc,76.68987
val loss,1.69963


[34m[1mwandb[0m: Agent Starting Run: pwj0hzif with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 128
[34m[1mwandb[0m: 	enc_dec_layers: 2


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9979
Batch 100 Loss 1.1365
Batch 200 Loss 1.0541
Batch 300 Loss 1.0167

Validating ...

Train Loss: 1.1860 Train Accuracy: 66.6643 Validation Loss: 2.1628 Validation Accuracy: 53.8665

Time taken for the epoch 214.1721
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9656
Batch 100 Loss 0.9245
Batch 200 Loss 0.8662
Batch 300 Loss 0.8135

Validating ...

Train Loss: 0.9083 Train Accuracy: 72.9589 Validation Loss: 2.2344 Validation Accuracy: 52.0920

Time taken for the epoch 127.2420
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8084
Batch 100 Loss 0.7925
Batch 200 Loss 0.7456
Batch 300 Loss 0.6564

Validating ...

Train Loss: 0.7443 Train Accuracy: 77.0290 Validation Los

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▄▅▅▆▆▇▇▇▇▇███████
train loss,█▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁
training time,█▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▁▁▂▃▅▅▆▆▇▇▇▇███████
val loss,▆▇█▆▅▄▃▃▂▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
train acc,95.97314
train loss,0.12479
training time,127.5805
val acc,80.88235
val loss,1.46934


[34m[1mwandb[0m: Agent Starting Run: vbmqlct4 with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 128
[34m[1mwandb[0m: 	enc_dec_layers: 3


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9939
Batch 100 Loss 1.1613
Batch 200 Loss 1.0818
Batch 300 Loss 0.9579

Validating ...

Train Loss: 1.1887 Train Accuracy: 66.0006 Validation Loss: 2.4379 Validation Accuracy: 47.0890

Time taken for the epoch 314.4798
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9280
Batch 100 Loss 0.9340
Batch 200 Loss 0.8978
Batch 300 Loss 0.8606

Validating ...

Train Loss: 0.9070 Train Accuracy: 73.5235 Validation Loss: 2.4667 Validation Accuracy: 51.0682

Time taken for the epoch 189.8879
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8829
Batch 100 Loss 0.8403
Batch 200 Loss 0.8077
Batch 300 Loss 0.7856

Validating ...

Train Loss: 0.8308 Train Accuracy: 75.2538 Validation Los

[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: context deadline exceeded (<Response [500]>)


Batch 300 Loss 0.1479


[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: context deadline exceeded (<Response [500]>)



Validating ...

Train Loss: 0.1485 Train Accuracy: 95.0667 Validation Loss: 1.5550 Validation Accuracy: 79.1095

Time taken for the epoch 192.3135
----------------------------------------------------------------------------------------------------
EPOCH 18

Training ...

Batch 1 Loss 0.1296


[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: context deadline exceeded (<Response [500]>)
[34m[1mwandb[0m: Network error (HTTPError), entering retry loop.


Batch 100 Loss 0.1221
Batch 200 Loss 0.1317
Batch 300 Loss 0.1109

Validating ...

Train Loss: 0.1389 Train Accuracy: 95.4233 Validation Loss: 1.7207 Validation Accuracy: 77.2140

Time taken for the epoch 191.9324
----------------------------------------------------------------------------------------------------
EPOCH 19

Training ...

Batch 1 Loss 0.1234
Batch 100 Loss 0.1266
Batch 200 Loss 0.1325
Batch 300 Loss 0.1444

Validating ...

Train Loss: 0.1296 Train Accuracy: 95.7888 Validation Loss: 1.5487 Validation Accuracy: 79.4208

Time taken for the epoch 192.3264
----------------------------------------------------------------------------------------------------
EPOCH 20

Training ...

Batch 1 Loss 0.1256
Batch 100 Loss 0.1284
Batch 200 Loss 0.1139
Batch 300 Loss 0.1295

Validating ...

Train Loss: 0.1210 Train Accuracy: 96.0812 Validation Loss: 1.6165 Validation Accuracy: 78.6657

Time taken for the epoch 193.7575
--------------------------------------------------------------------

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▄▄▅▅▆▆▇▇▇▇▇██████
train loss,█▆▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▂▂▂▂▃▄▄▅▅▅▆▆▇▇▇████
val loss,▆▆▆▇█▆▆▅▅▅▄▃▃▂▂▂▁▂▁▁

0,1
epoch,20.0
train acc,96.08123
train loss,0.121
training time,193.75747
val acc,78.66566
val loss,1.61649


[34m[1mwandb[0m: Agent Starting Run: 0e6jf7j5 with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	enc_dec_layers: 2


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 4.0011
Batch 100 Loss 1.1307
Batch 200 Loss 1.0788
Batch 300 Loss 0.9874

Validating ...

Train Loss: 1.1693 Train Accuracy: 66.7686 Validation Loss: 3.4296 Validation Accuracy: 31.1851

Time taken for the epoch 234.4835
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 1.0132
Batch 100 Loss 0.9082
Batch 200 Loss 0.8860
Batch 300 Loss 0.8641

Validating ...

Train Loss: 0.9044 Train Accuracy: 73.2194 Validation Loss: 2.2672 Validation Accuracy: 52.1571

Time taken for the epoch 146.3885
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8348
Batch 100 Loss 0.7179
Batch 200 Loss 0.7016
Batch 300 Loss 0.6676

Validating ...

Train Loss: 0.7095 Train Accuracy: 77.2322 Validation Los

[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: context deadline exceeded (<Response [500]>)


Batch 300 Loss 0.1773

Validating ...

Train Loss: 0.1815 Train Accuracy: 94.0186 Validation Loss: 1.3289 Validation Accuracy: 79.9524

Time taken for the epoch 151.8802
----------------------------------------------------------------------------------------------------
EPOCH 12

Training ...

Batch 1 Loss 0.1720
Batch 100 Loss 0.1757
Batch 200 Loss 0.1655
Batch 300 Loss 0.1488

Validating ...

Train Loss: 0.1676 Train Accuracy: 94.4739 Validation Loss: 1.3638 Validation Accuracy: 79.4093

Time taken for the epoch 205.6169
----------------------------------------------------------------------------------------------------
EPOCH 13

Training ...

Batch 1 Loss 0.1727
Batch 100 Loss 0.1509
Batch 200 Loss 0.1446
Batch 300 Loss 0.1613

Validating ...

Train Loss: 0.1570 Train Accuracy: 94.8261 Validation Loss: 1.3606 Validation Accuracy: 80.2894

Time taken for the epoch 151.5325
----------------------------------------------------------------------------------------------------
EPOCH 14

T

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▅▆▆▇▇▇▇▇█████████
train loss,█▆▅▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
training time,█▁▁▂▁▁▁▁▁▁▁▆▁▂▁▁▁▁▁▃
val acc,▁▄▅▆▆▇▇▇▇███████████
val loss,█▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
train acc,96.56069
train loss,0.10484
training time,167.25477
val acc,80.97971
val loss,1.46316


[34m[1mwandb[0m: Agent Starting Run: lqwkbm7y with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	enc_dec_layers: 3


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9888
Batch 100 Loss 1.1180
Batch 200 Loss 1.0516
Batch 300 Loss 0.9426

Validating ...

Train Loss: 1.1707 Train Accuracy: 66.2749 Validation Loss: 2.5930 Validation Accuracy: 44.9969

Time taken for the epoch 351.5989
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9535
Batch 100 Loss 0.9548
Batch 200 Loss 0.8640
Batch 300 Loss 0.8122

Validating ...

Train Loss: 0.8717 Train Accuracy: 73.9075 Validation Loss: 2.8827 Validation Accuracy: 45.2310

Time taken for the epoch 215.8157
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.7812
Batch 100 Loss 0.7570
Batch 200 Loss 0.6984
Batch 300 Loss 0.6568

Validating ...

Train Loss: 0.7188 Train Accuracy: 77.6134 Validation Los

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▄▄▅▆▆▇▇▇▇▇████████
train loss,█▆▅▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁
training time,█▁▁▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▁▂▃▄▄▅▆▆▇█▇█▇██████
val loss,▇█▆▇▆▅▄▃▃▂▁▁▁▁▁▁▁▂▁▁

0,1
epoch,20.0
train acc,97.03795
train loss,0.09214
training time,212.73947
val acc,80.96092
val loss,1.58126


[34m[1mwandb[0m: Agent Starting Run: i8sg8zek with config:
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	enc_dec_layers: 2


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9945
Batch 100 Loss 1.2089
Batch 200 Loss 1.0506
Batch 300 Loss 1.0637

Validating ...

Train Loss: 1.2187 Train Accuracy: 66.1559 Validation Loss: 2.3474 Validation Accuracy: 57.2878

Time taken for the epoch 206.1995
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9729
Batch 100 Loss 0.9885
Batch 200 Loss 0.9168
Batch 300 Loss 0.8774

Validating ...

Train Loss: 0.9285 Train Accuracy: 72.4329 Validation Loss: 2.2939 Validation Accuracy: 52.0727

Time taken for the epoch 119.4992
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8739
Batch 100 Loss 0.8082
Batch 200 Loss 0.8198
Batch 300 Loss 0.7634

Validating ...

Train Loss: 0.8088 Train Accuracy: 75.6300 Validation Los

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▂▃▄▅▅▆▆▆▇▇▇▇▇██████
train loss,█▆▅▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▂▁▁▂▂▄▄▅▅▆▇▇▇▇▇█████
val loss,█▇▇██▆▅▄▅▃▂▂▂▂▁▁▁▁▁▁

0,1
epoch,20.0
train acc,95.55773
train loss,0.13722
training time,119.01033
val acc,80.2275
val loss,1.46918


[34m[1mwandb[0m: Agent Starting Run: 79tjvetz with config:
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	enc_dec_layers: 3


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...

Batch 1 Loss 3.9892
Batch 100 Loss 1.1997
Batch 200 Loss 1.0520
Batch 300 Loss 1.0204

Validating ...

Train Loss: 1.2087 Train Accuracy: 65.8216 Validation Loss: 3.0124 Validation Accuracy: 38.2768

Time taken for the epoch 305.0879
----------------------------------------------------------------------------------------------------
EPOCH 2

Training ...

Batch 1 Loss 0.9836
Batch 100 Loss 0.9459
Batch 200 Loss 0.8918
Batch 300 Loss 0.8631

Validating ...

Train Loss: 0.9097 Train Accuracy: 73.3048 Validation Loss: 2.8802 Validation Accuracy: 44.8117

Time taken for the epoch 181.2504
----------------------------------------------------------------------------------------------------
EPOCH 3

Training ...

Batch 1 Loss 0.8906
Batch 100 Loss 0.8441
Batch 200 Loss 0.7812
Batch 300 Loss 0.7941

Validating ...

Train Loss: 0.8260 Train Accuracy: 75.1906 Validation Los

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train acc,▁▃▃▄▄▅▅▆▆▇▇▇▇▇██████
train loss,█▆▅▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁
training time,█▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁
val acc,▁▂▃▃▃▄▄▅▅▅▆▆▆▆▇▇▇▇██
val loss,█▇▆▇█▆▆▄▆▅▅▄▄▄▂▃▂▂▂▁

0,1
epoch,20.0
train acc,95.18855
train loss,0.14661
training time,182.89684
val acc,77.23711
val loss,1.6975


[34m[1mwandb[0m: Agent Starting Run: 8nezn39m with config:
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 128
[34m[1mwandb[0m: 	enc_dec_layers: 2


----------------------------------------------------------------------------------------------------
EPOCH 1

Training ...



[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
