In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time

import wandb
wandb.login()

random.seed()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcs20b013[0m ([33mcs20b013-bersilin[0m). Use [1m`wandb login --relogin`[0m to force relogin


cuda


In [2]:
# Language Model
SOS_token = 0
EOS_token = 1

class Language:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {SOS_token: "<", EOS_token: ">"}
        self.n_chars = 2  # Count SOS and EOS

    def addWord(self, word):
        for char in word:
            self.addChar(char)

    def addChar(self, char):
        if char not in self.word2index:
            self.word2index[char] = self.n_chars
            self.word2count[char] = 1
            self.index2word[self.n_chars] = char
            self.n_chars += 1
        else:
            self.word2count[char] += 1

In [3]:
def get_data(lang: str, type: str) -> list[list[str]]:
    """
    Returns: 'pairs': list of [input_word, target_word] pairs
    """
    path = "../aksharantar_sampled/{}/{}_{}.csv".format(lang, lang, type)
    df = pd.read_csv(path, header=None)
    pairs = df.values.tolist()
    return pairs

In [4]:
def get_languages(lang: str):
    """
    Returns 
    1. input_lang: input language - English
    2. output_lang: output language - Given language
    3. pairs: list of [input_word, target_word] pairs
    """
    input_lang = Language('eng')
    output_lang = Language(lang)
    pairs = get_data(lang, "train")
    for pair in pairs:
        input_lang.addWord(pair[0])
        output_lang.addWord(pair[1])
    return input_lang, output_lang, pairs

In [5]:
def get_cell(cell_type: str):
    if cell_type == "LSTM":
        return nn.LSTM
    elif cell_type == "GRU":
        return nn.GRU
    elif cell_type == "RNN":
        return nn.RNN
    else:
        raise Exception("Invalid cell type")
    
def get_optimizer(optimizer: str):
    if optimizer == "SGD":
        return optim.SGD
    elif optimizer == "ADAM":
        return optim.Adam
    else:
        raise Exception("Invalid optimizer")

In [6]:
class Encoder(nn.Module):
    def __init__(self,
                 in_sz: int,
                 embed_sz: int,
                 hidden_sz: int,
                 cell_type: str,
                 n_layers: int,
                 dropout: float):
        
        super(Encoder, self).__init__()
        self.hidden_sz = hidden_sz
        self.n_layers = n_layers
        self.dropout = dropout
        self.cell_type = cell_type
        self.embedding = nn.Embedding(in_sz, embed_sz)

        self.rnn = get_cell(cell_type)(input_size = embed_sz,
                                       hidden_size = hidden_sz,
                                       num_layers = n_layers,
                                       dropout = dropout)
        
    def forward(self, input, hidden, cell):
        embedded = self.embedding(input).view(1, 1, -1)

        if(self.cell_type == "LSTM"):
            output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
        else:
            output, hidden = self.rnn(embedded, hidden)
            
        return output, hidden, cell
    
    def initHidden(self):
        return torch.zeros(self.n_layers, 1, self.hidden_sz, device=device)

In [7]:
class AttentionDecoder(nn.Module):
    def __init__(self,
                 out_sz: int,
                 embed_sz: int,
                 hidden_sz: int,
                 cell_type: str,
                 n_layers: int,
                 dropout: float):

        super(AttentionDecoder, self).__init__()
        self.hidden_sz = hidden_sz
        self.n_layers = n_layers
        self.dropout = dropout
        self.cell_type = cell_type
        self.embedding = nn.Embedding(out_sz, embed_sz)

        self.attn = nn.Linear(hidden_sz + embed_sz, 50)
        self.attn_combine = nn.Linear(hidden_sz + embed_sz, hidden_sz)

        self.rnn = get_cell(cell_type)(input_size = hidden_sz,
                                       hidden_size = hidden_sz,
                                       num_layers = n_layers,
                                       dropout = dropout)
        
        self.out = nn.Linear(hidden_sz, out_sz)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden, cell, encoder_outputs):
        embedding = self.embedding(input).view(1, 1, -1)

        attn_weights = F.softmax(self.attn(torch.cat((embedding[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))
        
        output = torch.cat((embedding[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        if(self.cell_type == "LSTM"):
            output, (hidden, cell) = self.rnn(output, (hidden, cell))
        else:
            output, hidden = self.rnn(output, hidden)

        output = self.softmax(self.out(output[0]))
        return output, hidden, cell, attn_weights
    
    def initHidden(self):
        return torch.zeros(self.n_layers, 1, self.hidden_sz, device=device)

In [8]:
def indexesFromWord(lang:Language, word:str):
    return [lang.word2index[char] for char in word]

def tensorFromWord(lang:Language, word:str):
    indexes = indexesFromWord(lang, word)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)

def tensorsFromPair(input_lang:Language, output_lang:Language, pair:list[str]):
    input_tensor = tensorFromWord(input_lang, pair[0])
    target_tensor = tensorFromWord(output_lang, pair[1])
    return (input_tensor, target_tensor)

In [9]:
def params_definition(): 
    """
    params:

        embed_size : size of embedding (input and output) (8, 16, 32, 64)
        hidden_size : size of hidden layer (64, 128, 256, 512)
        cell_type : type of cell (LSTM, GRU, RNN)
        num_layers : number of layers in encoder (1, 2, 3)
        dropout : dropout probability
        learning_rate : learning rate
        teacher_forcing_ratio : teacher forcing ratio (0.5 fixed for now)
        optimizer : optimizer (SGD, Adam)
        max_length : maximum length of input word (50 fixed for now)

    """
    pass

In [10]:
PRINT_EVERY = 5000
PLOT_EVERY = 100

In [11]:
class Translator:
    def __init__(self, lang: str, params: dict):
        self.lang = lang
        self.input_lang, self.output_lang, self.pairs = get_languages(self.lang)
        self.input_size = self.input_lang.n_chars
        self.output_size = self.output_lang.n_chars

        self.training_pairs = [tensorsFromPair(self.input_lang, self.output_lang, pair) for pair in self.pairs]

        self.encoder = Encoder(in_sz = self.input_size,
                             embed_sz = params["embed_size"],
                             hidden_sz = params["hidden_size"],
                             cell_type = params["cell_type"],
                             n_layers = params["num_layers"],
                             dropout = params["dropout"]).to(device)
        
        self.decoder = AttentionDecoder(out_sz = self.output_size,
                             embed_sz = params["embed_size"],
                             hidden_sz = params["hidden_size"],
                             cell_type = params["cell_type"],
                             n_layers = params["num_layers"],
                             dropout = params["dropout"]).to(device)

        self.encoder_optimizer = get_optimizer(params["optimizer"])(self.encoder.parameters(), lr=params["learning_rate"], weight_decay=params["weight_decay"])
        self.decoder_optimizer = get_optimizer(params["optimizer"])(self.decoder.parameters(), lr=params["learning_rate"], weight_decay=params["weight_decay"])
        
        self.criterion = nn.NLLLoss()

        self.teacher_forcing_ratio = params["teacher_forcing_ratio"]
        self.max_length = params["max_length"]

    def train_single(self, input_tensor, target_tensor):
        encoder_hidden = self.encoder.initHidden()
        encoder_cell = self.encoder.initHidden()

        self.encoder_optimizer.zero_grad()
        self.decoder_optimizer.zero_grad()

        input_length = input_tensor.size(0)
        target_length = target_tensor.size(0)

        encoder_outputs = torch.zeros(self.max_length, self.encoder.hidden_sz, device=device)

        loss = 0

        for ei in range(input_length):
            encoder_output, encoder_hidden, encoder_cell = self.encoder(input_tensor[ei], encoder_hidden, encoder_cell)
            encoder_outputs[ei] = encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)
        decoder_hidden, decoder_cell = encoder_hidden, encoder_cell

        use_teacher_forcing = True if random.random() < self.teacher_forcing_ratio else False

        if use_teacher_forcing:
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_cell, decoder_attention = self.decoder(decoder_input, decoder_hidden, decoder_cell, encoder_outputs)
                loss += self.criterion(decoder_output, target_tensor[di])

                decoder_input = target_tensor[di]
        else:
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_cell, decoder_attention = self.decoder(decoder_input, decoder_hidden, decoder_cell, encoder_outputs)
                loss += self.criterion(decoder_output, target_tensor[di])

                topv, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze().detach()
                if decoder_input.item() == EOS_token:
                    break

        loss.backward()
        self.encoder_optimizer.step()
        self.decoder_optimizer.step()

        return loss.item() / target_length
    
    def train(self, iters=-1):
        start_time = time.time()
        plot_losses = []
        print_loss_total = 0
        plot_loss_total = 0

        random.shuffle(self.training_pairs)
        iters = len(self.training_pairs) if iters == -1 else iters

        for iter in range(1, iters):
            training_pair = self.training_pairs[iter - 1]
            input_tensor = training_pair[0]
            target_tensor = training_pair[1]

            loss = self.train_single(input_tensor, target_tensor)
            print_loss_total += loss
            plot_loss_total += loss

            if iter % PRINT_EVERY == 0:
                print_loss_avg = print_loss_total / PRINT_EVERY
                print_loss_total = 0
                current_time = time.time()
                print("Loss: {:.4f} | Iterations: {} | Time: {:.3f}".format(print_loss_avg, iter, current_time - start_time))

            if iter % PLOT_EVERY == 0:
                plot_loss_avg = plot_loss_total / PLOT_EVERY
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0
            
        return plot_losses
    
    def evaluate(self, word):
        with torch.no_grad():
            input_tensor = tensorFromWord(self.input_lang, word)
            input_length = input_tensor.size()[0]
            encoder_hidden = self.encoder.initHidden()
            encoder_cell = self.encoder.initHidden()

            encoder_outputs = torch.zeros(self.max_length, self.encoder.hidden_sz, device=device)

            for ei in range(input_length):
                encoder_output, encoder_hidden, encoder_cell = self.encoder(input_tensor[ei], encoder_hidden, encoder_cell)
                encoder_outputs[ei] += encoder_output[0, 0]

            decoder_input = torch.tensor([[SOS_token]], device=device)
            decoder_hidden, decoder_cell = encoder_hidden, encoder_cell

            decoded_chars = ""
            decoder_attentions = torch.zeros(self.max_length, self.max_length)

            for di in range(self.max_length):
                decoder_output, decoder_hidden, decoder_cell, decoder_attention = self.decoder(decoder_input, decoder_hidden, decoder_cell, encoder_outputs)
                decoder_attentions[di] = decoder_attention.data
                topv, topi = decoder_output.topk(1)
                
                if topi.item() == EOS_token:
                    break
                else:
                    decoded_chars += self.output_lang.index2word[topi.item()]

                decoder_input = topi.squeeze().detach()

            return decoded_chars, decoder_attentions[:di + 1]
        
    def test_validate(self, type:str):
        pairs = get_data(self.lang, type)
        accuracy = 0
        for pair in pairs:
            output, _ = self.evaluate(pair[0])
            if output == pair[1]:
                accuracy += 1
        return accuracy / len(pairs)

In [12]:
def plot_losses(plot_losses, title: str):
    # return plot of losses
    x_labels = [i * PLOT_EVERY for i in range(1, len(plot_losses) + 1)]
    plt.plot(x_labels, plot_losses, color="blue")
    plt.xlabel("Iterations")
    plt.ylabel("Loss")
    plt.title(title)
    plt.show()

In [13]:
sweep_configuration = {
    "method": "bayes",
    "metric": {
        "name": "validation_accuracy",
        "goal": "maximize"
    },
    "parameters": {
        "embed_size": {
            "values": [8, 16, 32]
        },
        "hidden_size": {
            "values": [64, 128, 256, 512]
        },
        "cell_type": {
            "values": ["RNN", "LSTM", "GRU"]
        },
        "num_layers": {
            "values": [1, 2, 3]
        },
        "dropout": {
            "values": [0, 0.1, 0.2]
        },
        "learning_rate": {
            "values": [0.0005, 0.001, 0.005]
        },
        "optimizer": {
            "values": ["SGD", "ADAM"]
        },
        "teacher_forcing_ratio": {
            'value': 0.5
        },
        "max_length": {
            'value': 50
        },
        "weight_decay": {
            "values": [0, 1e-1, 1e-3, 1e-5]
        }
    }
}

In [14]:
count = 0

def train_sweep():
    global count
    count += 1

    run = wandb.init()
    config = wandb.config
    run.name = "embed_size: {} | hidden_size: {} | cell_type: {} | num_layers: {} | dropout: {} | learning_rate: {} | optimizer: {} | teacher_forcing_ratio: {} | max_length: {} | weight_decay: {}".format(config.embed_size, config.hidden_size, config.cell_type, config.num_layers, config.dropout, config.learning_rate, config.optimizer, config.teacher_forcing_ratio, config.max_length, config.weight_decay)

    model = Translator('tam', config)

    epochs = 10
    old_validation_accuracy = 0

    for epoch in range(epochs):
        print("Epoch: {}".format(epoch + 1))
        plot_losses = model.train()

        # take average of plot losses as training loss
        training_loss = sum(plot_losses) / len(plot_losses)
        
        training_accuracy = model.test_validate('train')
        print("Training Accuracy: {:.4f}".format(training_accuracy))

        validation_accuracy = model.test_validate('valid')
        print("Validation Accuracy: {:.4f}".format(validation_accuracy))

        wandb.log({
            "epoch": epoch + 1,
            "training_loss": training_loss,
            "training_accuracy": training_accuracy,
            "validation_accuracy": validation_accuracy
        })

        if epoch > 0:
            if validation_accuracy < 0.0001:
                break

            if validation_accuracy < 0.9 * old_validation_accuracy:
                break

        old_validation_accuracy = validation_accuracy

    test_accuracy = model.test_validate('test')
    print("Test Accuracy: {:.4f}".format(test_accuracy))

    wandb.log({
        "test_accuracy": test_accuracy
    })

    run.finish()

In [15]:
wandb_id = wandb.sweep(sweep_configuration, project="CS6910_Assn3_Attention_RNN")
wandb.agent(wandb_id, train_sweep, count=10)

Create sweep with ID: w7rnszm2
Sweep URL: https://wandb.ai/cs20b013-bersilin/CS6910_Assn3_Attention_RNN/sweeps/w7rnszm2


[34m[1mwandb[0m: Agent Starting Run: h8x541yx with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embed_size: 32
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	max_length: 50
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.




Epoch: 1
Loss: 1.5957 | Iterations: 5000 | Time: 48.352
Loss: 0.6664 | Iterations: 10000 | Time: 96.237
Loss: 0.4980 | Iterations: 15000 | Time: 144.529
Loss: 0.4059 | Iterations: 20000 | Time: 192.912
Loss: 0.3717 | Iterations: 25000 | Time: 241.216
Loss: 0.3835 | Iterations: 30000 | Time: 289.729
Loss: 0.3403 | Iterations: 35000 | Time: 338.893
Loss: 0.3370 | Iterations: 40000 | Time: 387.429
Loss: 0.3160 | Iterations: 45000 | Time: 436.056
Loss: 0.3073 | Iterations: 50000 | Time: 484.980
Training Accuracy: 0.6063
Validation Accuracy: 0.4856
Epoch: 2
Loss: 0.3097 | Iterations: 5000 | Time: 49.634
Loss: 0.2883 | Iterations: 10000 | Time: 100.192
Loss: 0.3097 | Iterations: 15000 | Time: 149.005
Loss: 0.2875 | Iterations: 20000 | Time: 197.761
Loss: 0.2994 | Iterations: 25000 | Time: 246.339
Loss: 0.2926 | Iterations: 30000 | Time: 294.910
Loss: 0.2877 | Iterations: 35000 | Time: 345.130
Loss: 0.2869 | Iterations: 40000 | Time: 394.204
Loss: 0.2957 | Iterations: 45000 | Time: 444.152
Lo

VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_accuracy,▇▇█▇█▇▆▅▄▁
training_loss,█▂▁▁▁▁▂▃▄█
validation_accuracy,▇█████▆▅▅▁

0,1
epoch,10.0
test_accuracy,0.28979
training_accuracy,0.39008
training_loss,0.53447
validation_accuracy,0.34766


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: yy0zsb5a with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_size: 8
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_length: 50
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016670416399999038, max=1.0…

Epoch: 1
Loss: 2.9706 | Iterations: 5000 | Time: 129.811
Loss: 2.7834 | Iterations: 10000 | Time: 260.419
Loss: 2.7395 | Iterations: 15000 | Time: 390.945
Loss: 2.7098 | Iterations: 20000 | Time: 523.051
Loss: 2.6703 | Iterations: 25000 | Time: 654.953
Loss: 2.6006 | Iterations: 30000 | Time: 786.196
Loss: 2.5411 | Iterations: 35000 | Time: 918.532
Loss: 2.5141 | Iterations: 40000 | Time: 1050.254
Loss: 2.4765 | Iterations: 45000 | Time: 1182.790
Loss: 2.4570 | Iterations: 50000 | Time: 1314.785
Training Accuracy: 0.0000
Validation Accuracy: 0.0000
Epoch: 2
Loss: 2.4180 | Iterations: 5000 | Time: 130.975
Loss: 2.3768 | Iterations: 10000 | Time: 263.530
Loss: 2.3293 | Iterations: 15000 | Time: 395.597
Loss: 2.2789 | Iterations: 20000 | Time: 528.774
Loss: 2.2221 | Iterations: 25000 | Time: 660.648
Loss: 2.1842 | Iterations: 30000 | Time: 793.496
Loss: 2.1369 | Iterations: 35000 | Time: 925.962
Loss: 2.0839 | Iterations: 40000 | Time: 1059.618
Loss: 2.0235 | Iterations: 45000 | Time: 119

VBox(children=(Label(value='0.003 MB of 0.032 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.100636…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_accuracy,▁▁▁▄▆▇▇█▇█
training_loss,█▇▅▃▂▁▁▁▁▁
validation_accuracy,▁▁▂▅▇▇▇███

0,1
epoch,10.0
test_accuracy,0.44922
training_accuracy,0.71713
training_loss,0.21812
validation_accuracy,0.55566


[34m[1mwandb[0m: Agent Starting Run: 80lxbl3c with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embed_size: 32
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	max_length: 50
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch: 1
Loss: 3.1110 | Iterations: 5000 | Time: 53.285
Loss: 2.8292 | Iterations: 10000 | Time: 104.164
Loss: 2.7503 | Iterations: 15000 | Time: 155.200
Loss: 2.6424 | Iterations: 20000 | Time: 206.733
Loss: 2.5891 | Iterations: 25000 | Time: 258.548
Loss: 2.5527 | Iterations: 30000 | Time: 310.265
Loss: 2.5384 | Iterations: 35000 | Time: 362.280
Loss: 2.5147 | Iterations: 40000 | Time: 414.661
Loss: 2.4860 | Iterations: 45000 | Time: 466.971
Loss: 2.4496 | Iterations: 50000 | Time: 519.022
Training Accuracy: 0.0000
Validation Accuracy: 0.0000
Epoch: 2
Loss: 2.4108 | Iterations: 5000 | Time: 52.481
Loss: 2.3627 | Iterations: 10000 | Time: 104.827
Loss: 2.3005 | Iterations: 15000 | Time: 156.873
Loss: 2.2243 | Iterations: 20000 | Time: 209.381
Loss: 2.1376 | Iterations: 25000 | Time: 261.383
Loss: 2.0345 | Iterations: 30000 | Time: 313.712
Loss: 1.9329 | Iterations: 35000 | Time: 366.191
Loss: 1.8177 | Iterations: 40000 | Time: 418.347
Loss: 1.7405 | Iterations: 45000 | Time: 470.448
L

VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_accuracy,▁▁▄▆▇▇▇███
training_loss,█▆▃▂▁▁▁▁▁▁
validation_accuracy,▁▁▅▇▇▇████

0,1
epoch,10.0
test_accuracy,0.38892
training_accuracy,0.64787
training_loss,0.25895
validation_accuracy,0.49927


[34m[1mwandb[0m: Agent Starting Run: ka8l4crk with config:
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 8
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	max_length: 50
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch: 1
Loss: 2.9697 | Iterations: 5000 | Time: 56.496
Loss: 2.7558 | Iterations: 10000 | Time: 112.569
Loss: 2.7505 | Iterations: 15000 | Time: 168.640
Loss: 2.7551 | Iterations: 20000 | Time: 224.689
Loss: 2.7539 | Iterations: 25000 | Time: 280.617
Loss: 2.7408 | Iterations: 30000 | Time: 336.688
Loss: 2.7433 | Iterations: 35000 | Time: 392.984
Loss: 2.7401 | Iterations: 40000 | Time: 449.218
Loss: 2.7355 | Iterations: 45000 | Time: 505.582
Loss: 2.7331 | Iterations: 50000 | Time: 562.076
Training Accuracy: 0.0000
Validation Accuracy: 0.0000
Epoch: 2
Loss: 2.7396 | Iterations: 5000 | Time: 56.264
Loss: 2.7369 | Iterations: 10000 | Time: 112.390
Loss: 2.7330 | Iterations: 15000 | Time: 168.612
Loss: 2.7346 | Iterations: 20000 | Time: 224.530
Loss: 2.7381 | Iterations: 25000 | Time: 280.392
Loss: 2.7306 | Iterations: 30000 | Time: 336.915
Loss: 2.7302 | Iterations: 35000 | Time: 393.148
Loss: 2.7259 | Iterations: 40000 | Time: 449.393
Loss: 2.7255 | Iterations: 45000 | Time: 505.652
L

VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁█
test_accuracy,▁
training_accuracy,▁▁
training_loss,█▁
validation_accuracy,▁▁

0,1
epoch,2.0
test_accuracy,0.0
training_accuracy,0.0
training_loss,2.7329
validation_accuracy,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8s7o5ebl with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 16
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	max_length: 50
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch: 1
Loss: 3.2456 | Iterations: 5000 | Time: 97.152
Loss: 2.9706 | Iterations: 10000 | Time: 193.306
Loss: 2.8760 | Iterations: 15000 | Time: 287.368
Loss: 2.8422 | Iterations: 20000 | Time: 380.593
Loss: 2.8018 | Iterations: 25000 | Time: 473.521
Loss: 2.7755 | Iterations: 30000 | Time: 566.565
Loss: 2.7687 | Iterations: 35000 | Time: 660.274
Loss: 2.7519 | Iterations: 40000 | Time: 755.053
Loss: 2.7146 | Iterations: 45000 | Time: 849.305
Loss: 2.7067 | Iterations: 50000 | Time: 944.518
Training Accuracy: 0.0000
Validation Accuracy: 0.0000
Epoch: 2
Loss: 2.6805 | Iterations: 5000 | Time: 95.134
Loss: 2.6626 | Iterations: 10000 | Time: 191.217
Loss: 2.6222 | Iterations: 15000 | Time: 286.141
Loss: 2.5787 | Iterations: 20000 | Time: 381.765
Loss: 2.5546 | Iterations: 25000 | Time: 477.764
Loss: 2.5297 | Iterations: 30000 | Time: 573.997
Loss: 2.5151 | Iterations: 35000 | Time: 670.403
Loss: 2.5044 | Iterations: 40000 | Time: 766.386
Loss: 2.4941 | Iterations: 45000 | Time: 862.082
L

0,1
epoch,▁█
test_accuracy,▁
training_accuracy,▁▁
training_loss,█▁
validation_accuracy,▁▁

0,1
epoch,2.0
test_accuracy,0.0
training_accuracy,0.0
training_loss,2.56028
validation_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: aaq0n13f with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 16
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_length: 50
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016671147183418118, max=1.0…

Epoch: 1
Loss: 2.9296 | Iterations: 5000 | Time: 60.141
Loss: 2.7838 | Iterations: 10000 | Time: 120.060
Loss: 2.7294 | Iterations: 15000 | Time: 180.538
Loss: 2.6799 | Iterations: 20000 | Time: 241.404
Loss: 2.6193 | Iterations: 25000 | Time: 302.395
Loss: 2.5639 | Iterations: 30000 | Time: 364.243
Loss: 2.5163 | Iterations: 35000 | Time: 426.340
Loss: 2.4817 | Iterations: 40000 | Time: 488.105
Loss: 2.4501 | Iterations: 45000 | Time: 550.080
Loss: 2.4072 | Iterations: 50000 | Time: 611.584
Training Accuracy: 0.0000
Validation Accuracy: 0.0000
Epoch: 2
Loss: 2.3553 | Iterations: 5000 | Time: 60.612
Loss: 2.3135 | Iterations: 10000 | Time: 121.208
Loss: 2.2435 | Iterations: 15000 | Time: 182.832
Loss: 2.1839 | Iterations: 20000 | Time: 243.840
Loss: 2.1106 | Iterations: 25000 | Time: 305.580
Loss: 2.0353 | Iterations: 30000 | Time: 367.221
Loss: 1.9357 | Iterations: 35000 | Time: 429.709
Loss: 1.8429 | Iterations: 40000 | Time: 491.926
Loss: 1.7682 | Iterations: 45000 | Time: 554.510
L

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_accuracy,▁▁▂▄▅▆▇▇▇█
training_loss,█▆▄▃▂▁▁▁▁▁
validation_accuracy,▁▁▄▆▆▇████

0,1
epoch,10.0
test_accuracy,0.44751
training_accuracy,0.66881
training_loss,0.24291
validation_accuracy,0.53394


[34m[1mwandb[0m: Agent Starting Run: slajca6b with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embed_size: 16
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_length: 50
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch: 1
Loss: 2.1321 | Iterations: 5000 | Time: 122.459
Loss: 1.5427 | Iterations: 10000 | Time: 246.540
Loss: 1.4238 | Iterations: 15000 | Time: 371.198
Loss: 1.3543 | Iterations: 20000 | Time: 495.457
Loss: 1.3455 | Iterations: 25000 | Time: 619.690
Loss: 1.5047 | Iterations: 30000 | Time: 743.806
Loss: 2.2557 | Iterations: 35000 | Time: 866.094
Loss: 2.1068 | Iterations: 40000 | Time: 989.090
Loss: 1.9023 | Iterations: 45000 | Time: 1112.473
Loss: 2.0024 | Iterations: 50000 | Time: 1236.489
Training Accuracy: 0.0008
Validation Accuracy: 0.0059
Epoch: 2
Loss: 1.7689 | Iterations: 5000 | Time: 123.691
Loss: 1.6187 | Iterations: 10000 | Time: 247.662
Loss: 1.5461 | Iterations: 15000 | Time: 371.190
Loss: 1.4958 | Iterations: 20000 | Time: 494.540
Loss: 1.5428 | Iterations: 25000 | Time: 618.088
Loss: 1.7340 | Iterations: 30000 | Time: 742.065
Loss: 1.5514 | Iterations: 35000 | Time: 865.857
Loss: 1.5781 | Iterations: 40000 | Time: 989.976
Loss: 1.5570 | Iterations: 45000 | Time: 1113.

VBox(children=(Label(value='0.003 MB of 0.028 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.114013…

0,1
epoch,▁▅█
test_accuracy,▁
training_accuracy,▁██
training_loss,█▂▁
validation_accuracy,▁█▆

0,1
epoch,3.0
test_accuracy,0.02515
training_accuracy,0.00785
training_loss,1.56146
validation_accuracy,0.03027


[34m[1mwandb[0m: Agent Starting Run: 4dl7vxs3 with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 32
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	max_length: 50
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch: 1
Loss: 2.7668 | Iterations: 5000 | Time: 65.171
Loss: 2.5562 | Iterations: 10000 | Time: 130.966
Loss: 2.4020 | Iterations: 15000 | Time: 197.231
Loss: 2.1709 | Iterations: 20000 | Time: 263.047
Loss: 1.9337 | Iterations: 25000 | Time: 329.729
Loss: 1.6520 | Iterations: 30000 | Time: 396.302
Loss: 1.4386 | Iterations: 35000 | Time: 463.161
Loss: 1.2863 | Iterations: 40000 | Time: 530.290
Loss: 1.1408 | Iterations: 45000 | Time: 596.852
Loss: 1.0439 | Iterations: 50000 | Time: 663.941
Training Accuracy: 0.1079
Validation Accuracy: 0.1929
Epoch: 2
Loss: 0.9362 | Iterations: 5000 | Time: 67.369
Loss: 0.8337 | Iterations: 10000 | Time: 134.673
Loss: 0.8130 | Iterations: 15000 | Time: 201.785
Loss: 0.7642 | Iterations: 20000 | Time: 268.955
Loss: 0.7189 | Iterations: 25000 | Time: 336.573
Loss: 0.6803 | Iterations: 30000 | Time: 404.276
Loss: 0.6432 | Iterations: 35000 | Time: 471.327
Loss: 0.6236 | Iterations: 40000 | Time: 538.606
Loss: 0.5907 | Iterations: 45000 | Time: 605.998
L

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
training_accuracy,▁▄▅▆▇▇████
training_loss,█▃▂▂▁▁▁▁▁▁
validation_accuracy,▁▄▆▆▇█████

0,1
epoch,10.0
test_accuracy,0.3916
training_accuracy,0.56668
training_loss,0.29035
validation_accuracy,0.48486


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d55j9u67 with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 16
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_length: 50
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: ADAM
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch: 1
Loss: 2.0014 | Iterations: 5000 | Time: 70.786
Loss: 1.3063 | Iterations: 10000 | Time: 141.811
Loss: 1.1359 | Iterations: 15000 | Time: 217.893
Loss: 1.1961 | Iterations: 20000 | Time: 297.872
Loss: 1.0591 | Iterations: 25000 | Time: 378.777
Loss: 0.9967 | Iterations: 30000 | Time: 457.406
Loss: 0.9907 | Iterations: 35000 | Time: 534.042
Loss: 1.0269 | Iterations: 40000 | Time: 612.221
Loss: 0.9822 | Iterations: 45000 | Time: 689.577
Loss: 0.9885 | Iterations: 50000 | Time: 766.188
Training Accuracy: 0.0954
Validation Accuracy: 0.1797
Epoch: 2
Loss: 0.9962 | Iterations: 5000 | Time: 71.595
Loss: 0.9369 | Iterations: 10000 | Time: 143.313
Loss: 1.1703 | Iterations: 15000 | Time: 215.561
Loss: 1.3664 | Iterations: 20000 | Time: 290.860
Loss: 1.2199 | Iterations: 25000 | Time: 367.182
Loss: 1.1178 | Iterations: 30000 | Time: 443.498
Loss: 1.0845 | Iterations: 35000 | Time: 519.562
Loss: 1.1077 | Iterations: 40000 | Time: 595.496
Loss: 1.2084 | Iterations: 45000 | Time: 671.897
L

VBox(children=(Label(value='0.003 MB of 0.027 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.116242…

0,1
epoch,▁█
test_accuracy,▁
training_accuracy,█▁
training_loss,█▁
validation_accuracy,█▁

0,1
epoch,2.0
test_accuracy,0.08569
training_accuracy,0.04039
training_loss,1.14244
validation_accuracy,0.09473


[34m[1mwandb[0m: Agent Starting Run: srx3jk2l with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 32
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	max_length: 50
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch: 1
Loss: 2.7382 | Iterations: 5000 | Time: 69.080
Loss: 2.5160 | Iterations: 10000 | Time: 139.621
Loss: 2.2610 | Iterations: 15000 | Time: 210.781
Loss: 1.9513 | Iterations: 20000 | Time: 281.975
Loss: 1.5989 | Iterations: 25000 | Time: 354.327
Loss: 1.3284 | Iterations: 30000 | Time: 426.263
Loss: 1.0921 | Iterations: 35000 | Time: 498.980
Loss: 0.8848 | Iterations: 40000 | Time: 571.995
Loss: 0.7872 | Iterations: 45000 | Time: 644.867
Loss: 0.7082 | Iterations: 50000 | Time: 717.262
Training Accuracy: 0.2839
Validation Accuracy: 0.3220
Epoch: 2
Loss: 0.6426 | Iterations: 5000 | Time: 72.784
Loss: 0.6650 | Iterations: 10000 | Time: 145.716
Loss: 0.6103 | Iterations: 15000 | Time: 218.571
Loss: 1.1754 | Iterations: 20000 | Time: 290.779
Loss: 1.1816 | Iterations: 25000 | Time: 363.184
Loss: 1.0707 | Iterations: 30000 | Time: 435.461
Loss: 0.9742 | Iterations: 35000 | Time: 507.665
Loss: 0.8240 | Iterations: 40000 | Time: 580.375
Loss: 0.7648 | Iterations: 45000 | Time: 653.339
L

VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▅▆█
test_accuracy,▁
training_accuracy,▅▅▇█▁
training_loss,█▄▂▁▅
validation_accuracy,▆▆▇█▁

0,1
epoch,5.0
test_accuracy,0.09741
training_accuracy,0.0399
training_loss,1.07223
validation_accuracy,0.10132
