<a href="https://colab.research.google.com/github/arunangshudutta/DA6401_assignment3/blob/main/Ques_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch.optim as optim
from tqdm import tqdm

import shutil
import pandas as pd

from collections import Counter

import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset

import torch
import torch.nn as nn

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!ls /content/drive/MyDrive/Colab\ Notebooks/DA6401/assignment\ 3/dakshina_dataset_v1.0/hi/lexicons

hi.translit.sampled.dev.tsv   hi.translit.sampled.train.tsv
hi.translit.sampled.test.tsv


In [None]:
# Source folder in Google Drive
src_folder = '/content/drive/MyDrive/Colab Notebooks/DA6401/assignment 3/dakshina_dataset_v1.0/hi/lexicons'

# Destination in Colab's local storage
dst_folder = '/content/hindi_data'

# Copy the entire folder
shutil.copytree(src_folder, dst_folder)

'/content/hindi_data'

In [None]:
# Load the data
df_train = pd.read_csv("hindi_data/hi.translit.sampled.train.tsv", sep="\t", header=None, names=["target", "input", "num"])
# Drop rows with NaNs (if any)
df_train = df_train.dropna()

df_val = pd.read_csv("hindi_data/hi.translit.sampled.dev.tsv", sep="\t", header=None, names=["target", "input", "num"])
df_val = df_val.dropna()

In [None]:
# Special tokens
PAD_token = "<pad>"
SOS_token = "<sos>"  # start of sequence
EOS_token = "<eos>"  # end of sequence

def build_vocab(sequences):
    vocab = set(char for seq in sequences for char in seq)
    vocab = [PAD_token, SOS_token, EOS_token] + sorted(vocab)
    char2idx = {c: i for i, c in enumerate(vocab)}
    idx2char = {i: c for c, i in char2idx.items()}
    return char2idx, idx2char

input_char2idx, input_idx2char = build_vocab(df_train["input"])
target_char2idx, target_idx2char = build_vocab(df_train["target"])

print(f"Input vocab size: {len(input_char2idx)}")
print(f"Target vocab size: {len(target_char2idx)}")

Input vocab size: 29
Target vocab size: 66


In [None]:
def encode_sequence(seq, char2idx, add_sos_eos=True):
    tokens = [char2idx[c] for c in seq]
    if add_sos_eos:
        tokens = [char2idx[SOS_token]] + tokens + [char2idx[EOS_token]]
    return tokens

input_sequences = [encode_sequence(seq, input_char2idx) for seq in df_train["input"]]
target_sequences = [encode_sequence(seq, target_char2idx) for seq in df_train["target"]]

input_val = [encode_sequence(seq, input_char2idx) for seq in df_val["input"]]
target_val = [encode_sequence(seq, target_char2idx) for seq in df_val["target"]]

In [None]:
class TransliterationDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return torch.tensor(self.inputs[idx]), torch.tensor(self.targets[idx])

def collate_fn(batch):
    inputs, targets = zip(*batch)
    inputs_padded = pad_sequence(inputs, batch_first=True, padding_value=input_char2idx[PAD_token])
    targets_padded = pad_sequence(targets, batch_first=True, padding_value=target_char2idx[PAD_token])
    return inputs_padded, targets_padded

train_dataset = TransliterationDataset(input_sequences, target_sequences)
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True, collate_fn=collate_fn)

val_dataset = TransliterationDataset(input_val, target_val)
val_dataloader = DataLoader(val_dataset, batch_size=128, shuffle=True, collate_fn=collate_fn)

In [None]:


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---- Flexible Encoder ----
class Encoder(nn.Module):
    def __init__(self, input_dim, embed_dim, hidden_dim, num_layers=1, cell_type="RNN", dropout=0.1):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(input_dim, embed_dim)
        rnn_cell = {"RNN": nn.RNN, "LSTM": nn.LSTM, "GRU": nn.GRU}[cell_type]
        self.rnn = rnn_cell(embed_dim, hidden_dim, num_layers=num_layers, batch_first=True)
        self.cell_type = cell_type
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        embedded = self.embedding(src)
        embedded = self.dropout(embedded)
        outputs, hidden = self.rnn(embedded)
        return hidden


# ---- Flexible Decoder ----
class Decoder(nn.Module):
    def __init__(self, output_dim, embed_dim, hidden_dim, num_layers=1, cell_type="RNN", dropout=0.1):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(output_dim, embed_dim)
        rnn_cell = {"RNN": nn.RNN, "LSTM": nn.LSTM, "GRU": nn.GRU}[cell_type]
        self.rnn = rnn_cell(embed_dim, hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc_out = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)
        self.cell_type = cell_type

    def forward(self, input, hidden):
        input = input.unsqueeze(1)  # [B] -> [B,1]
        embedded = self.embedding(input)  # [B,1,E]
        embedded = self.dropout(embedded)
        output, hidden = self.rnn(embedded, hidden)
        prediction = self.fc_out(output.squeeze(1))  # [B, V]
        return prediction, hidden


In [None]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, sos_idx, eos_idx, cell_type="RNN"):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.sos_idx = sos_idx
        self.eos_idx = eos_idx
        self.cell_type = cell_type

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        B, T = trg.shape
        output_dim = self.decoder.fc_out.out_features
        outputs = torch.zeros(B, T, output_dim).to(device)

        hidden = self.encoder(src)
        input = torch.tensor([self.sos_idx] * B).to(device)

        for t in range(T):
            output, hidden = self.decoder(input, hidden)
            outputs[:, t, :] = output

            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            input = trg[:, t] if teacher_force else output.argmax(1)

        return outputs


In [None]:
import wandb

sweep_config = {
    'method': 'bayes',
    'metric': {
      'name': 'val_accuracy',
      'goal': 'maximize'
    },
    'parameters': {
        'embedding_size': {
            'values': [16, 32, 64, 128]
        },
        'num_layers': {
            'values': [1, 2, 3]
        },
        'hidden_layer_size': {
            'values': [32, 64, 128, 256]
        },
        'cell_type': {
            'values': ['LSTM']
        },
        'dropout': {
            'values': [0.2, 0.3]
        },
         'learning_rate': {
            'values': [0.01]
        },
    }
}
wandb.login(key="b81ae278300a216ce5380e07b52d628e2dd6eb69")
sweep_id = wandb.sweep(sweep = sweep_config, entity="arunangshudutta218-iitm", project = 'dl_assgn_3_q_2new')

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: eh8i93v5
Sweep URL: https://wandb.ai/arunangshudutta218-iitm/dl_assgn_3_q_2new/sweeps/eh8i93v5


In [None]:
def main():
  with wandb.init() as run:

    em_sz = wandb.config.embedding_size
    num_lay = wandb.config.num_layers
    hid_sz = wandb.config.hidden_layer_size
    cty = wandb.config.cell_type
    dr = wandb.config.dropout
    lr = wandb.config.learning_rate



    wandb.run.name = "cty_{}_lay_{}_hsz_{}_emsz_{}_dr_{}_lr_{}".format(cty, num_lay, hid_sz, em_sz, dr, lr)



    # Initialize model
    encoder = Encoder(len(input_char2idx), embed_dim = em_sz, hidden_dim = hid_sz, num_layers=num_lay, cell_type=cty, dropout=dr).to(device)
    decoder = Decoder(len(target_char2idx), embed_dim = em_sz, hidden_dim = hid_sz, num_layers=num_lay, cell_type=cty, dropout=dr).to(device)
    model = Seq2Seq(encoder, decoder, sos_idx=target_char2idx['<sos>'], eos_idx=target_char2idx['<eos>'], cell_type=cty).to(device)

    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss(ignore_index=target_char2idx["<pad>"])

    # Training Loop
    for epoch in range(1, 11):
        model.train()
        epoch_loss = 0
        for src, trg in tqdm(train_dataloader):
            src, trg = src.to(device), trg.to(device)
            optimizer.zero_grad()

            output = model(src, trg)  # output: (B, T, V)
            output = output.view(-1, len(target_char2idx))
            trg = trg.view(-1)

            loss = criterion(output, trg)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        # --- Validation ---
        model.eval()
        total_tokens = 0
        correct_tokens = 0
        with torch.no_grad():
            for val_src, val_trg in val_dataloader:
                val_src, val_trg = val_src.to(device), val_trg.to(device)
                val_output = model(val_src, val_trg)  # shape: (B, T, V)
                val_pred = val_output.argmax(dim=-1)  # (B, T)

                mask = val_trg != target_char2idx["<pad>"]
                correct = (val_pred == val_trg) & mask
                correct_tokens += correct.sum().item()
                total_tokens += mask.sum().item()

        val_accuracy = correct_tokens / total_tokens * 100

        print(f"Epoch {epoch} | Train Loss: {epoch_loss / len(train_dataloader):.4f} | Val Acc: {val_accuracy:.2f}%")

        wandb.log({'tr_loss' : epoch_loss / len(train_dataloader), 'val_accuracy' : val_accuracy})

wandb.agent(sweep_id, function = main, count = 10)
wandb.finish()

[34m[1mwandb[0m: Agent Starting Run: lafhfhyv with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 1


100%|██████████| 346/346 [00:09<00:00, 35.47it/s]


Epoch 1 | Train Loss: 2.6445 | Val Acc: 34.20%


100%|██████████| 346/346 [00:08<00:00, 40.42it/s]


Epoch 2 | Train Loss: 2.2989 | Val Acc: 44.24%


100%|██████████| 346/346 [00:09<00:00, 35.28it/s]


Epoch 3 | Train Loss: 2.0297 | Val Acc: 47.35%


100%|██████████| 346/346 [00:09<00:00, 35.03it/s]


Epoch 4 | Train Loss: 1.9007 | Val Acc: 48.71%


100%|██████████| 346/346 [00:08<00:00, 39.74it/s]


Epoch 5 | Train Loss: 1.8352 | Val Acc: 50.20%


100%|██████████| 346/346 [00:09<00:00, 36.15it/s]


Epoch 6 | Train Loss: 1.7899 | Val Acc: 51.48%


100%|██████████| 346/346 [00:09<00:00, 35.42it/s]


Epoch 7 | Train Loss: 1.7497 | Val Acc: 51.82%


100%|██████████| 346/346 [00:08<00:00, 38.56it/s]


Epoch 8 | Train Loss: 1.6867 | Val Acc: 53.19%


100%|██████████| 346/346 [00:09<00:00, 37.81it/s]


Epoch 9 | Train Loss: 1.6415 | Val Acc: 54.14%


100%|██████████| 346/346 [00:09<00:00, 35.23it/s]


Epoch 10 | Train Loss: 1.6085 | Val Acc: 54.87%


0,1
tr_loss,█▆▄▃▃▂▂▂▁▁
val_accuracy,▁▄▅▆▆▇▇▇██

0,1
tr_loss,1.60849
val_accuracy,54.8682


[34m[1mwandb[0m: Agent Starting Run: 343vcua7 with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 3


100%|██████████| 346/346 [00:17<00:00, 20.28it/s]


Epoch 1 | Train Loss: 2.4295 | Val Acc: 45.78%


100%|██████████| 346/346 [00:15<00:00, 21.88it/s]


Epoch 2 | Train Loss: 1.7731 | Val Acc: 58.82%


100%|██████████| 346/346 [00:15<00:00, 21.81it/s]


Epoch 3 | Train Loss: 1.2488 | Val Acc: 66.45%


100%|██████████| 346/346 [00:15<00:00, 21.89it/s]


Epoch 4 | Train Loss: 1.0014 | Val Acc: 71.12%


100%|██████████| 346/346 [00:15<00:00, 21.92it/s]


Epoch 5 | Train Loss: 0.8613 | Val Acc: 72.95%


100%|██████████| 346/346 [00:16<00:00, 20.58it/s]


Epoch 6 | Train Loss: 0.7744 | Val Acc: 74.36%


100%|██████████| 346/346 [00:15<00:00, 21.81it/s]


Epoch 7 | Train Loss: 0.7248 | Val Acc: 75.66%


100%|██████████| 346/346 [00:15<00:00, 21.74it/s]


Epoch 8 | Train Loss: 0.6723 | Val Acc: 75.94%


100%|██████████| 346/346 [00:15<00:00, 21.71it/s]


Epoch 9 | Train Loss: 0.6299 | Val Acc: 75.94%


100%|██████████| 346/346 [00:15<00:00, 21.79it/s]


Epoch 10 | Train Loss: 0.6099 | Val Acc: 75.88%


0,1
tr_loss,█▅▃▃▂▂▁▁▁▁
val_accuracy,▁▄▆▇▇█████

0,1
tr_loss,0.60986
val_accuracy,75.8753


[34m[1mwandb[0m: Agent Starting Run: c9kv6ikn with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 3


100%|██████████| 346/346 [00:15<00:00, 21.68it/s]


Epoch 1 | Train Loss: 2.5754 | Val Acc: 38.01%


100%|██████████| 346/346 [00:15<00:00, 21.94it/s]


Epoch 2 | Train Loss: 1.9978 | Val Acc: 54.10%


100%|██████████| 346/346 [00:15<00:00, 21.78it/s]


Epoch 3 | Train Loss: 1.4278 | Val Acc: 66.04%


100%|██████████| 346/346 [00:16<00:00, 20.52it/s]


Epoch 4 | Train Loss: 1.1195 | Val Acc: 70.02%


100%|██████████| 346/346 [00:15<00:00, 21.63it/s]


Epoch 5 | Train Loss: 0.9483 | Val Acc: 71.34%


100%|██████████| 346/346 [00:15<00:00, 22.00it/s]


Epoch 6 | Train Loss: 0.8571 | Val Acc: 74.12%


100%|██████████| 346/346 [00:15<00:00, 22.04it/s]


Epoch 7 | Train Loss: 0.7791 | Val Acc: 74.23%


100%|██████████| 346/346 [00:15<00:00, 22.16it/s]


Epoch 8 | Train Loss: 0.7268 | Val Acc: 75.46%


100%|██████████| 346/346 [00:16<00:00, 21.02it/s]


Epoch 9 | Train Loss: 0.6922 | Val Acc: 75.31%


100%|██████████| 346/346 [00:16<00:00, 21.14it/s]


Epoch 10 | Train Loss: 0.6522 | Val Acc: 76.03%


0,1
tr_loss,█▆▄▃▂▂▁▁▁▁
val_accuracy,▁▄▆▇▇█████

0,1
tr_loss,0.65219
val_accuracy,76.03392


[34m[1mwandb[0m: Agent Starting Run: reun48zs with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 3


100%|██████████| 346/346 [00:16<00:00, 21.37it/s]


Epoch 1 | Train Loss: 2.6354 | Val Acc: 33.66%


100%|██████████| 346/346 [00:17<00:00, 20.29it/s]


Epoch 2 | Train Loss: 2.4914 | Val Acc: 33.89%


100%|██████████| 346/346 [00:16<00:00, 21.43it/s]


Epoch 3 | Train Loss: 2.4416 | Val Acc: 34.71%


100%|██████████| 346/346 [00:16<00:00, 20.84it/s]


Epoch 4 | Train Loss: 2.4086 | Val Acc: 34.00%


100%|██████████| 346/346 [00:16<00:00, 21.11it/s]


Epoch 5 | Train Loss: 2.4030 | Val Acc: 34.80%


100%|██████████| 346/346 [00:17<00:00, 19.81it/s]


Epoch 6 | Train Loss: 2.3887 | Val Acc: 35.22%


100%|██████████| 346/346 [00:17<00:00, 19.89it/s]


Epoch 7 | Train Loss: 2.3767 | Val Acc: 35.17%


100%|██████████| 346/346 [00:16<00:00, 20.85it/s]


Epoch 8 | Train Loss: 2.3748 | Val Acc: 34.29%


100%|██████████| 346/346 [00:16<00:00, 20.90it/s]


Epoch 9 | Train Loss: 2.2924 | Val Acc: 37.95%


100%|██████████| 346/346 [00:16<00:00, 20.52it/s]


Epoch 10 | Train Loss: 2.1601 | Val Acc: 41.30%


0,1
tr_loss,█▆▅▅▅▄▄▄▃▁
val_accuracy,▁▁▂▁▂▂▂▂▅█

0,1
tr_loss,2.16009
val_accuracy,41.29607


[34m[1mwandb[0m: Agent Starting Run: f46fetg4 with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 3


100%|██████████| 346/346 [00:14<00:00, 23.74it/s]


Epoch 1 | Train Loss: 2.6661 | Val Acc: 34.44%


100%|██████████| 346/346 [00:14<00:00, 23.78it/s]


Epoch 2 | Train Loss: 2.2807 | Val Acc: 42.35%


100%|██████████| 346/346 [00:14<00:00, 23.48it/s]


Epoch 3 | Train Loss: 1.9917 | Val Acc: 47.49%


100%|██████████| 346/346 [00:14<00:00, 23.65it/s]


Epoch 4 | Train Loss: 1.7862 | Val Acc: 53.05%


100%|██████████| 346/346 [00:14<00:00, 23.95it/s]


Epoch 5 | Train Loss: 1.6360 | Val Acc: 55.98%


100%|██████████| 346/346 [00:14<00:00, 23.78it/s]


Epoch 6 | Train Loss: 1.5208 | Val Acc: 58.72%


100%|██████████| 346/346 [00:14<00:00, 23.92it/s]


Epoch 7 | Train Loss: 1.4422 | Val Acc: 60.05%


100%|██████████| 346/346 [00:14<00:00, 23.69it/s]


Epoch 8 | Train Loss: 1.3837 | Val Acc: 62.16%


100%|██████████| 346/346 [00:14<00:00, 23.09it/s]


Epoch 9 | Train Loss: 1.3265 | Val Acc: 62.75%


100%|██████████| 346/346 [00:15<00:00, 22.92it/s]


Epoch 10 | Train Loss: 1.2766 | Val Acc: 63.75%


0,1
tr_loss,█▆▅▄▃▂▂▂▁▁
val_accuracy,▁▃▄▅▆▇▇███

0,1
tr_loss,1.27659
val_accuracy,63.74517


[34m[1mwandb[0m: Agent Starting Run: io7kx0y8 with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 1


100%|██████████| 346/346 [00:11<00:00, 29.98it/s]


Epoch 1 | Train Loss: 2.5888 | Val Acc: 34.56%


100%|██████████| 346/346 [00:11<00:00, 29.70it/s]


Epoch 2 | Train Loss: 2.0623 | Val Acc: 53.50%


100%|██████████| 346/346 [00:10<00:00, 32.45it/s]


Epoch 3 | Train Loss: 1.4663 | Val Acc: 63.11%


100%|██████████| 346/346 [00:10<00:00, 33.13it/s]


Epoch 4 | Train Loss: 1.2056 | Val Acc: 67.04%


100%|██████████| 346/346 [00:11<00:00, 30.43it/s]


Epoch 5 | Train Loss: 1.0788 | Val Acc: 69.09%


100%|██████████| 346/346 [00:11<00:00, 30.60it/s]


Epoch 6 | Train Loss: 0.9748 | Val Acc: 70.70%


100%|██████████| 346/346 [00:11<00:00, 30.70it/s]


Epoch 7 | Train Loss: 0.9034 | Val Acc: 72.89%


100%|██████████| 346/346 [00:11<00:00, 30.72it/s]


Epoch 8 | Train Loss: 0.8485 | Val Acc: 72.08%


100%|██████████| 346/346 [00:09<00:00, 34.63it/s]


Epoch 9 | Train Loss: 0.8084 | Val Acc: 73.24%


100%|██████████| 346/346 [00:11<00:00, 30.73it/s]


Epoch 10 | Train Loss: 0.7655 | Val Acc: 74.24%


0,1
tr_loss,█▆▄▃▂▂▂▁▁▁
val_accuracy,▁▄▆▇▇▇████

0,1
tr_loss,0.76545
val_accuracy,74.24006


[34m[1mwandb[0m: Agent Starting Run: 91pea337 with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 2


100%|██████████| 346/346 [00:11<00:00, 29.28it/s]


Epoch 1 | Train Loss: 2.4747 | Val Acc: 43.57%


100%|██████████| 346/346 [00:11<00:00, 29.26it/s]


Epoch 2 | Train Loss: 1.7964 | Val Acc: 54.63%


100%|██████████| 346/346 [00:12<00:00, 28.33it/s]


Epoch 3 | Train Loss: 1.5090 | Val Acc: 60.93%


100%|██████████| 346/346 [00:12<00:00, 27.67it/s]


Epoch 4 | Train Loss: 1.2976 | Val Acc: 65.26%


100%|██████████| 346/346 [00:12<00:00, 27.31it/s]


Epoch 5 | Train Loss: 1.1665 | Val Acc: 66.89%


100%|██████████| 346/346 [00:12<00:00, 27.71it/s]


Epoch 6 | Train Loss: 1.0720 | Val Acc: 69.82%


100%|██████████| 346/346 [00:12<00:00, 27.70it/s]


Epoch 7 | Train Loss: 1.0068 | Val Acc: 71.30%


100%|██████████| 346/346 [00:12<00:00, 27.91it/s]


Epoch 8 | Train Loss: 0.9538 | Val Acc: 70.99%


100%|██████████| 346/346 [00:12<00:00, 27.76it/s]


Epoch 9 | Train Loss: 0.9134 | Val Acc: 72.26%


100%|██████████| 346/346 [00:12<00:00, 28.35it/s]


Epoch 10 | Train Loss: 0.8804 | Val Acc: 73.25%


0,1
tr_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▆▇█▇██

0,1
tr_loss,0.8804
val_accuracy,73.25085


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xcu3n5ns with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 2


100%|██████████| 346/346 [00:12<00:00, 26.73it/s]


Epoch 1 | Train Loss: 2.3461 | Val Acc: 51.49%


100%|██████████| 346/346 [00:12<00:00, 27.98it/s]


Epoch 2 | Train Loss: 1.4636 | Val Acc: 64.52%


100%|██████████| 346/346 [00:11<00:00, 29.15it/s]


Epoch 3 | Train Loss: 1.1252 | Val Acc: 70.46%


100%|██████████| 346/346 [00:11<00:00, 29.02it/s]


Epoch 4 | Train Loss: 0.9600 | Val Acc: 72.75%


100%|██████████| 346/346 [00:12<00:00, 27.63it/s]


Epoch 5 | Train Loss: 0.8679 | Val Acc: 72.78%


100%|██████████| 346/346 [00:12<00:00, 27.32it/s]


Epoch 6 | Train Loss: 0.8073 | Val Acc: 74.28%


100%|██████████| 346/346 [00:12<00:00, 27.13it/s]


Epoch 7 | Train Loss: 0.7522 | Val Acc: 75.84%


100%|██████████| 346/346 [00:12<00:00, 27.31it/s]


Epoch 8 | Train Loss: 0.7301 | Val Acc: 76.13%


100%|██████████| 346/346 [00:12<00:00, 27.49it/s]


Epoch 9 | Train Loss: 0.6908 | Val Acc: 76.92%


100%|██████████| 346/346 [00:12<00:00, 27.13it/s]


Epoch 10 | Train Loss: 0.6749 | Val Acc: 76.78%


0,1
tr_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▅▆▇▇▇████

0,1
tr_loss,0.67489
val_accuracy,76.77511


[34m[1mwandb[0m: Agent Starting Run: 1xksn0w3 with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 2


100%|██████████| 346/346 [00:11<00:00, 28.92it/s]


Epoch 1 | Train Loss: 2.4722 | Val Acc: 44.79%


100%|██████████| 346/346 [00:12<00:00, 26.99it/s]


Epoch 2 | Train Loss: 1.7903 | Val Acc: 56.93%


100%|██████████| 346/346 [00:12<00:00, 27.23it/s]


Epoch 3 | Train Loss: 1.4026 | Val Acc: 64.49%


100%|██████████| 346/346 [00:12<00:00, 26.98it/s]


Epoch 4 | Train Loss: 1.1700 | Val Acc: 68.22%


100%|██████████| 346/346 [00:12<00:00, 27.39it/s]


Epoch 5 | Train Loss: 1.0243 | Val Acc: 71.94%


100%|██████████| 346/346 [00:12<00:00, 26.99it/s]


Epoch 6 | Train Loss: 0.9499 | Val Acc: 72.66%


100%|██████████| 346/346 [00:12<00:00, 27.02it/s]


Epoch 7 | Train Loss: 0.8865 | Val Acc: 72.89%


100%|██████████| 346/346 [00:12<00:00, 26.78it/s]


Epoch 8 | Train Loss: 0.8312 | Val Acc: 73.54%


100%|██████████| 346/346 [00:12<00:00, 26.93it/s]


Epoch 9 | Train Loss: 0.7867 | Val Acc: 75.21%


100%|██████████| 346/346 [00:12<00:00, 26.95it/s]


Epoch 10 | Train Loss: 0.7665 | Val Acc: 74.71%


0,1
tr_loss,█▅▄▃▂▂▁▁▁▁
val_accuracy,▁▄▆▆▇▇▇███

0,1
tr_loss,0.7665
val_accuracy,74.71016


[34m[1mwandb[0m: Agent Starting Run: cc0wfb73 with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 3


100%|██████████| 346/346 [00:18<00:00, 19.06it/s]


Epoch 1 | Train Loss: 2.6092 | Val Acc: 34.08%


100%|██████████| 346/346 [00:16<00:00, 20.43it/s]


Epoch 2 | Train Loss: 2.2028 | Val Acc: 52.11%


100%|██████████| 346/346 [00:16<00:00, 20.67it/s]


Epoch 3 | Train Loss: 1.5334 | Val Acc: 60.74%


100%|██████████| 346/346 [00:16<00:00, 20.38it/s]


Epoch 4 | Train Loss: 1.3126 | Val Acc: 63.85%


100%|██████████| 346/346 [00:17<00:00, 19.24it/s]


Epoch 5 | Train Loss: 1.1648 | Val Acc: 65.62%


100%|██████████| 346/346 [00:16<00:00, 20.51it/s]


Epoch 6 | Train Loss: 1.0681 | Val Acc: 68.91%


100%|██████████| 346/346 [00:16<00:00, 20.62it/s]


Epoch 7 | Train Loss: 0.9862 | Val Acc: 71.23%


100%|██████████| 346/346 [00:16<00:00, 20.70it/s]


Epoch 8 | Train Loss: 0.9138 | Val Acc: 70.79%


100%|██████████| 346/346 [00:17<00:00, 20.19it/s]


Epoch 9 | Train Loss: 0.8621 | Val Acc: 71.53%


100%|██████████| 346/346 [00:17<00:00, 19.77it/s]


Epoch 10 | Train Loss: 0.8190 | Val Acc: 72.79%


0,1
tr_loss,█▆▄▃▂▂▂▁▁▁
val_accuracy,▁▄▆▆▇▇████

0,1
tr_loss,0.81899
val_accuracy,72.78653


In [None]:
device

device(type='cuda')