<a href="https://colab.research.google.com/github/arunangshudutta/DA6401_assignment3/blob/main/Ques_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch.optim as optim
from tqdm import tqdm

import shutil
import pandas as pd

from collections import Counter

import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset

import torch
import torch.nn as nn

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!ls /content/drive/MyDrive/Colab\ Notebooks/DA6401/assignment\ 3/dakshina_dataset_v1.0/hi/lexicons

hi.translit.sampled.dev.tsv   hi.translit.sampled.train.tsv
hi.translit.sampled.test.tsv


In [4]:
# Source folder in Google Drive
src_folder = '/content/drive/MyDrive/Colab Notebooks/DA6401/assignment 3/dakshina_dataset_v1.0/hi/lexicons'

# Destination in Colab's local storage
dst_folder = '/content/hindi_data'

# Copy the entire folder
shutil.copytree(src_folder, dst_folder)

'/content/hindi_data'

In [5]:
# Load the data
df_train = pd.read_csv("hindi_data/hi.translit.sampled.train.tsv", sep="\t", header=None, names=["target", "input", "num"])
# Drop rows with NaNs (if any)
df_train = df_train.dropna()

df_val = pd.read_csv("hindi_data/hi.translit.sampled.dev.tsv", sep="\t", header=None, names=["target", "input", "num"])
df_val = df_val.dropna()

In [6]:
# Special tokens
PAD_token = "<pad>"
SOS_token = "<sos>"  # start of sequence
EOS_token = "<eos>"  # end of sequence

def build_vocab(sequences):
    vocab = set(char for seq in sequences for char in seq)
    vocab = [PAD_token, SOS_token, EOS_token] + sorted(vocab)
    char2idx = {c: i for i, c in enumerate(vocab)}
    idx2char = {i: c for c, i in char2idx.items()}
    return char2idx, idx2char

input_char2idx, input_idx2char = build_vocab(df_train["input"])
target_char2idx, target_idx2char = build_vocab(df_train["target"])

print(f"Input vocab size: {len(input_char2idx)}")
print(f"Target vocab size: {len(target_char2idx)}")

Input vocab size: 29
Target vocab size: 66


In [7]:
def encode_sequence(seq, char2idx, add_sos_eos=True):
    tokens = [char2idx[c] for c in seq]
    if add_sos_eos:
        tokens = [char2idx[SOS_token]] + tokens + [char2idx[EOS_token]]
    return tokens

input_sequences = [encode_sequence(seq, input_char2idx) for seq in df_train["input"]]
target_sequences = [encode_sequence(seq, target_char2idx) for seq in df_train["target"]]

input_val = [encode_sequence(seq, input_char2idx) for seq in df_val["input"]]
target_val = [encode_sequence(seq, target_char2idx) for seq in df_val["target"]]

In [8]:
class TransliterationDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return torch.tensor(self.inputs[idx]), torch.tensor(self.targets[idx])

def collate_fn(batch):
    inputs, targets = zip(*batch)
    inputs_padded = pad_sequence(inputs, batch_first=True, padding_value=input_char2idx[PAD_token])
    targets_padded = pad_sequence(targets, batch_first=True, padding_value=target_char2idx[PAD_token])
    return inputs_padded, targets_padded

train_dataset = TransliterationDataset(input_sequences, target_sequences)
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True, collate_fn=collate_fn)

val_dataset = TransliterationDataset(input_val, target_val)
val_dataloader = DataLoader(val_dataset, batch_size=128, shuffle=True, collate_fn=collate_fn)

In [9]:


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---- Flexible Encoder ----
class Encoder(nn.Module):
    def __init__(self, input_dim, embed_dim, hidden_dim, num_layers=1, cell_type="RNN", dropout=0.1):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(input_dim, embed_dim)
        rnn_cell = {"RNN": nn.RNN, "LSTM": nn.LSTM, "GRU": nn.GRU}[cell_type]
        self.rnn = rnn_cell(embed_dim, hidden_dim, num_layers=num_layers, batch_first=True)
        self.cell_type = cell_type
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        embedded = self.embedding(src)
        embedded = self.dropout(embedded)
        outputs, hidden = self.rnn(embedded)
        return hidden


# ---- Flexible Decoder ----
class Decoder(nn.Module):
    def __init__(self, output_dim, embed_dim, hidden_dim, num_layers=1, cell_type="RNN", dropout=0.1):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(output_dim, embed_dim)
        rnn_cell = {"RNN": nn.RNN, "LSTM": nn.LSTM, "GRU": nn.GRU}[cell_type]
        self.rnn = rnn_cell(embed_dim, hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc_out = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)
        self.cell_type = cell_type

    def forward(self, input, hidden):
        input = input.unsqueeze(1)  # [B] -> [B,1]
        embedded = self.embedding(input)  # [B,1,E]
        embedded = self.dropout(embedded)
        output, hidden = self.rnn(embedded, hidden)
        prediction = self.fc_out(output.squeeze(1))  # [B, V]
        return prediction, hidden


In [10]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, sos_idx, eos_idx, cell_type="RNN"):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.sos_idx = sos_idx
        self.eos_idx = eos_idx
        self.cell_type = cell_type

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        B, T = trg.shape
        output_dim = self.decoder.fc_out.out_features
        outputs = torch.zeros(B, T, output_dim).to(device)

        hidden = self.encoder(src)
        input = torch.tensor([self.sos_idx] * B).to(device)

        for t in range(T):
            output, hidden = self.decoder(input, hidden)
            outputs[:, t, :] = output

            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            input = trg[:, t] if teacher_force else output.argmax(1)

        return outputs


In [11]:
import wandb

sweep_config = {
    'method': 'bayes',
    'metric': {
      'name': 'val_accuracy',
      'goal': 'maximize'
    },
    'parameters': {
        'embedding_size': {
            'values': [16, 32, 64, 256]
        },
        'num_layers': {
            'values': [1, 2, 3]
        },
        'hidden_layer_size': {
            'values': [256]
        },
        'cell_type': {
            'values': ['RNN', 'LSTM', 'GRU']
        },
        'dropout': {
            'values': [0.2, 0.3]
        },
         'learning_rate': {
            'values': [0.01]
        },
    }
}
wandb.login(key="b81ae278300a216ce5380e07b52d628e2dd6eb69")
sweep_id = wandb.sweep(sweep = sweep_config, entity="arunangshudutta218-iitm", project = 'dl_assgn_3_q_2')

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33marunangshudutta218[0m ([33marunangshudutta218-iitm[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Create sweep with ID: 6s33kk7x
Sweep URL: https://wandb.ai/arunangshudutta218-iitm/dl_assgn_3_q_2/sweeps/6s33kk7x


In [12]:
def main():
  with wandb.init() as run:

    em_sz = wandb.config.embedding_size
    num_lay = wandb.config.num_layers
    hid_sz = wandb.config.hidden_layer_size
    cty = wandb.config.cell_type
    dr = wandb.config.dropout
    lr = wandb.config.learning_rate



    wandb.run.name = "cty_{}_lay_{}_hsz_{}_emsz_{}_dr_{}_lr_{}".format(cty, num_lay, hid_sz, em_sz, dr, lr)



    # Initialize model
    encoder = Encoder(len(input_char2idx), embed_dim = em_sz, hidden_dim = hid_sz, num_layers=num_lay, cell_type=cty, dropout=dr).to(device)
    decoder = Decoder(len(target_char2idx), embed_dim = em_sz, hidden_dim = hid_sz, num_layers=num_lay, cell_type=cty, dropout=dr).to(device)
    model = Seq2Seq(encoder, decoder, sos_idx=target_char2idx['<sos>'], eos_idx=target_char2idx['<eos>'], cell_type=cty).to(device)

    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss(ignore_index=target_char2idx["<pad>"])

    # Training Loop
    for epoch in range(1, 11):
        model.train()
        epoch_loss = 0
        for src, trg in tqdm(train_dataloader):
            src, trg = src.to(device), trg.to(device)
            optimizer.zero_grad()

            output = model(src, trg)  # output: (B, T, V)
            output = output.view(-1, len(target_char2idx))
            trg = trg.view(-1)

            loss = criterion(output, trg)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        # --- Validation ---
        model.eval()
        total_tokens = 0
        correct_tokens = 0
        with torch.no_grad():
            for val_src, val_trg in val_dataloader:
                val_src, val_trg = val_src.to(device), val_trg.to(device)
                val_output = model(val_src, val_trg)  # shape: (B, T, V)
                val_pred = val_output.argmax(dim=-1)  # (B, T)

                mask = val_trg != target_char2idx["<pad>"]
                correct = (val_pred == val_trg) & mask
                correct_tokens += correct.sum().item()
                total_tokens += mask.sum().item()

        val_accuracy = correct_tokens / total_tokens * 100

        print(f"Epoch {epoch} | Train Loss: {epoch_loss / len(train_dataloader):.4f} | Val Acc: {val_accuracy:.2f}%")

        wandb.log({'tr_loss' : epoch_loss / len(train_dataloader), 'val_accuracy' : val_accuracy})

wandb.agent(sweep_id, function = main, count = 8)
wandb.finish()

[34m[1mwandb[0m: Agent Starting Run: ttjcv3gv with config:
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 2


100%|██████████| 346/346 [00:10<00:00, 34.35it/s]


Epoch 1 | Train Loss: 2.8679 | Val Acc: 28.82%


100%|██████████| 346/346 [00:09<00:00, 35.63it/s]


Epoch 2 | Train Loss: 2.7564 | Val Acc: 29.97%


100%|██████████| 346/346 [00:09<00:00, 35.21it/s]


Epoch 3 | Train Loss: 2.7568 | Val Acc: 30.82%


100%|██████████| 346/346 [00:09<00:00, 36.69it/s]


Epoch 4 | Train Loss: 2.7359 | Val Acc: 29.73%


100%|██████████| 346/346 [00:09<00:00, 37.15it/s]


Epoch 5 | Train Loss: 2.7453 | Val Acc: 28.98%


100%|██████████| 346/346 [00:09<00:00, 35.40it/s]


Epoch 6 | Train Loss: 2.7288 | Val Acc: 29.17%


100%|██████████| 346/346 [00:09<00:00, 36.03it/s]


Epoch 7 | Train Loss: 2.7275 | Val Acc: 30.12%


100%|██████████| 346/346 [00:08<00:00, 40.55it/s]


Epoch 8 | Train Loss: 2.7277 | Val Acc: 29.64%


100%|██████████| 346/346 [00:09<00:00, 35.41it/s]


Epoch 9 | Train Loss: 2.7334 | Val Acc: 30.73%


100%|██████████| 346/346 [00:09<00:00, 35.62it/s]


Epoch 10 | Train Loss: 2.7318 | Val Acc: 30.13%


0,1
tr_loss,█▂▂▁▂▁▁▁▁▁
val_accuracy,▁▅█▄▂▂▆▄█▆

0,1
tr_loss,2.7318
val_accuracy,30.13209


[34m[1mwandb[0m: Agent Starting Run: l4cxpv7r with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 3


100%|██████████| 346/346 [00:16<00:00, 21.13it/s]


Epoch 1 | Train Loss: 2.4172 | Val Acc: 45.00%


100%|██████████| 346/346 [00:15<00:00, 22.49it/s]


Epoch 2 | Train Loss: 1.8299 | Val Acc: 53.97%


100%|██████████| 346/346 [00:15<00:00, 22.24it/s]


Epoch 3 | Train Loss: 1.4665 | Val Acc: 61.39%


100%|██████████| 346/346 [00:15<00:00, 22.38it/s]


Epoch 4 | Train Loss: 1.1805 | Val Acc: 67.22%


100%|██████████| 346/346 [00:15<00:00, 22.38it/s]


Epoch 5 | Train Loss: 1.0095 | Val Acc: 69.94%


100%|██████████| 346/346 [00:16<00:00, 21.11it/s]


Epoch 6 | Train Loss: 0.8985 | Val Acc: 71.11%


100%|██████████| 346/346 [00:15<00:00, 22.21it/s]


Epoch 7 | Train Loss: 0.8316 | Val Acc: 71.50%


100%|██████████| 346/346 [00:15<00:00, 22.33it/s]


Epoch 8 | Train Loss: 0.7698 | Val Acc: 73.65%


100%|██████████| 346/346 [00:15<00:00, 22.39it/s]


Epoch 9 | Train Loss: 0.7341 | Val Acc: 72.47%


100%|██████████| 346/346 [00:15<00:00, 22.36it/s]


Epoch 10 | Train Loss: 0.6974 | Val Acc: 73.72%


0,1
tr_loss,█▆▄▃▂▂▂▁▁▁
val_accuracy,▁▃▅▆▇▇▇███

0,1
tr_loss,0.69739
val_accuracy,73.72094


[34m[1mwandb[0m: Agent Starting Run: clei9me1 with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 3


100%|██████████| 346/346 [00:15<00:00, 21.92it/s]


Epoch 1 | Train Loss: 2.6975 | Val Acc: 32.41%


100%|██████████| 346/346 [00:15<00:00, 22.38it/s]


Epoch 2 | Train Loss: 2.5407 | Val Acc: 32.89%


100%|██████████| 346/346 [00:15<00:00, 22.36it/s]


Epoch 3 | Train Loss: 2.4922 | Val Acc: 33.74%


100%|██████████| 346/346 [00:15<00:00, 22.43it/s]


Epoch 4 | Train Loss: 2.4653 | Val Acc: 34.29%


100%|██████████| 346/346 [00:16<00:00, 21.34it/s]


Epoch 5 | Train Loss: 2.4117 | Val Acc: 36.81%


100%|██████████| 346/346 [00:15<00:00, 21.90it/s]


Epoch 6 | Train Loss: 2.3061 | Val Acc: 39.56%


100%|██████████| 346/346 [00:15<00:00, 22.39it/s]


Epoch 7 | Train Loss: 2.1233 | Val Acc: 42.32%


100%|██████████| 346/346 [00:15<00:00, 22.43it/s]


Epoch 8 | Train Loss: 2.0355 | Val Acc: 44.64%


100%|██████████| 346/346 [00:15<00:00, 22.34it/s]


Epoch 9 | Train Loss: 1.9316 | Val Acc: 47.61%


100%|██████████| 346/346 [00:15<00:00, 21.92it/s]


Epoch 10 | Train Loss: 1.8414 | Val Acc: 48.88%


0,1
tr_loss,█▇▆▆▆▅▃▃▂▁
val_accuracy,▁▁▂▂▃▄▅▆▇█

0,1
tr_loss,1.84137
val_accuracy,48.87812


[34m[1mwandb[0m: Agent Starting Run: 6tmoyt5c with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 3


100%|██████████| 346/346 [00:15<00:00, 21.92it/s]


Epoch 1 | Train Loss: 2.5242 | Val Acc: 42.53%


100%|██████████| 346/346 [00:15<00:00, 22.35it/s]


Epoch 2 | Train Loss: 1.6969 | Val Acc: 59.62%


100%|██████████| 346/346 [00:15<00:00, 22.01it/s]


Epoch 3 | Train Loss: 1.2699 | Val Acc: 66.01%


100%|██████████| 346/346 [00:16<00:00, 20.90it/s]


Epoch 4 | Train Loss: 1.0705 | Val Acc: 69.02%


100%|██████████| 346/346 [00:15<00:00, 22.14it/s]


Epoch 5 | Train Loss: 0.9421 | Val Acc: 71.16%


100%|██████████| 346/346 [00:15<00:00, 22.39it/s]


Epoch 6 | Train Loss: 0.8661 | Val Acc: 73.03%


100%|██████████| 346/346 [00:15<00:00, 22.25it/s]


Epoch 7 | Train Loss: 0.8038 | Val Acc: 72.88%


100%|██████████| 346/346 [00:15<00:00, 22.26it/s]


Epoch 8 | Train Loss: 0.7759 | Val Acc: 73.41%


100%|██████████| 346/346 [00:16<00:00, 21.19it/s]


Epoch 9 | Train Loss: 0.7394 | Val Acc: 74.02%


100%|██████████| 346/346 [00:15<00:00, 21.95it/s]


Epoch 10 | Train Loss: 0.7130 | Val Acc: 73.59%


0,1
tr_loss,█▅▃▂▂▂▁▁▁▁
val_accuracy,▁▅▆▇▇█████

0,1
tr_loss,0.71302
val_accuracy,73.59405


[34m[1mwandb[0m: Agent Starting Run: ggz1fwk4 with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 3


100%|██████████| 346/346 [00:15<00:00, 22.17it/s]


Epoch 1 | Train Loss: 2.4886 | Val Acc: 45.44%


100%|██████████| 346/346 [00:15<00:00, 21.97it/s]


Epoch 2 | Train Loss: 1.7962 | Val Acc: 56.83%


100%|██████████| 346/346 [00:16<00:00, 20.93it/s]


Epoch 3 | Train Loss: 1.3058 | Val Acc: 65.76%


100%|██████████| 346/346 [00:15<00:00, 22.20it/s]


Epoch 4 | Train Loss: 1.0609 | Val Acc: 68.68%


100%|██████████| 346/346 [00:15<00:00, 22.26it/s]


Epoch 5 | Train Loss: 0.9179 | Val Acc: 71.33%


100%|██████████| 346/346 [00:15<00:00, 22.25it/s]


Epoch 6 | Train Loss: 0.8408 | Val Acc: 71.34%


100%|██████████| 346/346 [00:15<00:00, 22.28it/s]


Epoch 7 | Train Loss: 0.7762 | Val Acc: 74.81%


100%|██████████| 346/346 [00:16<00:00, 21.36it/s]


Epoch 8 | Train Loss: 0.7369 | Val Acc: 73.65%


100%|██████████| 346/346 [00:16<00:00, 21.53it/s]


Epoch 9 | Train Loss: 0.6873 | Val Acc: 75.18%


100%|██████████| 346/346 [00:15<00:00, 22.12it/s]


Epoch 10 | Train Loss: 0.6486 | Val Acc: 74.57%


0,1
tr_loss,█▅▄▃▂▂▁▁▁▁
val_accuracy,▁▄▆▆▇▇████

0,1
tr_loss,0.64865
val_accuracy,74.57461


[34m[1mwandb[0m: Agent Starting Run: q42m4sqp with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 3


100%|██████████| 346/346 [00:14<00:00, 23.44it/s]


Epoch 1 | Train Loss: 1.9109 | Val Acc: 60.07%


100%|██████████| 346/346 [00:14<00:00, 23.89it/s]


Epoch 2 | Train Loss: 1.1839 | Val Acc: 65.28%


100%|██████████| 346/346 [00:14<00:00, 24.38it/s]


Epoch 3 | Train Loss: 1.0956 | Val Acc: 67.92%


100%|██████████| 346/346 [00:14<00:00, 23.66it/s]


Epoch 4 | Train Loss: 1.0596 | Val Acc: 67.70%


100%|██████████| 346/346 [00:14<00:00, 23.88it/s]


Epoch 5 | Train Loss: 1.0401 | Val Acc: 68.14%


100%|██████████| 346/346 [00:14<00:00, 23.30it/s]


Epoch 6 | Train Loss: 1.0248 | Val Acc: 67.11%


100%|██████████| 346/346 [00:14<00:00, 23.89it/s]


Epoch 7 | Train Loss: 0.9994 | Val Acc: 69.16%


100%|██████████| 346/346 [00:14<00:00, 23.73it/s]


Epoch 8 | Train Loss: 0.9937 | Val Acc: 70.00%


100%|██████████| 346/346 [00:14<00:00, 23.74it/s]


Epoch 9 | Train Loss: 0.9897 | Val Acc: 68.13%


100%|██████████| 346/346 [00:14<00:00, 23.85it/s]


Epoch 10 | Train Loss: 0.9981 | Val Acc: 68.83%


0,1
tr_loss,█▂▂▂▁▁▁▁▁▁
val_accuracy,▁▅▇▆▇▆▇█▇▇

0,1
tr_loss,0.99812
val_accuracy,68.82679


[34m[1mwandb[0m: Agent Starting Run: vn8wq022 with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 3


100%|██████████| 346/346 [00:15<00:00, 22.85it/s]


Epoch 1 | Train Loss: 2.0842 | Val Acc: 56.73%


100%|██████████| 346/346 [00:14<00:00, 23.97it/s]


Epoch 2 | Train Loss: 1.3444 | Val Acc: 63.45%


100%|██████████| 346/346 [00:14<00:00, 23.85it/s]


Epoch 3 | Train Loss: 1.2000 | Val Acc: 64.00%


100%|██████████| 346/346 [00:14<00:00, 23.92it/s]


Epoch 4 | Train Loss: 1.1543 | Val Acc: 66.50%


100%|██████████| 346/346 [00:14<00:00, 23.83it/s]


Epoch 5 | Train Loss: 1.1266 | Val Acc: 66.24%


100%|██████████| 346/346 [00:14<00:00, 23.87it/s]


Epoch 6 | Train Loss: 1.1199 | Val Acc: 66.14%


100%|██████████| 346/346 [00:14<00:00, 23.78it/s]


Epoch 7 | Train Loss: 1.0931 | Val Acc: 66.98%


100%|██████████| 346/346 [00:14<00:00, 23.89it/s]


Epoch 8 | Train Loss: 1.0847 | Val Acc: 64.94%


100%|██████████| 346/346 [00:14<00:00, 23.89it/s]


Epoch 9 | Train Loss: 1.0878 | Val Acc: 66.69%


100%|██████████| 346/346 [00:14<00:00, 23.23it/s]


Epoch 10 | Train Loss: 1.0646 | Val Acc: 67.58%


0,1
tr_loss,█▃▂▂▁▁▁▁▁▁
val_accuracy,▁▅▆▇▇▇█▆▇█

0,1
tr_loss,1.06459
val_accuracy,67.57513


[34m[1mwandb[0m: Agent Starting Run: e1lw3a3g with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers: 3


100%|██████████| 346/346 [00:14<00:00, 23.85it/s]


Epoch 1 | Train Loss: 2.0047 | Val Acc: 58.28%


100%|██████████| 346/346 [00:14<00:00, 23.13it/s]


Epoch 2 | Train Loss: 1.2517 | Val Acc: 63.90%


100%|██████████| 346/346 [00:14<00:00, 23.61it/s]


Epoch 3 | Train Loss: 1.1221 | Val Acc: 66.00%


100%|██████████| 346/346 [00:14<00:00, 23.72it/s]


Epoch 4 | Train Loss: 1.0765 | Val Acc: 66.33%


100%|██████████| 346/346 [00:14<00:00, 23.50it/s]


Epoch 5 | Train Loss: 1.0624 | Val Acc: 67.97%


100%|██████████| 346/346 [00:14<00:00, 23.37it/s]


Epoch 6 | Train Loss: 1.0426 | Val Acc: 67.81%


100%|██████████| 346/346 [00:14<00:00, 23.36it/s]


Epoch 7 | Train Loss: 1.0394 | Val Acc: 67.44%


100%|██████████| 346/346 [00:14<00:00, 24.07it/s]


Epoch 8 | Train Loss: 1.0209 | Val Acc: 68.30%


100%|██████████| 346/346 [00:14<00:00, 24.27it/s]


Epoch 9 | Train Loss: 1.0269 | Val Acc: 66.77%


100%|██████████| 346/346 [00:14<00:00, 24.23it/s]


Epoch 10 | Train Loss: 1.0094 | Val Acc: 68.84%


0,1
tr_loss,█▃▂▁▁▁▁▁▁▁
val_accuracy,▁▅▆▆▇▇▇█▇█

0,1
tr_loss,1.0094
val_accuracy,68.84409


In [13]:
device

device(type='cuda')