In [1]:
import os
import json
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import wandb

In [2]:
class TransliterationDataset(Dataset):
    def __init__(self, file_path, source_vocab=None, target_vocab=None, max_length=32, create_vocab=False):
        # Attempt to read tab-separated file containing native and romanized text
        try:
            data = pd.read_csv(file_path, sep='\t', header=None,
                               names=['native', 'roman', 'freq'],
                               usecols=[0, 1], dtype=str)
            print(f"Loaded {len(data)} records from {file_path}")

            # Replace missing entries with empty strings
            data['native'] = data['native'].fillna('')
            data['roman'] = data['roman'].fillna('')

            # Create input-output text pairs for transliteration
            self.data_pairs = list(zip(data['roman'], data['native']))
            print(f"Example pairs: {self.data_pairs[:2]}")
        except Exception as err:
            print(f"Failed to load data: {err}")
            self.data_pairs = [('', '')]  # Fallback entry

        self.max_length = max_length

        # Initialize or use existing vocabulary mappings
        if create_vocab:
            self.source_vocab = {'<pad>': 0, '<unk>': 1, '<sos>':2, '<eos>':3}
            self.target_vocab = {'<pad>': 0, '<unk>': 1, '<sos>':2, '<eos>':3}
            self._generate_vocab()
        else:
            self.source_vocab, self.target_vocab = source_vocab, target_vocab
            if '<eos>' not in self.source_vocab:
                self.source_vocab['<eos>'] = len(self.source_vocab)
            if '<eos>' not in self.target_vocab:
                self.target_vocab['<eos>'] = len(self.target_vocab)

    def _generate_vocab(self):
        # Create vocabularies at the character level from the dataset
        for src_text, tgt_text in self.data_pairs:
            for char in src_text:
                if char not in self.source_vocab:
                    self.source_vocab[char] = len(self.source_vocab)
            for char in tgt_text:
                if char not in self.target_vocab:
                    self.target_vocab[char] = len(self.target_vocab)
        print(f"Vocab sizes — Source: {len(self.source_vocab)}, Target: {len(self.target_vocab)}")

    def __len__(self):
        return len(self.data_pairs)

    def __getitem__(self, idx):
        src_seq, tgt_seq = self.data_pairs[idx]
        
        # Convert characters to indices with bounds checking
        src_idxs = [self.source_vocab['<sos>']]  # Start with SOS token
        for ch in src_seq:
            idx = self.source_vocab.get(ch, self.source_vocab['<unk>'])
            if idx >= len(self.source_vocab):
                idx = self.source_vocab['<unk>']  # Safety check
            src_idxs.append(idx)
            
        tgt_idxs = [self.target_vocab['<sos>']]  # Start with SOS token
        for ch in tgt_seq:
            idx = self.target_vocab.get(ch, self.target_vocab['<unk>'])
            if idx >= len(self.target_vocab):
                idx = self.target_vocab['<unk>']  # Safety check
            tgt_idxs.append(idx)
        
        # Add end-of-sequence token
        src_idxs.append(self.source_vocab['<eos>'])
        tgt_idxs.append(self.target_vocab['<eos>'])
        
        # Add padding
        pad_src = [self.source_vocab['<pad>']] * max(0, self.max_length - len(src_idxs))
        pad_tgt = [self.target_vocab['<pad>']] * max(0, self.max_length - len(tgt_idxs))
        
        # Truncate if necessary and convert to tensor
        src_idxs = (src_idxs + pad_src)[:self.max_length]
        tgt_idxs = (tgt_idxs + pad_tgt)[:self.max_length]
        
        # Make sure padding index is valid
        assert self.source_vocab['<pad>'] < len(self.source_vocab), "Padding index out of bounds for source vocab"
        assert self.target_vocab['<pad>'] < len(self.target_vocab), "Padding index out of bounds for target vocab"
        
        return torch.tensor(src_idxs, dtype=torch.long), torch.tensor(tgt_idxs, dtype=torch.long)

In [3]:
class EncoderRNN(nn.Module):
    def __init__(self, input_vocab_size, embedding_size, hidden_size, num_layers, dropout=0.0, cell_type='GRU'):
        super().__init__()
        self.embedding = nn.Embedding(input_vocab_size, embedding_size, padding_idx=0)
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.cell_type = cell_type
        
        # Choose appropriate RNN cell
        if cell_type == 'RNN':
            rnn_cell = nn.RNN
        elif cell_type == 'GRU':
            rnn_cell = nn.GRU
        elif cell_type == 'LSTM':
            rnn_cell = nn.LSTM
        else:
            raise ValueError(f"RNN type not recognized: {cell_type}")
        
        self.rnn = rnn_cell(
            embedding_size, 
            hidden_size, 
            num_layers=num_layers, 
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0
        )
    
    def forward(self, x):
        """
        Args:
            x: input sequence [batch_size, seq_len]
        Returns:
            outputs: encoder outputs [batch_size, seq_len, hidden_size]
            hidden: final hidden state [num_layers, batch_size, hidden_size]
        """
        # Apply embedding layer: [batch_size, seq_len] -> [batch_size, seq_len, embedding_size]
        embedded = self.embedding(x)
        
        # Pass through RNN
        outputs, hidden = self.rnn(embedded)
        
        return outputs, hidden

class DecoderRNN(nn.Module):
    def __init__(self, output_vocab_size, embedding_size, hidden_size, num_layers, dropout=0.0, cell_type='GRU'):
        super().__init__()
        self.output_vocab_size = output_vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.cell_type = cell_type
        
        self.embedding = nn.Embedding(output_vocab_size, embedding_size, padding_idx=0)
        
        # Choose appropriate RNN cell
        if cell_type == 'RNN':
            rnn_cell = nn.RNN
        elif cell_type == 'GRU':
            rnn_cell = nn.GRU
        elif cell_type == 'LSTM':
            rnn_cell = nn.LSTM
        else:
            raise ValueError(f"RNN type not recognized: {cell_type}")
        
        self.rnn = rnn_cell(
            embedding_size, 
            hidden_size, 
            num_layers=num_layers, 
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0
        )
        
        # Linear layer to produce output logits
        self.output_projection = nn.Linear(hidden_size, output_vocab_size)
    
    def forward(self, input_token, last_hidden):
        """
        Args:
            input_token: input token indices [batch_size, 1]
            last_hidden: previous decoder hidden state
        Returns:
            output: output logits [batch_size, output_vocab_size]
            hidden: new hidden state
        """
        # Get embedding for input token [batch_size, 1, embedding_size]
        embedded = self.embedding(input_token)
        
        # Pass through RNN
        output, hidden = self.rnn(embedded, last_hidden)
        
        # Project to vocabulary space [batch_size, output_vocab_size]
        output = self.output_projection(output.squeeze(1))
        
        return output, hidden

class Seq2Seq(nn.Module):
    def __init__(self, config, input_vocab_size, output_vocab_size):
        super().__init__()

        # Ensure vocabulary sizes are positive
        assert input_vocab_size > 0, f"Source vocab size invalid: {input_vocab_size}"
        assert output_vocab_size > 0, f"Target vocab size invalid: {output_vocab_size}"

        # Hyperparameter setup
        self.embedding_size = config.embed_dim
        self.hidden_size = config.hidden_dim
        self.rnn_type = config.cell_type
        self.encoder_layers = config.enc_layers
        self.decoder_layers = config.dec_layers
        
        # Initialize encoder and decoder
        self.encoder = EncoderRNN(
            input_vocab_size, 
            config.embed_dim, 
            config.hidden_dim, 
            config.enc_layers, 
            config.dropout, 
            config.cell_type
        )
        
        self.decoder = DecoderRNN(
            output_vocab_size, 
            config.embed_dim, 
            config.hidden_dim, 
            config.dec_layers, 
            config.dropout, 
            config.cell_type
        )
        
        # For accessing embeddings in training/inference
        self.input_embedding = self.encoder.embedding
        self.output_embedding = self.decoder.embedding

        print(f"Initialized model without attention: {self.rnn_type}, Encoder layers: {self.encoder_layers}, "
              f"Decoder layers: {self.decoder_layers}, Embedding: {self.embedding_size}, Hidden: {self.hidden_size}")

    def forward(self, source_seq, target_seq):
        """
        Forward pass through the entire seq2seq model.
        
        Args:
            source_seq: source sequence [batch_size, src_len]
            target_seq: target sequence [batch_size, tgt_len]
        
        Returns:
            outputs: output logits [batch_size, tgt_len-1, output_vocab_size]
        """
        batch_size, src_len = source_seq.size()
        tgt_len = target_seq.size(1)
        device = source_seq.device

        # Index range check and clamping
        if source_seq.max() >= self.input_embedding.num_embeddings:
            print("Warning: Source index out of bounds")
            source_seq = torch.clamp(source_seq, 0, self.input_embedding.num_embeddings - 1)
        if target_seq.max() >= self.output_embedding.num_embeddings:
            print("Warning: Target index out of bounds")
            target_seq = torch.clamp(target_seq, 0, self.output_embedding.num_embeddings - 1)

        try:
            # Encode source sequence
            _, encoder_hidden = self.encoder(source_seq)
            
            # Initialize decoder hidden state with encoder final state
            decoder_hidden = encoder_hidden
            
            # Teacher forcing: feed target as the next input
            decoder_input = target_seq[:, :-1]  # Input: from <sos> to the second-to-last token
            # Store all decoder outputs
            outputs = torch.zeros(batch_size, tgt_len-1, self.decoder.output_vocab_size, device=device)
            
            # Process one token at a time
            for t in range(tgt_len-1):
                # Get token for current timestep
                current_input = target_seq[:, t].unsqueeze(1)
                
                # Forward through decoder
                decoder_output, decoder_hidden = self.decoder(
                    current_input,
                    decoder_hidden
                )
                
                # Store prediction
                outputs[:, t, :] = decoder_output
            
            return outputs
            
        except Exception as err:
            print(f"Forward pass exception: {err}")
            return torch.zeros(batch_size, tgt_len-1, self.decoder.output_vocab_size, device=device)

    def inference(self, source_seq, max_length=50, beam_size=1):
        """
        Generate transliteration using beam search.
        
        Args:
            source_seq: source sequence [batch_size, src_len]
            max_length: maximum length of generated sequence
            beam_size: beam size for beam search
            
        Returns:
            generated_seq: generated sequence [batch_size, max_length]
        """
        batch_size = source_seq.size(0)
        device = source_seq.device
        
        # Encode source sequence
        _, encoder_hidden = self.encoder(source_seq)
        
        if beam_size == 1:
            # Greedy search
            decoder_hidden = encoder_hidden
            decoder_input = torch.tensor([[2]], device=device).repeat(batch_size, 1)  # Start with <sos> token
            generated_tokens = torch.zeros(batch_size, max_length, dtype=torch.long, device=device)
            
            for t in range(max_length):
                # Forward through decoder
                decoder_output, decoder_hidden = self.decoder(
                    decoder_input,
                    decoder_hidden
                )
                
                # Get most likely token
                _, topi = decoder_output.topk(1)
                decoder_input = topi.view(batch_size, 1)
                
                # Save generated token
                generated_tokens[:, t] = decoder_input.squeeze(1)
                
                # Stop if all sequences generated <eos>
                if (decoder_input == 3).all():  # 3 is <eos> token index
                    break
            
            return generated_tokens
        else:
            # Implement beam search here (omitted for brevity)
            # For now, return greedy search result
            return self.inference(source_seq, max_length, beam_size=1)

In [4]:
def compute_accuracy(logits, target, pad_idx=0):
    """
    Computes accuracy excluding padding tokens.
    """
    preds = logits.argmax(dim=-1)
    mask = target != pad_idx
    correct = (preds == target) & mask
    acc = correct.sum().item() / max(mask.sum().item(), 1)  # Avoid division by zero
    return acc

In [5]:
def run_epoch(model, data_loader, loss_fn, optim, device):
    model.train()
    total_loss = 0.0
    total_acc = 0.0
    total_batches = len(data_loader)
    processed_batches = 0

    for i, (inputs, targets) in enumerate(data_loader):
        try:
            inputs = inputs.to(device)
            targets = targets.to(device)

            # Check for out-of-vocabulary token indices
            if inputs.max().item() >= model.input_embedding.num_embeddings or \
               targets.max().item() >= model.output_embedding.num_embeddings:
                print(f"Skipping batch {i}/{total_batches} - "
                      f"Input max: {inputs.max().item()}, Target max: {targets.max().item()}, "
                      f"Input vocab size: {model.input_embedding.num_embeddings}, "
                      f"Target vocab size: {model.output_embedding.num_embeddings}")
                continue

            # Reset gradients
            optim.zero_grad()

            # Model inference
            output_logits = model(inputs, targets)

            # Flatten predictions and targets for loss function
            pred_flat = output_logits.reshape(-1, output_logits.size(-1))
            true_flat = targets[:, 1:].reshape(-1)  # Shift targets by one for teacher forcing

            # Compute loss and perform optimization
            loss = loss_fn(pred_flat, true_flat)
            loss.backward()
            
            # Apply gradient clipping to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            optim.step()

            total_loss += loss.item()
                
            # Accuracy
            acc = compute_accuracy(output_logits, targets[:, 1:])
            total_acc += acc
            processed_batches += 1

        except Exception as err:
            print(f"Error in batch {i}/{total_batches}: {err}")
            continue

    if processed_batches == 0:
        return 0.0, 0.0
    return total_loss / processed_batches, total_acc / processed_batches

In [6]:
def evaluate(model, data_loader, loss_fn, device):
    model.eval()
    total_loss = 0.0
    total_acc = 0.0
    processed_batches = 0

    with torch.no_grad():
        for inputs, targets in data_loader:
            try:
                inputs = inputs.to(device)
                targets = targets.to(device)

                # Ensure token indices are within valid vocabulary range
                if inputs.max() >= model.input_embedding.num_embeddings or \
                   targets.max() >= model.output_embedding.num_embeddings:
                    continue  # Skip batch if invalid indices found

                # Forward propagation
                predictions = model(inputs, targets)

                # Flatten tensors for loss calculation
                pred_flat = predictions.reshape(-1, predictions.size(-1))
                target_flat = targets[:, 1:].reshape(-1)  # Shift targets by one for teacher forcing

                # Compute batch loss
                loss = loss_fn(pred_flat, target_flat)
                total_loss += loss.item()
                
                # Compute accuracy
                acc = compute_accuracy(predictions, targets[:, 1:])
                total_acc += acc
                processed_batches += 1

            except Exception as ex:
                print(f"Evaluation error: {ex}")
                continue

    if processed_batches == 0:
        return 0.0, 0.0
    return total_loss / processed_batches, total_acc / processed_batches

In [8]:
sweep_config = {
    'method': 'bayes',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {
        'embed_dim': {'values': [16, 32, 64, 256]},
        'hidden_dim': {'values': [16, 32, 64, 256]},
        'cell_type': {'values': ['RNN','GRU','LSTM']},
        'enc_layers': {'values': [1, 2, 3]},
        'dec_layers': {'values': [1, 2, 3]},
        'dropout': {'values': [0.2, 0.3]},
        'learning_rate': {'values': [1e-3, 1e-4]},
        'batch_size': {'values': [32, 64]},
        'beam_size': {'values': [1, 3, 5]}
    }
}

In [7]:
def export_vocabularies(directory, source_dict, target_dict):
    """Export source and target vocabularies to JSON format."""
    os.makedirs(directory, exist_ok=True)
    
    src_path = os.path.join(directory, 'src.json')
    tgt_path = os.path.join(directory, 'tgt.json')
    
    with open(src_path, 'w', encoding='utf-8') as src_file:
        json.dump(source_dict, src_file, indent=2, ensure_ascii=False)
        
    with open(tgt_path, 'w', encoding='utf-8') as tgt_file:
        json.dump(target_dict, tgt_file, indent=2, ensure_ascii=False)

def import_vocabularies(directory):
    """Import source and target vocabularies from JSON files."""
    with open(os.path.join(directory, 'src.json'), 'r', encoding='utf-8') as src_file:
        source_dict = json.load(src_file)
        
    with open(os.path.join(directory, 'tgt.json'), 'r', encoding='utf-8') as tgt_file:
        target_dict = json.load(tgt_file)
        
    return source_dict, target_dict

In [8]:
# Dataset paths
path_train = '/kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv'
path_dev = '/kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv'
vocab_dir = '/kaggle/working/vocab'

print("Generating vocabularies...")
train_data = TransliterationDataset(path_train, create_vocab=True)
src_vocab, tgt_vocab = train_data.source_vocab, train_data.target_vocab

export_vocabularies(vocab_dir, src_vocab, tgt_vocab)
print(f"Source vocab: {len(src_vocab)}, Target vocab: {len(tgt_vocab)}")

# Display sample vocab entries
print("Source vocab sample:")
for ch, idx in list(src_vocab.items())[:10]:
    print(f"  {repr(ch)} => {idx}")
print("Target vocab sample:")
for ch, idx in list(tgt_vocab.items())[:10]:
    print(f"  {repr(ch)} => {idx}")

Generating vocabularies...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Vocab sizes — Source: 30, Target: 67
Source vocab: 30, Target vocab: 67
Source vocab sample:
  '<pad>' => 0
  '<unk>' => 1
  '<sos>' => 2
  '<eos>' => 3
  'a' => 4
  'm' => 5
  'k' => 6
  'i' => 7
  't' => 8
  'n' => 9
Target vocab sample:
  '<pad>' => 0
  '<unk>' => 1
  '<sos>' => 2
  '<eos>' => 3
  'అ' => 4
  'ం' => 5
  'క' => 6
  'ి' => 7
  'త' => 8
  'భ' => 9


In [11]:
def run_sweep():
    """Function to run for each sweep configuration"""
    run = wandb.init()
    cfg = run.config
        
    # Create a descriptive run name
    run.name = f"{cfg.cell_type}-e{cfg.embed_dim}-h{cfg.hidden_dim}-enc{cfg.enc_layers}-dec{cfg.dec_layers}-d{cfg.dropout}-lr{cfg.learning_rate}-b{cfg.batch_size}-beam{cfg.beam_size}"
        
    # Set device - force CPU initially if CUDA issues were detected
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    epochs = 20
    # Initialize model
    try:
        # First create on CPU
        model = Seq2Seq(cfg, len(src_vocab), len(tgt_vocab))
        print("Model created on CPU, trying to move to device...")
        # Then try to move to target device
        model = model.to(device)
        print("Model successfully moved to device.")
    except Exception as e:
        print(f"Error initializing model on {device}: {e}")
        print("Falling back to CPU")
        device = torch.device('cpu')
        model = Seq2Seq(cfg, len(src_vocab), len(tgt_vocab)).to(device)
        
    # Load datasets
    try:
        print("Loading datasets...")
        train_dataset = TransliterationDataset(path_train, src_vocab, tgt_vocab)
        dev_dataset = TransliterationDataset(path_dev, src_vocab, tgt_vocab)
            
        # Create data loaders
        train_loader = DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=True)
        dev_loader = DataLoader(dev_dataset, batch_size=cfg.batch_size)
    except Exception as e:
        print(f"Error loading datasets: {e}")
        return
        
    # Initialize loss function and optimizer
    try:
        criterion = nn.CrossEntropyLoss(ignore_index=0)  # 0 is padding index
        optimizer = optim.Adam(model.parameters(), lr=cfg.learning_rate)
    except Exception as e:
        print(f"Error initializing criterion or optimizer: {e}")
        return
        
    # Training loop
    best_val_acc = 0.0
        
    try:
        for epoch in range(epochs):
            print(f"Epoch {epoch+1}/{epochs}")
                
            # Training
            train_loss, train_acc = run_epoch(model, train_loader, criterion, optimizer, device)
            print(f"Train loss: {train_loss:.4f} Train Accuracy: {train_acc:.4f}")
                
            # Validation
            val_loss, val_acc = evaluate(model, dev_loader, criterion, device)
            print(f"Validation loss: {val_loss:.4f} Validation Accuracy: {val_acc:.4f} ")
                
            # Log metrics
            wandb.log({'train_loss': train_loss, 'val_loss': val_loss, 'train_acc': train_acc, 'val_acc': val_acc, 'epoch': epoch})
                
            # Save best model
            if val_acc > best_val_acc:
                best_val_acc = val_acc
        wandb.log({'val_accuracy': best_val_acc})
                    
    except Exception as e:
        print(f"Error during training: {e}")

In [9]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("wandb_api")

In [10]:
wandb.login(key = secret_value_0)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mda24m027[0m ([33mda24m027-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [14]:
sweep_id = wandb.sweep(sweep_config, project='DA6401_Assignment3')

Create sweep with ID: zodbp4ie
Sweep URL: https://wandb.ai/da24m027-indian-institute-of-technology-madras/DA6401_Assignment3/sweeps/zodbp4ie


In [15]:
wandb.agent(sweep_id, run_sweep, count=50)

[34m[1mwandb[0m: Agent Starting Run: 4ojq8wom with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001


Initialized model: GRU, Encoder layers: 1, Decoder layers: 1, Embedding: 16, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 3.0276 Train Accuracy: 0.1912
Validation loss: 2.7446 Validation Accuracy: 0.2559 
Epoch 2/20
Train loss: 2.6103 Train Accuracy: 0.2740
Validation loss: 2.5035 Validation Accuracy: 0.3028 
Epoch 3/20
Train loss: 2.3186 Train Accuracy: 0.3466
Validation loss: 2.1728 Validation Accuracy: 0.3957 
Epoch 4/20
Train loss: 1.9679 Train Accuracy: 0.4436
Validation loss: 1.8296 Validation Accuracy: 0.4764 
Epoch 5/20
Train loss: 1.6606 Train Accuracy: 0.5248
Validation loss: 1.5603 Validatio

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▂▃▄▄▅▆▆▆▇▇▇▇▇██████
train_loss,█▇▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁
val_acc,▁▂▃▄▄▅▆▆▇▇▇▇▇███████
val_accuracy,▁
val_loss,█▇▆▅▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.88018
train_loss,0.41975
val_acc,0.83337
val_accuracy,0.83337
val_loss,0.54821


[34m[1mwandb[0m: Agent Starting Run: je1gmagk with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_dim: 16
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 2, Decoder layers: 2, Embedding: 256, Hidden: 16
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.8880 Train Accuracy: 0.1978
Validation loss: 2.6326 Validation Accuracy: 0.2614 
Epoch 2/20
Train loss: 2.5825 Train Accuracy: 0.2757
Validation loss: 2.5089 Validation Accuracy: 0.3009 
Epoch 3/20
Train loss: 2.5029 Train Accuracy: 0.2934
Validation loss: 2.4408 Validation Accuracy: 0.3132 
Epoch 4/20
Train loss: 2.4184 Train Accuracy: 0.3094
Validation loss: 2.3460 Validation Accuracy: 0.3362 
Epoch 5/20
Train loss: 2.3482 Train Accuracy: 0.3274
Validation loss: 2.2567 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▃▃▄▄▄▅▅▆▆▆▆▇▇▇▇████
train_loss,█▆▆▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁
val_acc,▁▂▂▃▃▄▄▅▅▅▆▆▆▇▇▇▇███
val_accuracy,▁
val_loss,█▇▇▆▆▅▄▄▄▄▃▃▂▂▂▂▂▁▁▁

0,1
epoch,19.0
train_acc,0.50231
train_loss,1.70524
val_acc,0.54854
val_accuracy,0.54854
val_loss,1.54855


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: g7fpjcs9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 2, Decoder layers: 2, Embedding: 16, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.4062 Train Accuracy: 0.3267
Validation loss: 1.8252 Validation Accuracy: 0.4847 
Epoch 2/20
Train loss: 1.2589 Train Accuracy: 0.6321
Validation loss: 0.9733 Validation Accuracy: 0.7074 
Epoch 3/20
Train loss: 0.6700 Train Accuracy: 0.8010
Validation loss: 0.6244 Validation Accuracy: 0.8077 
Epoch 4/20
Train loss: 0.4280 Train Accuracy: 0.8715
Validation loss: 0.5053 Validation Accuracy: 0.8445 
Epoch 5/20
Train loss: 0.3121 Train Accuracy: 0.9055
Validation loss: 0.4749 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▄▆▇▇▇██████████████
train_loss,█▅▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▅▇▇████████████████
val_accuracy,▁
val_loss,█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.98355
train_loss,0.05213
val_acc,0.88086
val_accuracy,0.88086
val_loss,0.5301


[34m[1mwandb[0m: Agent Starting Run: fgwotlz2 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001


Initialized model: LSTM, Encoder layers: 2, Decoder layers: 2, Embedding: 16, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 3.0641 Train Accuracy: 0.1661
Validation loss: 2.7204 Validation Accuracy: 0.2488 
Epoch 2/20
Train loss: 2.5881 Train Accuracy: 0.2767
Validation loss: 2.4732 Validation Accuracy: 0.3138 
Epoch 3/20
Train loss: 2.3473 Train Accuracy: 0.3366
Validation loss: 2.2281 Validation Accuracy: 0.3730 
Epoch 4/20
Train loss: 2.0962 Train Accuracy: 0.3991
Validation loss: 1.9470 Validation Accuracy: 0.4371 
Epoch 5/20
Train loss: 1.8262 Train Accuracy: 0.4668
Validation loss: 1.6879 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▂▃▃▄▅▅▆▆▆▇▇▇▇██████
train_loss,█▇▆▅▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁
val_acc,▁▂▂▃▄▅▅▆▆▇▇▇▇▇██████
val_accuracy,▁
val_loss,█▇▆▆▅▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.87933
train_loss,0.40624
val_acc,0.84603
val_accuracy,0.84603
val_loss,0.49086


[34m[1mwandb[0m: Agent Starting Run: hrwf80gc with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001


Initialized model: LSTM, Encoder layers: 2, Decoder layers: 2, Embedding: 16, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 3.0515 Train Accuracy: 0.1738
Validation loss: 2.7055 Validation Accuracy: 0.2518 
Epoch 2/20
Train loss: 2.5488 Train Accuracy: 0.2944
Validation loss: 2.3933 Validation Accuracy: 0.3548 
Epoch 3/20
Train loss: 2.2671 Train Accuracy: 0.3697
Validation loss: 2.1450 Validation Accuracy: 0.4051 
Epoch 4/20
Train loss: 1.9885 Train Accuracy: 0.4388
Validation loss: 1.8591 Validation Accuracy: 0.4737 
Epoch 5/20
Train loss: 1.7255 Train Accuracy: 0.5042
Validation loss: 1.6293 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▂▃▄▄▅▅▆▆▇▇▇▇▇██████
train_loss,█▇▆▅▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁
val_acc,▁▂▃▄▄▅▅▆▆▇▇▇▇▇██████
val_accuracy,▁
val_loss,█▇▆▅▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.88275
train_loss,0.39419
val_acc,0.845
val_accuracy,0.845
val_loss,0.50797


[34m[1mwandb[0m: Agent Starting Run: eyhptutk with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001


Initialized model: RNN, Encoder layers: 2, Decoder layers: 2, Embedding: 16, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.9050 Train Accuracy: 0.2173
Validation loss: 2.6920 Validation Accuracy: 0.2669 
Epoch 2/20
Train loss: 2.5784 Train Accuracy: 0.2862
Validation loss: 2.5511 Validation Accuracy: 0.3003 
Epoch 3/20
Train loss: 2.4499 Train Accuracy: 0.3188
Validation loss: 2.4628 Validation Accuracy: 0.3216 
Epoch 4/20
Train loss: 2.3582 Train Accuracy: 0.3398
Validation loss: 2.4025 Validation Accuracy: 0.3360 
Epoch 5/20
Train loss: 2.2862 Train Accuracy: 0.3572
Validation loss: 2.3541 Validatio

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▃▄▄▅▅▆▆▆▆▇▇▇▇▇█████
train_loss,█▆▅▄▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁
val_acc,▁▃▄▅▆▆▆▇▇▇▇█▇▇██████
val_accuracy,▁
val_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.46855
train_loss,1.84517
val_acc,0.38856
val_accuracy,0.38856
val_loss,2.13477


[34m[1mwandb[0m: Agent Starting Run: fon5fuex with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001


Initialized model: LSTM, Encoder layers: 2, Decoder layers: 3, Embedding: 32, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 3.1394 Train Accuracy: 0.1477
Validation loss: 2.7539 Validation Accuracy: 0.2340 
Epoch 2/20
Train loss: 2.5731 Train Accuracy: 0.2838
Validation loss: 2.4061 Validation Accuracy: 0.3324 
Epoch 3/20
Train loss: 2.2625 Train Accuracy: 0.3657
Validation loss: 2.1158 Validation Accuracy: 0.4024 
Epoch 4/20
Train loss: 1.9776 Train Accuracy: 0.4372
Validation loss: 1.7734 Validation Accuracy: 0.4923 
Epoch 5/20
Train loss: 1.6973 Train Accuracy: 0.5072
Validation loss: 1.5097 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▂▃▄▄▅▆▆▆▇▇▇▇▇██████
train_loss,█▇▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁
val_acc,▁▂▃▄▅▅▆▆▇▇▇▇████████
val_accuracy,▁
val_loss,█▇▆▅▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.88754
train_loss,0.36526
val_acc,0.86811
val_accuracy,0.86811
val_loss,0.41598


[34m[1mwandb[0m: Agent Starting Run: m5gvlb0b with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001


Initialized model: LSTM, Encoder layers: 2, Decoder layers: 3, Embedding: 32, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 3.1261 Train Accuracy: 0.1570
Validation loss: 2.7469 Validation Accuracy: 0.2535 
Epoch 2/20
Train loss: 2.5784 Train Accuracy: 0.2877
Validation loss: 2.3686 Validation Accuracy: 0.3416 
Epoch 3/20
Train loss: 2.2912 Train Accuracy: 0.3570
Validation loss: 2.1463 Validation Accuracy: 0.3985 
Epoch 4/20
Train loss: 2.0577 Train Accuracy: 0.4143
Validation loss: 1.8945 Validation Accuracy: 0.4565 
Epoch 5/20
Train loss: 1.8040 Train Accuracy: 0.4766
Validation loss: 1.6315 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▂▃▃▄▅▅▆▆▆▇▇▇▇██████
train_loss,█▇▆▅▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁
val_acc,▁▂▃▃▄▅▅▆▆▇▇▇▇███████
val_accuracy,▁
val_loss,█▇▆▅▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.87862
train_loss,0.39213
val_acc,0.86414
val_accuracy,0.86414
val_loss,0.42607


[34m[1mwandb[0m: Agent Starting Run: yzlss3pq with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 2, Embedding: 32, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.0936 Train Accuracy: 0.4088
Validation loss: 1.1750 Validation Accuracy: 0.6463 
Epoch 2/20
Train loss: 0.7518 Train Accuracy: 0.7760
Validation loss: 0.5492 Validation Accuracy: 0.8320 
Epoch 3/20
Train loss: 0.4026 Train Accuracy: 0.8793
Validation loss: 0.4523 Validation Accuracy: 0.8594 
Epoch 4/20
Train loss: 0.2843 Train Accuracy: 0.9136
Validation loss: 0.3897 Validation Accuracy: 0.8802 
Epoch 5/20
Train loss: 0.2198 Train Accuracy: 0.9331
Validation loss: 0.3804 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▅▇▇▇███████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇█████████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂

0,1
epoch,19.0
train_acc,0.98569
train_loss,0.04395
val_acc,0.89412
val_accuracy,0.8952
val_loss,0.48087


[34m[1mwandb[0m: Agent Starting Run: vxvh99y2 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 2, Embedding: 32, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.2161 Train Accuracy: 0.3764
Validation loss: 1.3733 Validation Accuracy: 0.5943 
Epoch 2/20
Train loss: 0.8693 Train Accuracy: 0.7384
Validation loss: 0.6214 Validation Accuracy: 0.8102 
Epoch 3/20
Train loss: 0.4570 Train Accuracy: 0.8617
Validation loss: 0.4723 Validation Accuracy: 0.8519 
Epoch 4/20
Train loss: 0.3184 Train Accuracy: 0.9021
Validation loss: 0.4131 Validation Accuracy: 0.8722 
Epoch 5/20
Train loss: 0.2466 Train Accuracy: 0.9245
Validation loss: 0.3881 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▅▇▇▇▇██████████████
train_loss,█▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇▇████████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂

0,1
epoch,19.0
train_acc,0.98325
train_loss,0.05164
val_acc,0.89242
val_accuracy,0.8964
val_loss,0.47672


[34m[1mwandb[0m: Agent Starting Run: j4uetd82 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 2, Embedding: 16, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.3533 Train Accuracy: 0.3385
Validation loss: 1.7459 Validation Accuracy: 0.4944 
Epoch 2/20
Train loss: 1.0923 Train Accuracy: 0.6785
Validation loss: 0.7649 Validation Accuracy: 0.7649 
Epoch 3/20
Train loss: 0.5185 Train Accuracy: 0.8453
Validation loss: 0.4885 Validation Accuracy: 0.8493 
Epoch 4/20
Train loss: 0.3366 Train Accuracy: 0.8986
Validation loss: 0.4330 Validation Accuracy: 0.8666 
Epoch 5/20
Train loss: 0.2498 Train Accuracy: 0.9249
Validation loss: 0.4049 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▅▆▇▇███████████████
train_loss,█▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇█████████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.98585
train_loss,0.04408
val_acc,0.8928
val_accuracy,0.89406
val_loss,0.47997


[34m[1mwandb[0m: Agent Starting Run: 5pyilpzd with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 3, Embedding: 16, Hidden: 64
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.8161 Train Accuracy: 0.2137
Validation loss: 2.3594 Validation Accuracy: 0.3232 
Epoch 2/20
Train loss: 2.1135 Train Accuracy: 0.3849
Validation loss: 1.7788 Validation Accuracy: 0.4742 
Epoch 3/20
Train loss: 1.6930 Train Accuracy: 0.4990
Validation loss: 1.3874 Validation Accuracy: 0.5894 
Epoch 4/20
Train loss: 1.3739 Train Accuracy: 0.5879
Validation loss: 1.0726 Validation Accuracy: 0.6750 
Epoch 5/20
Train loss: 1.1194 Train Accuracy: 0.6613
Validation loss: 0.8414 Validatio

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▃▄▅▆▆▇▇▇▇▇█████████
train_loss,█▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁
val_acc,▁▃▄▅▆▇▇▇▇███████████
val_accuracy,▁
val_loss,█▆▅▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.88276
train_loss,0.37201
val_acc,0.88515
val_accuracy,0.88515
val_loss,0.37916


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fnx9cwqz with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 2, Embedding: 32, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.9880 Train Accuracy: 0.1933
Validation loss: 2.5921 Validation Accuracy: 0.2793 
Epoch 2/20
Train loss: 2.4202 Train Accuracy: 0.3258
Validation loss: 2.2704 Validation Accuracy: 0.3661 
Epoch 3/20
Train loss: 2.1055 Train Accuracy: 0.4078
Validation loss: 1.9233 Validation Accuracy: 0.4610 
Epoch 4/20
Train loss: 1.7751 Train Accuracy: 0.4942
Validation loss: 1.5746 Validation Accuracy: 0.5526 
Epoch 5/20
Train loss: 1.4519 Train Accuracy: 0.5787
Validation loss: 1.2790 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▂▃▄▅▅▆▆▇▇▇▇▇███████
train_loss,█▇▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁
val_acc,▁▂▃▄▅▆▆▇▇▇▇█████████
val_accuracy,▁
val_loss,█▇▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.90515
train_loss,0.31434
val_acc,0.86896
val_accuracy,0.86896
val_loss,0.42231


[34m[1mwandb[0m: Agent Starting Run: 6y1wgao2 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 1, Embedding: 16, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.4077 Train Accuracy: 0.3272
Validation loss: 1.7522 Validation Accuracy: 0.4966 
Epoch 2/20
Train loss: 1.1564 Train Accuracy: 0.6606
Validation loss: 0.8715 Validation Accuracy: 0.7327 
Epoch 3/20
Train loss: 0.6313 Train Accuracy: 0.8132
Validation loss: 0.6451 Validation Accuracy: 0.8038 
Epoch 4/20
Train loss: 0.4334 Train Accuracy: 0.8720
Validation loss: 0.5397 Validation Accuracy: 0.8336 
Epoch 5/20
Train loss: 0.3272 Train Accuracy: 0.9040
Validation loss: 0.5008 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▅▆▇▇▇▇█████████████
train_loss,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇▇████████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂

0,1
epoch,19.0
train_acc,0.98709
train_loss,0.0449
val_acc,0.86107
val_accuracy,0.86195
val_loss,0.60735


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vnpxjjhb with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 1, Decoder layers: 1, Embedding: 16, Hidden: 64
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.6749 Train Accuracy: 0.2619
Validation loss: 2.3236 Validation Accuracy: 0.3587 
Epoch 2/20
Train loss: 2.0640 Train Accuracy: 0.4141
Validation loss: 1.8578 Validation Accuracy: 0.4654 
Epoch 3/20
Train loss: 1.6401 Train Accuracy: 0.5181
Validation loss: 1.5106 Validation Accuracy: 0.5502 
Epoch 4/20
Train loss: 1.3550 Train Accuracy: 0.5939
Validation loss: 1.2755 Validation Accuracy: 0.6153 
Epoch 5/20
Train loss: 1.1638 Train Accuracy: 0.6506
Validation loss: 1.1270 Validatio

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▃▄▅▆▆▆▇▇▇▇▇████████
train_loss,█▆▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_acc,▁▃▄▅▆▆▆▇▇▇▇█████████
val_accuracy,▁
val_loss,█▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.85959
train_loss,0.46901
val_acc,0.82002
val_accuracy,0.82002
val_loss,0.59727


[34m[1mwandb[0m: Agent Starting Run: nxdeysib with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 2, Embedding: 64, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.0207 Train Accuracy: 0.4326
Validation loss: 1.0470 Validation Accuracy: 0.6850 
Epoch 2/20
Train loss: 0.7066 Train Accuracy: 0.7879
Validation loss: 0.5315 Validation Accuracy: 0.8355 
Epoch 3/20
Train loss: 0.4014 Train Accuracy: 0.8777
Validation loss: 0.4291 Validation Accuracy: 0.8670 
Epoch 4/20
Train loss: 0.2897 Train Accuracy: 0.9109
Validation loss: 0.3953 Validation Accuracy: 0.8787 
Epoch 5/20
Train loss: 0.2273 Train Accuracy: 0.9300
Validation loss: 0.3668 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▆▇▇▇▇██████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇▇████████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂

0,1
epoch,19.0
train_acc,0.98202
train_loss,0.05521
val_acc,0.89276
val_accuracy,0.89832
val_loss,0.47094


[34m[1mwandb[0m: Agent Starting Run: 6j1w82s9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 3, Embedding: 256, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.0474 Train Accuracy: 0.4219
Validation loss: 0.9888 Validation Accuracy: 0.6996 
Epoch 2/20
Train loss: 0.6884 Train Accuracy: 0.7929
Validation loss: 0.4801 Validation Accuracy: 0.8526 
Epoch 3/20
Train loss: 0.4008 Train Accuracy: 0.8762
Validation loss: 0.3879 Validation Accuracy: 0.8779 
Epoch 4/20
Train loss: 0.2993 Train Accuracy: 0.9071
Validation loss: 0.3710 Validation Accuracy: 0.8869 
Epoch 5/20
Train loss: 0.2427 Train Accuracy: 0.9243
Validation loss: 0.3528 Validat

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▆▇▇▇▇██████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇▇████████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂

0,1
epoch,19.0
train_acc,0.97694
train_loss,0.06884
val_acc,0.90778
val_accuracy,0.90856
val_loss,0.39497


[34m[1mwandb[0m: Agent Starting Run: 3s4rhr19 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 2, Embedding: 32, Hidden: 64
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.5916 Train Accuracy: 0.2832
Validation loss: 2.0595 Validation Accuracy: 0.4209 
Epoch 2/20
Train loss: 1.7485 Train Accuracy: 0.4967
Validation loss: 1.4072 Validation Accuracy: 0.5802 
Epoch 3/20
Train loss: 1.2633 Train Accuracy: 0.6288
Validation loss: 1.0470 Validation Accuracy: 0.6894 
Epoch 4/20
Train loss: 0.9685 Train Accuracy: 0.7141
Validation loss: 0.8114 Validation Accuracy: 0.7615 
Epoch 5/20
Train loss: 0.7851 Train Accuracy: 0.7665
Validation loss: 0.6786 Validatio

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▃▅▆▆▇▇▇▇▇██████████
train_loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▃▅▆▇▇▇▇████████████
val_accuracy,▁
val_loss,█▅▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.90793
train_loss,0.2957
val_acc,0.87799
val_accuracy,0.87799
val_loss,0.40471


[34m[1mwandb[0m: Agent Starting Run: dn2t25bi with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 3, Embedding: 64, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 3.0160 Train Accuracy: 0.1803
Validation loss: 2.5834 Validation Accuracy: 0.2843 
Epoch 2/20
Train loss: 2.4262 Train Accuracy: 0.3201
Validation loss: 2.2510 Validation Accuracy: 0.3742 
Epoch 3/20
Train loss: 2.0674 Train Accuracy: 0.4154
Validation loss: 1.8227 Validation Accuracy: 0.4793 
Epoch 4/20
Train loss: 1.6771 Train Accuracy: 0.5195
Validation loss: 1.4566 Validation Accuracy: 0.5744 
Epoch 5/20
Train loss: 1.3482 Train Accuracy: 0.6055
Validation loss: 1.1463 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▂▃▄▅▆▆▇▇▇▇▇████████
train_loss,█▇▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁
val_acc,▁▂▃▄▅▆▇▇▇▇▇█████████
val_accuracy,▁
val_loss,█▇▆▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.92826
train_loss,0.24002
val_acc,0.88934
val_accuracy,0.88934
val_loss,0.36081


[34m[1mwandb[0m: Agent Starting Run: 38fid00f with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 2, Decoder layers: 3, Embedding: 16, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.3802 Train Accuracy: 0.3217
Validation loss: 1.8098 Validation Accuracy: 0.4692 
Epoch 2/20
Train loss: 1.3453 Train Accuracy: 0.5973
Validation loss: 1.0260 Validation Accuracy: 0.6848 
Epoch 3/20
Train loss: 0.7591 Train Accuracy: 0.7688
Validation loss: 0.6416 Validation Accuracy: 0.8010 
Epoch 4/20
Train loss: 0.4989 Train Accuracy: 0.8460
Validation loss: 0.5327 Validation Accuracy: 0.8388 
Epoch 5/20
Train loss: 0.3757 Train Accuracy: 0.8827
Validation loss: 0.4658 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▄▆▇▇▇▇█████████████
train_loss,█▅▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▅▆▇▇███████████████
val_accuracy,▁
val_loss,█▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.96801
train_loss,0.09799
val_acc,0.8951
val_accuracy,0.8951
val_loss,0.43417


[34m[1mwandb[0m: Agent Starting Run: 0qui5gg6 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 3, Embedding: 32, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.1293 Train Accuracy: 0.3946
Validation loss: 1.1069 Validation Accuracy: 0.6644 
Epoch 2/20
Train loss: 0.7108 Train Accuracy: 0.7854
Validation loss: 0.5125 Validation Accuracy: 0.8408 
Epoch 3/20
Train loss: 0.3947 Train Accuracy: 0.8785
Validation loss: 0.4248 Validation Accuracy: 0.8686 
Epoch 4/20
Train loss: 0.2865 Train Accuracy: 0.9108
Validation loss: 0.3903 Validation Accuracy: 0.8811 
Epoch 5/20
Train loss: 0.2287 Train Accuracy: 0.9283
Validation loss: 0.3736 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▆▇▇▇███████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇▇████████████████
val_accuracy,▁
val_loss,█▂▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂

0,1
epoch,19.0
train_acc,0.97945
train_loss,0.06152
val_acc,0.89462
val_accuracy,0.8993
val_loss,0.47732


[34m[1mwandb[0m: Agent Starting Run: 64twpnxq with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 3, Embedding: 32, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 3.1333 Train Accuracy: 0.1484
Validation loss: 2.8927 Validation Accuracy: 0.2125 
Epoch 2/20
Train loss: 2.5319 Train Accuracy: 0.3038
Validation loss: 2.2868 Validation Accuracy: 0.3681 
Epoch 3/20
Train loss: 2.1339 Train Accuracy: 0.4007
Validation loss: 1.9322 Validation Accuracy: 0.4556 
Epoch 4/20
Train loss: 1.8120 Train Accuracy: 0.4822
Validation loss: 1.6111 Validation Accuracy: 0.5343 
Epoch 5/20
Train loss: 1.5232 Train Accuracy: 0.5581
Validation loss: 1.3493 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▂▃▄▅▅▆▆▇▇▇▇▇███████
train_loss,█▇▆▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁
val_acc,▁▃▄▄▅▆▆▇▇▇▇█████████
val_accuracy,▁
val_loss,█▆▅▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.91199
train_loss,0.29077
val_acc,0.8787
val_accuracy,0.8787
val_loss,0.39879


[34m[1mwandb[0m: Agent Starting Run: wxq2zdvh with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 2, Embedding: 16, Hidden: 64
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.7364 Train Accuracy: 0.2370
Validation loss: 2.3334 Validation Accuracy: 0.3417 
Epoch 2/20
Train loss: 2.0730 Train Accuracy: 0.4124
Validation loss: 1.7354 Validation Accuracy: 0.5023 
Epoch 3/20
Train loss: 1.5788 Train Accuracy: 0.5422
Validation loss: 1.2970 Validation Accuracy: 0.6143 
Epoch 4/20
Train loss: 1.2404 Train Accuracy: 0.6317
Validation loss: 1.0055 Validation Accuracy: 0.6926 
Epoch 5/20
Train loss: 1.0127 Train Accuracy: 0.6957
Validation loss: 0.8308 Validatio

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▃▄▅▆▆▇▇▇▇▇█████████
train_loss,█▆▅▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_acc,▁▃▅▆▆▇▇▇▇███████████
val_accuracy,▁
val_loss,█▆▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.88472
train_loss,0.37274
val_acc,0.86883
val_accuracy,0.86883
val_loss,0.42893


[34m[1mwandb[0m: Agent Starting Run: ueqexn5g with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 3, Embedding: 32, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.3612 Train Accuracy: 0.3352
Validation loss: 1.5460 Validation Accuracy: 0.5520 
Epoch 2/20
Train loss: 1.0129 Train Accuracy: 0.6995
Validation loss: 0.6353 Validation Accuracy: 0.8088 
Epoch 3/20
Train loss: 0.5227 Train Accuracy: 0.8410
Validation loss: 0.4575 Validation Accuracy: 0.8577 
Epoch 4/20
Train loss: 0.3654 Train Accuracy: 0.8873
Validation loss: 0.3892 Validation Accuracy: 0.8819 
Epoch 5/20
Train loss: 0.2882 Train Accuracy: 0.9107
Validation loss: 0.3707 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▅▇▇▇▇██████████████
train_loss,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇█████████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.97825
train_loss,0.06625
val_acc,0.90369
val_accuracy,0.90719
val_loss,0.42093


[34m[1mwandb[0m: Agent Starting Run: eom2i57g with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 1, Decoder layers: 3, Embedding: 32, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.4553 Train Accuracy: 0.3040
Validation loss: 1.7229 Validation Accuracy: 0.4974 
Epoch 2/20
Train loss: 1.2086 Train Accuracy: 0.6419
Validation loss: 0.8588 Validation Accuracy: 0.7427 
Epoch 3/20
Train loss: 0.6619 Train Accuracy: 0.7981
Validation loss: 0.5947 Validation Accuracy: 0.8177 
Epoch 4/20
Train loss: 0.4532 Train Accuracy: 0.8603
Validation loss: 0.4999 Validation Accuracy: 0.8465 
Epoch 5/20
Train loss: 0.3490 Train Accuracy: 0.8912
Validation loss: 0.4763 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▅▆▇▇▇██████████████
train_loss,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▅▇▇▇███████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.96793
train_loss,0.09764
val_acc,0.8865
val_accuracy,0.88662
val_loss,0.49659


[34m[1mwandb[0m: Agent Starting Run: 7fv6ig4j with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 3, Embedding: 256, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.5996 Train Accuracy: 0.2925
Validation loss: 1.9088 Validation Accuracy: 0.4773 
Epoch 2/20
Train loss: 1.4949 Train Accuracy: 0.5802
Validation loss: 1.0358 Validation Accuracy: 0.6916 
Epoch 3/20
Train loss: 0.8957 Train Accuracy: 0.7385
Validation loss: 0.6832 Validation Accuracy: 0.7963 
Epoch 4/20
Train loss: 0.6376 Train Accuracy: 0.8100
Validation loss: 0.5471 Validation Accuracy: 0.8329 
Epoch 5/20
Train loss: 0.5025 Train Accuracy: 0.8494
Validation loss: 0.4735 Validat

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▄▆▆▇▇▇▇▇███████████
train_loss,█▅▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▅▆▇▇▇██████████████
val_accuracy,▁
val_loss,█▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.96704
train_loss,0.10911
val_acc,0.90331
val_accuracy,0.90535
val_loss,0.33928


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: x4vdqhtt with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 1, Embedding: 32, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 1.7771 Train Accuracy: 0.4942
Validation loss: 0.9071 Validation Accuracy: 0.7279 
Epoch 2/20
Train loss: 0.6170 Train Accuracy: 0.8159
Validation loss: 0.5912 Validation Accuracy: 0.8161 
Epoch 3/20
Train loss: 0.3745 Train Accuracy: 0.8887
Validation loss: 0.5079 Validation Accuracy: 0.8460 
Epoch 4/20
Train loss: 0.2686 Train Accuracy: 0.9200
Validation loss: 0.4709 Validation Accuracy: 0.8601 
Epoch 5/20
Train loss: 0.2075 Train Accuracy: 0.9386
Validation loss: 0.4657 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▆▇▇▇▇██████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇█████████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄

0,1
epoch,19.0
train_acc,0.98739
train_loss,0.04207
val_acc,0.86065
val_accuracy,0.86352
val_loss,0.64988


[34m[1mwandb[0m: Agent Starting Run: 3bs0vsvs with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 1, Decoder layers: 3, Embedding: 256, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.3574 Train Accuracy: 0.3315
Validation loss: 1.5538 Validation Accuracy: 0.5425 
Epoch 2/20
Train loss: 1.1467 Train Accuracy: 0.6566
Validation loss: 0.7788 Validation Accuracy: 0.7579 
Epoch 3/20
Train loss: 0.6870 Train Accuracy: 0.7880
Validation loss: 0.5899 Validation Accuracy: 0.8114 
Epoch 4/20
Train loss: 0.5041 Train Accuracy: 0.8425
Validation loss: 0.5015 Validation Accuracy: 0.8443 
Epoch 5/20
Train loss: 0.4046 Train Accuracy: 0.8726
Validation loss: 0.4642 Validat

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▅▆▇▇▇▇█████████████
train_loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▅▇▇▇███████████████
val_accuracy,▁
val_loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.95939
train_loss,0.12125
val_acc,0.87919
val_accuracy,0.88162
val_loss,0.4642


[34m[1mwandb[0m: Agent Starting Run: pis1jjcw with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 3, Embedding: 32, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 1.9361 Train Accuracy: 0.4490
Validation loss: 0.8374 Validation Accuracy: 0.7478 
Epoch 2/20
Train loss: 0.5567 Train Accuracy: 0.8329
Validation loss: 0.4448 Validation Accuracy: 0.8603 
Epoch 3/20
Train loss: 0.3199 Train Accuracy: 0.9021
Validation loss: 0.3871 Validation Accuracy: 0.8828 
Epoch 4/20
Train loss: 0.2346 Train Accuracy: 0.9275
Validation loss: 0.3679 Validation Accuracy: 0.8888 
Epoch 5/20
Train loss: 0.1868 Train Accuracy: 0.9421
Validation loss: 0.3606 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▆▇▇▇███████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇▇████████████████
val_accuracy,▁
val_loss,█▂▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃

0,1
epoch,19.0
train_acc,0.98342
train_loss,0.04972
val_acc,0.9013
val_accuracy,0.90387
val_loss,0.47713


[34m[1mwandb[0m: Agent Starting Run: nps4tzoe with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 2, Decoder layers: 3, Embedding: 256, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 1.8253 Train Accuracy: 0.4765
Validation loss: 0.7738 Validation Accuracy: 0.7665 
Epoch 2/20
Train loss: 0.5854 Train Accuracy: 0.8194
Validation loss: 0.4634 Validation Accuracy: 0.8549 
Epoch 3/20
Train loss: 0.3766 Train Accuracy: 0.8819
Validation loss: 0.4027 Validation Accuracy: 0.8747 
Epoch 4/20
Train loss: 0.2904 Train Accuracy: 0.9084
Validation loss: 0.3652 Validation Accuracy: 0.8888 
Epoch 5/20
Train loss: 0.2401 Train Accuracy: 0.9242
Validation loss: 0.3572 Validat

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▆▇▇▇▇██████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▅▆▇▇▇██████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂

0,1
epoch,19.0
train_acc,0.97266
train_loss,0.08207
val_acc,0.9045
val_accuracy,0.9045
val_loss,0.41434


[34m[1mwandb[0m: Agent Starting Run: qdsobqzx with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 2, Decoder layers: 3, Embedding: 32, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.5076 Train Accuracy: 0.2912
Validation loss: 1.8079 Validation Accuracy: 0.4716 
Epoch 2/20
Train loss: 1.0975 Train Accuracy: 0.6747
Validation loss: 0.6284 Validation Accuracy: 0.8114 
Epoch 3/20
Train loss: 0.4894 Train Accuracy: 0.8515
Validation loss: 0.4382 Validation Accuracy: 0.8656 
Epoch 4/20
Train loss: 0.3364 Train Accuracy: 0.8969
Validation loss: 0.3727 Validation Accuracy: 0.8832 
Epoch 5/20
Train loss: 0.2645 Train Accuracy: 0.9183
Validation loss: 0.3497 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▅▇▇▇███████████████
train_loss,█▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇█████████████████
val_accuracy,▁
val_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.97927
train_loss,0.06255
val_acc,0.90393
val_accuracy,0.90639
val_loss,0.42066


[34m[1mwandb[0m: Agent Starting Run: 0xdzy8z4 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 1, Decoder layers: 1, Embedding: 256, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 1.4832 Train Accuracy: 0.5753
Validation loss: 0.7671 Validation Accuracy: 0.7623 
Epoch 2/20
Train loss: 0.5122 Train Accuracy: 0.8440
Validation loss: 0.5618 Validation Accuracy: 0.8219 
Epoch 3/20
Train loss: 0.3424 Train Accuracy: 0.8953
Validation loss: 0.5227 Validation Accuracy: 0.8366 
Epoch 4/20
Train loss: 0.2635 Train Accuracy: 0.9194
Validation loss: 0.5019 Validation Accuracy: 0.8432 
Epoch 5/20
Train loss: 0.2128 Train Accuracy: 0.9350
Validation loss: 0.5015 Validat

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▆▇▇▇▇▇█████████████
train_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▅▇▇▇███████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▂▂▂▃▃▃▄▄▄▄▅▅

0,1
epoch,19.0
train_acc,0.98174
train_loss,0.05742
val_acc,0.85205
val_accuracy,0.85541
val_loss,0.6629


[34m[1mwandb[0m: Agent Starting Run: y5olc3r0 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 2, Decoder layers: 3, Embedding: 64, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 1.7663 Train Accuracy: 0.4944
Validation loss: 0.7465 Validation Accuracy: 0.7690 
Epoch 2/20
Train loss: 0.5564 Train Accuracy: 0.8304
Validation loss: 0.4576 Validation Accuracy: 0.8567 
Epoch 3/20
Train loss: 0.3567 Train Accuracy: 0.8899
Validation loss: 0.3782 Validation Accuracy: 0.8814 
Epoch 4/20
Train loss: 0.2748 Train Accuracy: 0.9148
Validation loss: 0.3537 Validation Accuracy: 0.8906 
Epoch 5/20
Train loss: 0.2265 Train Accuracy: 0.9289
Validation loss: 0.3560 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▆▇▇▇▇██████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇▇▇███████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃

0,1
epoch,19.0
train_acc,0.97631
train_loss,0.07186
val_acc,0.90011
val_accuracy,0.90418
val_loss,0.44292


[34m[1mwandb[0m: Agent Starting Run: qmnbjtnr with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001


Initialized model: LSTM, Encoder layers: 1, Decoder layers: 2, Embedding: 256, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.7467 Train Accuracy: 0.2648
Validation loss: 2.1745 Validation Accuracy: 0.4122 
Epoch 2/20
Train loss: 1.8375 Train Accuracy: 0.4895
Validation loss: 1.4644 Validation Accuracy: 0.5801 
Epoch 3/20
Train loss: 1.2862 Train Accuracy: 0.6282
Validation loss: 1.0592 Validation Accuracy: 0.6836 
Epoch 4/20
Train loss: 0.9793 Train Accuracy: 0.7100
Validation loss: 0.8464 Validation Accuracy: 0.7447 
Epoch 5/20
Train loss: 0.7978 Train Accuracy: 0.7608
Validation loss: 0.7161 Validat

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▃▅▆▆▇▇▇▇▇▇█████████
train_loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_acc,▁▄▅▆▇▇▇▇▇███████████
val_accuracy,▁
val_loss,█▅▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.92531
train_loss,0.24577
val_acc,0.88156
val_accuracy,0.88156
val_loss,0.38428


[34m[1mwandb[0m: Agent Starting Run: nnrsw2nn with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 3, Embedding: 64, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.2794 Train Accuracy: 0.3572
Validation loss: 1.4718 Validation Accuracy: 0.5713 
Epoch 2/20
Train loss: 0.9434 Train Accuracy: 0.7201
Validation loss: 0.5742 Validation Accuracy: 0.8230 
Epoch 3/20
Train loss: 0.4784 Train Accuracy: 0.8536
Validation loss: 0.4234 Validation Accuracy: 0.8695 
Epoch 4/20
Train loss: 0.3408 Train Accuracy: 0.8946
Validation loss: 0.3803 Validation Accuracy: 0.8853 
Epoch 5/20
Train loss: 0.2720 Train Accuracy: 0.9154
Validation loss: 0.3482 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▅▇▇▇▇██████████████
train_loss,█▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇▇████████████████
val_accuracy,▁
val_loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.97705
train_loss,0.06942
val_acc,0.90863
val_accuracy,0.91132
val_loss,0.40698


[34m[1mwandb[0m: Agent Starting Run: ko50nofj with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 2, Decoder layers: 3, Embedding: 16, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.3741 Train Accuracy: 0.3253
Validation loss: 1.6897 Validation Accuracy: 0.5013 
Epoch 2/20
Train loss: 1.2309 Train Accuracy: 0.6325
Validation loss: 0.8854 Validation Accuracy: 0.7268 
Epoch 3/20
Train loss: 0.6957 Train Accuracy: 0.7874
Validation loss: 0.5923 Validation Accuracy: 0.8167 
Epoch 4/20
Train loss: 0.4722 Train Accuracy: 0.8540
Validation loss: 0.4886 Validation Accuracy: 0.8501 
Epoch 5/20
Train loss: 0.3619 Train Accuracy: 0.8872
Validation loss: 0.4503 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▄▆▇▇▇▇█████████████
train_loss,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▅▇▇▇███████████████
val_accuracy,▁
val_loss,█▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.96691
train_loss,0.1003
val_acc,0.89284
val_accuracy,0.89401
val_loss,0.43175


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mehgr1oo with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 2, Decoder layers: 3, Embedding: 32, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.1367 Train Accuracy: 0.3913
Validation loss: 1.1442 Validation Accuracy: 0.6605 
Epoch 2/20
Train loss: 0.7637 Train Accuracy: 0.7701
Validation loss: 0.5182 Validation Accuracy: 0.8375 
Epoch 3/20
Train loss: 0.4338 Train Accuracy: 0.8663
Validation loss: 0.4136 Validation Accuracy: 0.8688 
Epoch 4/20
Train loss: 0.3230 Train Accuracy: 0.8991
Validation loss: 0.3839 Validation Accuracy: 0.8818 
Epoch 5/20
Train loss: 0.2633 Train Accuracy: 0.9170
Validation loss: 0.3571 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▆▇▇▇▇██████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇▇████████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂

0,1
epoch,19.0
train_acc,0.97199
train_loss,0.08537
val_acc,0.90154
val_accuracy,0.90417
val_loss,0.42528


[34m[1mwandb[0m: Agent Starting Run: ior1t3ij with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 2, Decoder layers: 3, Embedding: 32, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.4508 Train Accuracy: 0.3057
Validation loss: 1.6071 Validation Accuracy: 0.5292 
Epoch 2/20
Train loss: 1.0270 Train Accuracy: 0.6934
Validation loss: 0.6594 Validation Accuracy: 0.8033 
Epoch 3/20
Train loss: 0.5262 Train Accuracy: 0.8400
Validation loss: 0.4742 Validation Accuracy: 0.8546 
Epoch 4/20
Train loss: 0.3640 Train Accuracy: 0.8882
Validation loss: 0.4120 Validation Accuracy: 0.8762 
Epoch 5/20
Train loss: 0.2801 Train Accuracy: 0.9139
Validation loss: 0.3848 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▅▇▇▇▇██████████████
train_loss,█▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇█████████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.97999
train_loss,0.06161
val_acc,0.89814
val_accuracy,0.90162
val_loss,0.44566


[34m[1mwandb[0m: Agent Starting Run: ch0opkzr with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 3, Embedding: 16, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.6203 Train Accuracy: 0.2617
Validation loss: 2.1770 Validation Accuracy: 0.3769 
Epoch 2/20
Train loss: 1.5374 Train Accuracy: 0.5496
Validation loss: 0.9552 Validation Accuracy: 0.7099 
Epoch 3/20
Train loss: 0.6937 Train Accuracy: 0.7915
Validation loss: 0.5416 Validation Accuracy: 0.8356 
Epoch 4/20
Train loss: 0.4225 Train Accuracy: 0.8720
Validation loss: 0.4311 Validation Accuracy: 0.8656 
Epoch 5/20
Train loss: 0.3130 Train Accuracy: 0.9047
Validation loss: 0.3874 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▄▆▇▇▇██████████████
train_loss,█▅▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▅▇▇████████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.98125
train_loss,0.05743
val_acc,0.90325
val_accuracy,0.90335
val_loss,0.42584


[34m[1mwandb[0m: Agent Starting Run: 75zbaqsy with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001


Initialized model: LSTM, Encoder layers: 1, Decoder layers: 2, Embedding: 16, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 3.1494 Train Accuracy: 0.1437
Validation loss: 2.8291 Validation Accuracy: 0.2158 
Epoch 2/20
Train loss: 2.6445 Train Accuracy: 0.2617
Validation loss: 2.4570 Validation Accuracy: 0.3249 
Epoch 3/20
Train loss: 2.2920 Train Accuracy: 0.3619
Validation loss: 2.1099 Validation Accuracy: 0.4128 
Epoch 4/20
Train loss: 1.9863 Train Accuracy: 0.4374
Validation loss: 1.8339 Validation Accuracy: 0.4786 
Epoch 5/20
Train loss: 1.7443 Train Accuracy: 0.4951
Validation loss: 1.6107 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▂▃▄▅▅▆▆▆▇▇▇▇▇▇█████
train_loss,█▇▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁
val_acc,▁▂▃▄▅▅▆▆▆▇▇▇▇▇██████
val_accuracy,▁
val_loss,█▇▆▅▄▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.83755
train_loss,0.53185
val_acc,0.81485
val_accuracy,0.81485
val_loss,0.59964


[34m[1mwandb[0m: Agent Starting Run: 3my2r8sz with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 3, Embedding: 256, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 1.6390 Train Accuracy: 0.5322
Validation loss: 0.6752 Validation Accuracy: 0.7895 
Epoch 2/20
Train loss: 0.5088 Train Accuracy: 0.8440
Validation loss: 0.4366 Validation Accuracy: 0.8604 
Epoch 3/20
Train loss: 0.3302 Train Accuracy: 0.8973
Validation loss: 0.3751 Validation Accuracy: 0.8852 
Epoch 4/20
Train loss: 0.2554 Train Accuracy: 0.9203
Validation loss: 0.3735 Validation Accuracy: 0.8882 
Epoch 5/20
Train loss: 0.2099 Train Accuracy: 0.9339
Validation loss: 0.3538 Validat

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▆▇▇▇▇██████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▅▇▇▇███████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃

0,1
epoch,19.0
train_acc,0.97667
train_loss,0.07026
val_acc,0.90005
val_accuracy,0.9036
val_loss,0.42896


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ac4a1856 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 2, Decoder layers: 2, Embedding: 16, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.3425 Train Accuracy: 0.3398
Validation loss: 2.0647 Validation Accuracy: 0.4052 
Epoch 2/20
Train loss: 1.4958 Train Accuracy: 0.5645
Validation loss: 1.3800 Validation Accuracy: 0.5940 
Epoch 3/20
Train loss: 0.9103 Train Accuracy: 0.7262
Validation loss: 0.9797 Validation Accuracy: 0.7025 
Epoch 4/20
Train loss: 0.6082 Train Accuracy: 0.8134
Validation loss: 0.8355 Validation Accuracy: 0.7489 
Epoch 5/20
Train loss: 0.4460 Train Accuracy: 0.8626
Validation loss: 0.7559 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▃▅▆▇▇▇▇████████████
train_loss,█▅▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▄▆▆▇▇▇▇████████████
val_accuracy,▁
val_loss,█▅▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.97406
train_loss,0.07868
val_acc,0.84815
val_accuracy,0.84873
val_loss,0.68922


[34m[1mwandb[0m: Agent Starting Run: 0y9iuutb with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 1, Decoder layers: 2, Embedding: 64, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 1.5500 Train Accuracy: 0.5554
Validation loss: 0.6730 Validation Accuracy: 0.7925 
Epoch 2/20
Train loss: 0.4906 Train Accuracy: 0.8492
Validation loss: 0.4653 Validation Accuracy: 0.8539 
Epoch 3/20
Train loss: 0.3237 Train Accuracy: 0.9003
Validation loss: 0.4235 Validation Accuracy: 0.8707 
Epoch 4/20
Train loss: 0.2453 Train Accuracy: 0.9238
Validation loss: 0.4052 Validation Accuracy: 0.8766 
Epoch 5/20
Train loss: 0.1984 Train Accuracy: 0.9379
Validation loss: 0.3993 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▆▇▇▇▇██████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▅▇▇▇█▇█████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄

0,1
epoch,19.0
train_acc,0.97681
train_loss,0.06957
val_acc,0.88706
val_accuracy,0.8911
val_loss,0.50044


[34m[1mwandb[0m: Agent Starting Run: lufvzj6m with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 3, Embedding: 32, Hidden: 32
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.7659 Train Accuracy: 0.2312
Validation loss: 2.4358 Validation Accuracy: 0.3225 
Epoch 2/20
Train loss: 2.2679 Train Accuracy: 0.3596
Validation loss: 2.0365 Validation Accuracy: 0.4214 
Epoch 3/20
Train loss: 1.9095 Train Accuracy: 0.4509
Validation loss: 1.6555 Validation Accuracy: 0.5173 
Epoch 4/20
Train loss: 1.6232 Train Accuracy: 0.5263
Validation loss: 1.3761 Validation Accuracy: 0.5907 
Epoch 5/20
Train loss: 1.4139 Train Accuracy: 0.5803
Validation loss: 1.1795 Validatio

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▃▄▅▅▆▆▇▇▇▇▇▇███████
train_loss,█▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁
val_acc,▁▂▄▅▅▆▆▇▇▇▇█████████
val_accuracy,▁
val_loss,█▇▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.80073
train_loss,0.64221
val_acc,0.83003
val_accuracy,0.83003
val_loss,0.54481


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 26noqztz with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 1, Decoder layers: 3, Embedding: 32, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.5308 Train Accuracy: 0.2856
Validation loss: 2.1868 Validation Accuracy: 0.3696 
Epoch 2/20
Train loss: 1.8201 Train Accuracy: 0.4678
Validation loss: 1.7413 Validation Accuracy: 0.4837 
Epoch 3/20
Train loss: 1.3764 Train Accuracy: 0.5917
Validation loss: 1.3666 Validation Accuracy: 0.5951 
Epoch 4/20
Train loss: 1.0267 Train Accuracy: 0.6908
Validation loss: 1.0722 Validation Accuracy: 0.6781 
Epoch 5/20
Train loss: 0.7702 Train Accuracy: 0.7656
Validation loss: 0.8574 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▃▄▅▆▇▇▇▇███████████
train_loss,█▆▅▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▃▄▅▆▇▇▇████████████
val_accuracy,▁
val_loss,█▆▅▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.94969
train_loss,0.15434
val_acc,0.8654
val_accuracy,0.8667
val_loss,0.52825


[34m[1mwandb[0m: Agent Starting Run: 9l7ktg0r with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 1, Decoder layers: 3, Embedding: 64, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.1446 Train Accuracy: 0.3871
Validation loss: 1.1250 Validation Accuracy: 0.6581 
Epoch 2/20
Train loss: 0.8464 Train Accuracy: 0.7395
Validation loss: 0.5914 Validation Accuracy: 0.8149 
Epoch 3/20
Train loss: 0.5105 Train Accuracy: 0.8408
Validation loss: 0.4707 Validation Accuracy: 0.8530 
Epoch 4/20
Train loss: 0.3845 Train Accuracy: 0.8799
Validation loss: 0.4203 Validation Accuracy: 0.8674 
Epoch 5/20
Train loss: 0.3160 Train Accuracy: 0.9001
Validation loss: 0.4076 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▅▇▇▇▇██████████████
train_loss,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇▇▇███████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.96252
train_loss,0.11365
val_acc,0.89355
val_accuracy,0.89543
val_loss,0.42535


[34m[1mwandb[0m: Agent Starting Run: xuq5qctd with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 2, Decoder layers: 3, Embedding: 16, Hidden: 32
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 3.0719 Train Accuracy: 0.1528
Validation loss: 2.6205 Validation Accuracy: 0.2475 
Epoch 2/20
Train loss: 2.4953 Train Accuracy: 0.2808
Validation loss: 2.3626 Validation Accuracy: 0.3063 
Epoch 3/20
Train loss: 2.3007 Train Accuracy: 0.3296
Validation loss: 2.1473 Validation Accuracy: 0.3752 
Epoch 4/20
Train loss: 2.1279 Train Accuracy: 0.3792
Validation loss: 1.9574 Validation Accuracy: 0.4263 
Epoch 5/20
Train loss: 1.9865 Train Accuracy: 0.4170
Validation loss: 1.8152 Validatio

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▃▃▄▄▅▅▆▆▆▆▇▇▇▇▇████
train_loss,█▆▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁
val_acc,▁▂▃▃▄▄▅▅▆▆▆▇▇▇▇█████
val_accuracy,▁
val_loss,█▇▆▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.69463
train_loss,0.98746
val_acc,0.75853
val_accuracy,0.75853
val_loss,0.78084


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: byf4b15b with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 1, Embedding: 256, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.6490 Train Accuracy: 0.2957
Validation loss: 2.1270 Validation Accuracy: 0.4282 
Epoch 2/20
Train loss: 1.5949 Train Accuracy: 0.5586
Validation loss: 1.2313 Validation Accuracy: 0.6436 
Epoch 3/20
Train loss: 1.0220 Train Accuracy: 0.7089
Validation loss: 0.9031 Validation Accuracy: 0.7358 
Epoch 4/20
Train loss: 0.7737 Train Accuracy: 0.7781
Validation loss: 0.7473 Validation Accuracy: 0.7741 
Epoch 5/20
Train loss: 0.6370 Train Accuracy: 0.8165
Validation loss: 0.6565 Validat

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▄▅▆▇▇▇▇▇▇██████████
train_loss,█▅▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▄▆▆▇▇▇▇████████████
val_accuracy,▁
val_loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.94704
train_loss,0.19213
val_acc,0.87407
val_accuracy,0.87407
val_loss,0.413


[34m[1mwandb[0m: Agent Starting Run: q3fvplax with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 3, Embedding: 64, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.3052 Train Accuracy: 0.3511
Validation loss: 1.3998 Validation Accuracy: 0.5900 
Epoch 2/20
Train loss: 0.9245 Train Accuracy: 0.7232
Validation loss: 0.6254 Validation Accuracy: 0.8115 
Epoch 3/20
Train loss: 0.5081 Train Accuracy: 0.8449
Validation loss: 0.4403 Validation Accuracy: 0.8627 
Epoch 4/20
Train loss: 0.3665 Train Accuracy: 0.8867
Validation loss: 0.3928 Validation Accuracy: 0.8768 
Epoch 5/20
Train loss: 0.2906 Train Accuracy: 0.9098
Validation loss: 0.3568 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▅▇▇▇▇██████████████
train_loss,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇▇████████████████
val_accuracy,▁
val_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_acc,0.97638
train_loss,0.07191
val_acc,0.90431
val_accuracy,0.90572
val_loss,0.40205


[34m[1mwandb[0m: Agent Starting Run: lcfmfw2y with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001


Initialized model: LSTM, Encoder layers: 3, Decoder layers: 3, Embedding: 32, Hidden: 256
Model created on CPU, trying to move to device...
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.0853 Train Accuracy: 0.4081
Validation loss: 1.2976 Validation Accuracy: 0.6174 
Epoch 2/20
Train loss: 0.8417 Train Accuracy: 0.7483
Validation loss: 0.5895 Validation Accuracy: 0.8200 
Epoch 3/20
Train loss: 0.4326 Train Accuracy: 0.8685
Validation loss: 0.4411 Validation Accuracy: 0.8615 
Epoch 4/20
Train loss: 0.2991 Train Accuracy: 0.9079
Validation loss: 0.4061 Validation Accuracy: 0.8772 
Epoch 5/20
Train loss: 0.2316 Train Accuracy: 0.9281
Validation loss: 0.3865 Validati

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▅▇▇▇███████████████
train_loss,█▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆▇▇████████████████
val_accuracy,▁
val_loss,█▃▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂

0,1
epoch,19.0
train_acc,0.97983
train_loss,0.06036
val_acc,0.89701
val_accuracy,0.89737
val_loss,0.47758


## Testing

In [11]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
from torch.utils.data import DataLoader
import os
from tqdm import tqdm
import csv
from IPython.display import display, HTML

In [12]:
path_test = "/kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.test.tsv"

In [13]:
test_dataset = TransliterationDataset(path_test, src_vocab, tgt_vocab)
test_loader = DataLoader(test_dataset, batch_size=1)

Loaded 5747 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.test.tsv
Example pairs: [('amkamlo', 'అంకంలో'), ('ankamlo', 'అంకంలో')]


In [14]:
def decode_sequence(seq, char_vocab, end_token=3):
    """
    Convert index sequence to character sequence
    
    Args:
        seq: Sequence of indices
        char_vocab: Dictionary mapping characters to indices
        end_token: Token index representing end of sequence (default=2)
        
    Returns:
        String of characters
    """
    # Create inverse mapping (idx -> char)
    idx_to_char = {idx: ch for ch, idx in char_vocab.items()}
    
    # Process sequence and stop at end token
    result = []
    has_end_token = False
    
    for idx in seq:
        if idx == 0:  # Skip padding token
            continue
        if idx == end_token:  # Stop at end token
            has_end_token = True
            break
        if idx in idx_to_char:
            result.append(idx_to_char[idx])
        else:
            # Handle out-of-vocabulary tokens
            result.append('<UNK>')
    
    # If no end token was found, use the entire sequence
    # (this helps prevent truncated outputs)
    decoded = ''.join(result)
    
    return decoded

In [15]:
def evaluate_model(model, test_loader, src_vocab, tgt_vocab, device, end_token=3, unk_token=1, output_file="predictions_vanilla.csv"):
    """
    Evaluate model on test set and save results to CSV
    
    Args:
        model: The trained model
        test_loader: DataLoader for test dataset
        src_vocab: Source vocabulary (char -> idx mapping)
        tgt_vocab: Target vocabulary (char -> idx mapping)
        device: Device to run evaluation on
        end_token: Token index representing end of sequence (default=2)
        unk_token: Token index representing unknown token (default=1)
        output_file: Path to save CSV results
        
    Returns:
        List of results with input, prediction, target, and correctness
    """
    model.eval()
    
    results = []
    
    # Get target vocabulary size
    tgt_vocab_size = len(tgt_vocab)
    
    # Create inverse mapping for vocabulary inspection
    idx_to_char = {idx: ch for ch, idx in tgt_vocab.items()}
    print(f"Target vocabulary size: {len(tgt_vocab)}")
    print(f"Special tokens: PAD={0}, UNK={unk_token}, SOS={2}, EOS={end_token}")
    
    with torch.no_grad(), open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['input', 'prediction', 'target', 'correct'])
        
        for batch in tqdm(test_loader, desc="Evaluating"):
            src, tgt = batch
            src, tgt = src.to(device), tgt.to(device)
            
            # Forward pass without teacher forcing
            if hasattr(model, 'inference'):
                output = model.inference(src)
            else:
                # Use our custom inference function
                output = inference_without_teacher_forcing(
                    model, src, max_len=100, 
                    device=device
                )
            
            # Process each sequence in the batch
            for i in range(src.size(0)):
                src_seq = src[i].cpu().numpy()
                
                # If tgt starts with UNK token (used instead of SOS), skip it
                if tgt[i, 0].item() == unk_token:
                    tgt_seq = tgt[i, 1:].cpu().numpy()  # Skip UNK token
                else:
                    tgt_seq = tgt[i].cpu().numpy()  # Use entire sequence
                
                pred_seq = output[i].cpu().numpy()
                
                # Convert sequences to strings (exclude the sos in the beginning)
                src_str = decode_sequence(src_seq, src_vocab, end_token)
                tgt_str = decode_sequence(tgt_seq, tgt_vocab, end_token)
                pred_str = decode_sequence(pred_seq, tgt_vocab, end_token)
                
                # Check correctness
                correct = pred_str == tgt_str[5:]
                
                # Save to CSV
                writer.writerow([
                    src_str[5:], 
                    pred_str, 
                    tgt_str[5:]
                ])
                
                # Save for visualization
                results.append({
                    'input': src_str[5:],
                    'prediction': pred_str,
                    'target': tgt_str[5:],
                    'correct': correct
                })
    
    # Calculate overall accuracy
    accuracy = sum(1 for r in results if r['correct']) / len(results) if results else 0
    print(f"Overall Word Accuracy: {accuracy:.4f}")
    
    return accuracy, results

In [16]:
def generate_char_comparison_html(input_str, pred_str, target_str):
    """
    Generate HTML with character-by-character comparison
    """
    html = f"<p><b>Input:</b> {input_str}</p>"
    
    html += "<p><b>Prediction vs Target:</b> "
    
    # Extend the shorter string with spaces to match the longer one
    max_len = max(len(pred_str), len(target_str))
    pred_str_padded = pred_str.ljust(max_len)
    target_str_padded = target_str.ljust(max_len)
    
    for p_char, t_char in zip(pred_str_padded, target_str_padded):
        if p_char == t_char:
            html += f'<span style="color:green">{p_char}</span>'
        else:
            html += f'<span style="color:red">{p_char}</span>'
    
    html += "</p>"
    
    html += f"<p><b>Target:</b> {target_str}</p>"
    
    return html

In [17]:
def visualize_errors(results, n_samples=10):
    """
    Visualize character-level errors for random samples
    """
    # Sample random results (prioritize some incorrect ones)
    incorrect_samples = [r for r in results if not r['correct']]
    correct_samples = [r for r in results if r['correct']]
    
    if len(incorrect_samples) > 0 and len(correct_samples) > 0:
        # Try to get a mix of correct and incorrect samples
        n_incorrect = min(n_samples // 2, len(incorrect_samples))
        n_correct = n_samples - n_incorrect
        
        samples = (random.sample(incorrect_samples, n_incorrect) + 
                  random.sample(correct_samples, min(n_correct, len(correct_samples))))
    else:
        # If all samples are correct or incorrect, just sample randomly
        samples = random.sample(results, min(n_samples, len(results)))
    
    html_output = "<h2>Character-level Error Visualization</h2>"
    html_output += "<p>Green: Correct characters, Red: Incorrect characters</p>"
    
    for i, sample in enumerate(samples):
        html_output += f"<h3>Sample {i+1}</h3>"
        html_output += generate_char_comparison_html(
            sample['input'], 
            sample['prediction'], 
            sample['target']
        )
        html_output += "<hr>"
    
    return HTML(html_output)

In [18]:
api = wandb.Api()
sweep = api.sweep("da24m027-indian-institute-of-technology-madras/DA6401_Assignment3/zodbp4ie")
best_run = sweep.best_run()
best_run_config = best_run.config

[34m[1mwandb[0m: Sorting runs by -summary_metrics.val_accuracy


In [19]:
best_run_config

{'dropout': 0.3,
 'beam_size': 3,
 'cell_type': 'LSTM',
 'embed_dim': 64,
 'batch_size': 64,
 'dec_layers': 3,
 'enc_layers': 3,
 'hidden_dim': 256,
 'learning_rate': 0.001}

In [20]:
def test_and_evaluate(config=None):
    run = wandb.init(config=config, project="DA6401_Assignment3")
    cfg = run.config
    epochs = 20
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
    run.name = "Testing without Attention"

    # First create on CPU
    model = Seq2Seq(cfg, len(src_vocab), len(tgt_vocab))
    model = model.to(device)
    print("Model successfully moved to device.")
        
    print("Loading datasets...")
    train_dataset = TransliterationDataset(path_train, src_vocab, tgt_vocab)
    dev_dataset = TransliterationDataset(path_dev, src_vocab, tgt_vocab)
            
    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=True)
    dev_loader = DataLoader(dev_dataset, batch_size=cfg.batch_size)
        
    criterion = nn.CrossEntropyLoss(ignore_index=0)  # 0 is padding index
    optimizer = optim.Adam(model.parameters(), lr=cfg.learning_rate)
    best_val_acc = 0.0
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
                
        # Training
        train_loss, train_acc = run_epoch(model, train_loader, criterion, optimizer, device)
        print(f"Train loss: {train_loss:.4f} Train Accuracy: {train_acc:.4f}")
                
        # Validation
        val_loss, val_acc = evaluate(model, dev_loader, criterion, device)
        print(f"Validation loss: {val_loss:.4f} Val Accuracy: {val_acc:.4f}")
                
        # Log metrics
        wandb.log({'train_loss': train_loss, 'val_loss': val_loss, 'train_acc': train_acc, 'val_acc': val_acc, 'epoch': epoch})
        if best_val_acc < val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), 'model.pth')
    test_acc, results = evaluate_model(model, test_loader, src_vocab, tgt_vocab, device)
    wandb.log({'Test_acc':test_acc})
    display(visualize_errors(results, n_samples=10))


In [21]:
test_and_evaluate(best_run_config)

Initialized model without attention: LSTM, Encoder layers: 3, Decoder layers: 3, Embedding: 64, Hidden: 256
Model successfully moved to device.
Loading datasets...
Loaded 58550 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.train.tsv
Example pairs: [('amkita', 'అంకిత'), ('ankita', 'అంకిత')]
Loaded 5683 records from /kaggle/input/dakshina-telugu/lexicons/te.translit.sampled.dev.tsv
Example pairs: [('amka', 'అంక'), ('anka', 'అంక')]
Epoch 1/20
Train loss: 2.3382 Train Accuracy: 0.3496
Validation loss: 1.4840 Val Accuracy: 0.5830
Epoch 2/20
Train loss: 0.8767 Train Accuracy: 0.7456
Validation loss: 0.4605 Val Accuracy: 0.8642
Epoch 3/20
Train loss: 0.3857 Train Accuracy: 0.8842
Validation loss: 0.3320 Val Accuracy: 0.8971
Epoch 4/20
Train loss: 0.2666 Train Accuracy: 0.9191
Validation loss: 0.3029 Val Accuracy: 0.9084
Epoch 5/20
Train loss: 0.2091 Train Accuracy: 0.9364
Validation loss: 0.2894 Val Accuracy: 0.9131
Epoch 6/20
Train loss: 0.1731 Train Accuracy: 0.947

Evaluating: 100%|██████████| 5747/5747 [00:35<00:00, 164.11it/s]

Overall Word Accuracy: 0.5453



