In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/telugu-lexicon/te.translit.sampled.train.tsv
/kaggle/input/telugu-lexicon/te.translit.sampled.dev.tsv
/kaggle/input/telugu-lexicon/te.translit.sampled.test.tsv


In [2]:
import pandas as pd

# Load data
train_df = pd.read_csv("/kaggle/input/telugu-lexicon/te.translit.sampled.train.tsv", sep="\t", header=None)
train_df.columns = ["target", "source", "label"]
#pairs = list(zip(train_df["source"], train_df["target"]))  # Latin to Devanagari

# Preview

val_df = pd.read_csv("/kaggle/input/telugu-lexicon/te.translit.sampled.dev.tsv", sep="\t", header=None)
val_df.columns = ["target", "source", "label"]
#val_pairs = list(zip(val_df["source"], val_df["target"]))

# Drop any rows where source or target is missing
train_df = train_df.dropna(subset=["source", "target"])
val_df = val_df.dropna(subset=["source", "target"])

# Ensure source and target are strings
train_df["source"] = train_df["source"].astype(str)
train_df["target"] = train_df["target"].astype(str)
val_df["source"] = val_df["source"].astype(str)
val_df["target"] = val_df["target"].astype(str)

# Create pairs
pairs = list(zip(train_df["source"], train_df["target"]))
val_pairs = list(zip(val_df["source"], val_df["target"]))


In [3]:
print(len(val_pairs))

5683


In [4]:
import random

sample_size = 10000
pairs = random.sample(pairs, sample_size)    # use only 2000 training samples
#val_pairs = random.sample(val_pairs, sample_size) 

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim


In [6]:
#pairs = list(zip(df["source"], df["target"]))

#pairs = [
 #   ("namaste", "नमस्ते"),
  #  ("bharat",  "भारत"),
   # ("duniya",  "दुनिया"),
    #("prem",    "प्रेम"),
#]


In [7]:
SRC_CHARS = set("".join(s for s, _ in pairs))
TRG_CHARS = set("".join(t for _, t in pairs)) | {"<sos>", "<eos>"}

src2idx = {ch: i+1 for i, ch in enumerate(sorted(SRC_CHARS))}  # reserve 0 for padding
src2idx["<pad>"] = 0
trg2idx = {ch: i+1 for i, ch in enumerate(sorted(TRG_CHARS))}
trg2idx["<pad>"] = 0

idx2trg = {i: ch for ch, i in trg2idx.items()}


In [8]:
import torch
import torch.nn as nn

# === RNN Cell Wrapper === #
def get_rnn_cell(cell_type):
    cell_type = cell_type.upper()
    if cell_type == "GRU":
        return nn.GRU
    elif cell_type == "LSTM":
        return nn.LSTM
    elif cell_type == "RNN":
        return nn.RNN
    else:
        raise ValueError("Unsupported RNN cell type. Use 'RNN', 'GRU', or 'LSTM'.")

# === Encoder === #
class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hidden_dim, cell_type="GRU", num_layers=1, dropout=0.0):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim, padding_idx=0)
        self.rnn = get_rnn_cell(cell_type)(
            emb_dim, hidden_dim, num_layers, dropout=dropout if num_layers > 1 else 0.0
        )
    def forward(self, src):
        embedded = self.embedding(src)  # [src_len, batch=1, emb_dim]
        outputs, hidden = self.rnn(embedded)  # hidden: [num_layers, batch, hidden_dim]
        return hidden

# === Decoder === #
class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hidden_dim, cell_type="GRU", num_layers=1, dropout=0.0):
        super().__init__()
        self.embedding = nn.Embedding(output_dim, emb_dim, padding_idx=0)
        self.rnn = get_rnn_cell(cell_type)(
            emb_dim, hidden_dim, num_layers, dropout=dropout if num_layers > 1 else 0.0
        )
        self.out = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_step, hidden):
        embedded = self.embedding(input_step)  # [1, 1, emb_dim]
        output, hidden = self.rnn(embedded, hidden)
        output = self.dropout(output.squeeze(0))  # Apply dropout to RNN output
        prediction = self.out(output)             # [1, output_dim]
        return prediction, hidden

# === Seq2Seq Wrapper === #
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device, sos_idx):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        self.sos_idx = sos_idx

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        trg_len = trg.shape[0]
        output_dim = self.decoder.out.out_features
        outputs = torch.zeros(trg_len, 1, output_dim).to(self.device)

        hidden = self.encoder(src)
        hidden = self.adjust_hidden_for_decoder(hidden, self.decoder.rnn.num_layers)

        input_step = torch.tensor([[self.sos_idx]], device=self.device)

        for t in range(trg_len):
            output, hidden = self.decoder(input_step, hidden)
            outputs[t] = output
            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            top1 = output.argmax(1).unsqueeze(0)
            input_step = trg[t].unsqueeze(0) if teacher_force else top1

        return outputs

    def adjust_hidden_for_decoder(self, hidden, target_layers):
        """
        Adjust the encoder's hidden state to match the number of decoder layers.
        Pads if encoder has fewer layers, trims if more.
        Works for GRU/RNN (tensor) and LSTM (tuple).
        """
        if isinstance(hidden, tuple):  # LSTM
            h, c = hidden
            h = self._match_layers(h, target_layers)
            c = self._match_layers(c, target_layers)
            return (h, c)
        else:  # GRU or RNN
            return self._match_layers(hidden, target_layers)

    def _match_layers(self, state, target_layers):
        """
        Pad or trim the hidden state tensor to match target number of layers.
        """
        current_layers = state.size(0)
        if current_layers == target_layers:
            return state
        elif current_layers < target_layers:
            diff = target_layers - current_layers
            pad = torch.zeros(diff, state.size(1), state.size(2), device=state.device)
            return torch.cat([state, pad], dim=0)
        else:  # current_layers > target_layers
            return state[:target_layers]


In [9]:
emb_dim = 64
hidden_dim = 128
cell_type = "RNN"  # or "GRU", "RNN"
num_layers = 2


In [10]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [11]:
'''
import torch.optim as optim

encoder = Encoder(len(src2idx), emb_dim, hidden_dim, cell_type, num_layers)
decoder = Decoder(len(trg2idx), emb_dim, hidden_dim, cell_type, num_layers)
sos_idx = trg2idx["<sos>"]
model = Seq2Seq(encoder, decoder, DEVICE, sos_idx).to(DEVICE)

optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss(ignore_index=trg2idx["<pad>"])
'''
def tensor_from_word(word, mapping, add_eos=False):
    indices = [mapping[ch] for ch in word]
    if add_eos:
        indices.append(mapping["<eos>"])
    return torch.tensor(indices, dtype=torch.long, device=DEVICE).unsqueeze(1)

'''
for epoch in range(1, 6):
    model.train()
    epoch_loss = 0
    train_correct = 0
    train_total = 0

    for src_word, trg_word in pairs:
        src_tensor = tensor_from_word(src_word, src2idx)
        trg_tensor = tensor_from_word(trg_word, trg2idx, add_eos=True)

        optimizer.zero_grad()
        output = model(src_tensor, trg_tensor)
        output_dim = output.shape[-1]
        loss = criterion(output.view(-1, output_dim), trg_tensor.view(-1))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

        # Training accuracy
        pred_tokens = output.argmax(-1).view(-1)
        true_tokens = trg_tensor.view(-1)
        mask = true_tokens != trg2idx["<pad>"]
        correct = (pred_tokens == true_tokens) & mask
        train_correct += correct.sum().item()
        train_total += mask.sum().item()

    # Validation
    model.eval()
    val_loss = 0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for src_word, trg_word in val_pairs:
            src_tensor = tensor_from_word(src_word, src2idx)
            trg_tensor = tensor_from_word(trg_word, trg2idx, add_eos=True)
            output = model(src_tensor, trg_tensor)
            loss = criterion(output.view(-1, output.shape[-1]), trg_tensor.view(-1))
            val_loss += loss.item()

            pred_tokens = output.argmax(-1).view(-1)
            true_tokens = trg_tensor.view(-1)
            mask = true_tokens != trg2idx["<pad>"]
            correct = (pred_tokens == true_tokens) & mask
            val_correct += correct.sum().item()
            val_total += mask.sum().item()

    train_acc = train_correct / train_total
    val_acc = val_correct / val_total
    print(f"Epoch {epoch:3d} | Train Loss: {epoch_loss/len(pairs):.4f} | "
          f"Train Acc: {train_acc:.4f} | Val Loss: {val_loss/len(val_pairs):.4f} | Val Acc: {val_acc:.4f}")

'''

'\nfor epoch in range(1, 6):\n    model.train()\n    epoch_loss = 0\n    train_correct = 0\n    train_total = 0\n\n    for src_word, trg_word in pairs:\n        src_tensor = tensor_from_word(src_word, src2idx)\n        trg_tensor = tensor_from_word(trg_word, trg2idx, add_eos=True)\n\n        optimizer.zero_grad()\n        output = model(src_tensor, trg_tensor)\n        output_dim = output.shape[-1]\n        loss = criterion(output.view(-1, output_dim), trg_tensor.view(-1))\n        loss.backward()\n        optimizer.step()\n        epoch_loss += loss.item()\n\n        # Training accuracy\n        pred_tokens = output.argmax(-1).view(-1)\n        true_tokens = trg_tensor.view(-1)\n        mask = true_tokens != trg2idx["<pad>"]\n        correct = (pred_tokens == true_tokens) & mask\n        train_correct += correct.sum().item()\n        train_total += mask.sum().item()\n\n    # Validation\n    model.eval()\n    val_loss = 0\n    val_correct = 0\n    val_total = 0\n    with torch.no_grad(

In [12]:
def beam_search(model, src_tensor, beam_size=3, max_len=30):
    model.eval()
    with torch.no_grad():
        hidden = model.encoder(src_tensor)
        hidden = model.adjust_hidden_for_decoder(hidden, model.decoder.rnn.num_layers)


        if isinstance(hidden, tuple):
            h, c = hidden
            hidden = (
                h[:model.decoder.rnn.num_layers],
                c[:model.decoder.rnn.num_layers],
            )
        else:
            hidden = hidden[:model.decoder.rnn.num_layers]

        sequences = [([model.sos_idx], 0.0, hidden)]

        for _ in range(max_len):
            all_candidates = []
            for seq, score, h in sequences:
                input_step = torch.tensor([[seq[-1]]], device=model.device)
                output, h_new = model.decoder(input_step, h)
                probs = torch.log_softmax(output, dim=1)
                topk = torch.topk(probs, beam_size)

                for i in range(beam_size):
                    token = topk.indices[0][i].item()
                    token_score = topk.values[0][i].item()
                    new_seq = seq + [token]
                    all_candidates.append((new_seq, score + token_score, h_new))

            sequences = sorted(all_candidates, key=lambda x: x[1], reverse=True)[:beam_size]

            if all(seq[-1] == trg2idx["<eos>"] for seq, _, _ in sequences):
                break

        best_seq = sequences[0][0][1:]  # remove <sos>
        return "".join([idx2trg[i] for i in best_seq if i != trg2idx["<eos>"]])


In [13]:
!pip install -q wandb
import wandb


In [14]:
wandb.login(key='af7d7cf29d8954a13afb06c7a0d0c196c36ac51b')


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mma24m003[0m ([33mma24m003-iit-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [15]:
sweep_config = {
    "method": "bayes",
    "metric": {"name": "val_acc", "goal": "maximize"},
    "parameters": {
        "emb_dim": {"values": [16,32,64,256]},
        "hidden_dim": {"values": [16,32,64,256]},
        "cell_type": {"values": ["RNN","GRU","LSTM"]},
        "enc_layers": {"values": [1,2,3]},
        "dec_layers": {"values": [1, 2, 3]},
        "dropout": {"values": [0,0.2, 0.3]},
        "beam_size": {"values": [1, 3, 2]},
        "lr": {"values": [0.001, 0.0005]},
        "teacher_forcing_ratio": {"values": [0.3, 0.5, 0.7, 1.0]}

    }
}


In [16]:
def train_model(config=None):
    with wandb.init(config=config) as run:
        config = wandb.config

        run.name = f"emb{config.emb_dim}_hid{config.hidden_dim}_{config.cell_type}_enc{config.enc_layers}_dec{config.dec_layers}_drop{int(config.dropout*100)}_beam{config.beam_size}_lr{config.lr}"

        encoder = Encoder(len(src2idx), config.emb_dim, config.hidden_dim,
                          config.cell_type, config.enc_layers, config.dropout)
        decoder = Decoder(len(trg2idx), config.emb_dim, config.hidden_dim,
                          config.cell_type, config.dec_layers, config.dropout)
        model = Seq2Seq(encoder, decoder, DEVICE, sos_idx=trg2idx["<sos>"]).to(DEVICE)

        optimizer = optim.Adam(model.parameters(), lr=config.lr)
        criterion = nn.CrossEntropyLoss(ignore_index=trg2idx["<pad>"])
        beam_size = config.get("beam_size", 3)

        for epoch in range(1, 6):
            model.train()
            total_loss = 0
            train_correct = 0
            train_total = 0

            for src_word, trg_word in pairs:
                src_tensor = tensor_from_word(src_word, src2idx)
                trg_tensor = tensor_from_word(trg_word, trg2idx, add_eos=True)

                optimizer.zero_grad()
                output = model(src_tensor, trg_tensor, teacher_forcing_ratio=config.get("teacher_forcing_ratio", 0.5))
                loss = criterion(output.view(-1, output.size(-1)), trg_tensor.view(-1))
                loss.backward()
                optimizer.step()
                total_loss += loss.item()

                pred_tokens = output.argmax(-1).view(-1)
                true_tokens = trg_tensor.view(-1)
                mask = true_tokens != trg2idx["<pad>"]
                correct = (pred_tokens == true_tokens) & mask
                train_correct += correct.sum().item()
                train_total += mask.sum().item()

            model.eval()
            val_loss = 0
            val_correct = 0
            val_total = 0
            exact_match_count = 0

            with torch.no_grad():
                for src_word, trg_word in val_pairs:
                    src_tensor = tensor_from_word(src_word, src2idx)
                    trg_tensor = tensor_from_word(trg_word, trg2idx, add_eos=True)

                    output = model(src_tensor, trg_tensor, teacher_forcing_ratio=0.0)
                    loss = criterion(output.view(-1, output.size(-1)), trg_tensor.view(-1))
                    val_loss += loss.item()

                    pred_tokens = output.argmax(-1).view(-1)
                    true_tokens = trg_tensor.view(-1)
                    mask = true_tokens != trg2idx["<pad>"]
                    correct = (pred_tokens == true_tokens) & mask
                    val_correct += correct.sum().item()
                    val_total += mask.sum().item()

                    # Beam search for exact match accuracy
                    pred_str = beam_search(model, src_tensor, beam_size=beam_size)
                    if pred_str == trg_word:
                        exact_match_count += 1

            train_acc = train_correct / train_total
            val_acc = val_correct / val_total
            exact_match = exact_match_count / len(val_pairs)
            
            print(f"Epoch {epoch:2d} | "
                  f"Train Loss: {total_loss / len(pairs):.4f} | "
                  f"Train Acc: {train_acc:.4f} | "
                  f"Val Loss: {val_loss / len(val_pairs):.4f} | "
                  f"Val Acc: {val_acc:.4f} | "
                  f"Val Exact Match: {exact_match:.4f} | "
                  f"Beam Size: {beam_size}")

            
            wandb.log({
                "epoch": epoch,
                "train_loss": total_loss / len(pairs),
                "val_loss": val_loss / len(val_pairs),
                "train_accuracy": train_correct / train_total,
                "val_accuracy": val_correct / val_total,
                "val_exact_match": exact_match_count / len(val_pairs),
                "beam_size": beam_size
            })


In [17]:
'''
sweep_id = wandb.sweep(sweep_config, project="transliteration-sweep")
wandb.agent(sweep_id, function=train_model, count=100)
wandb.finish()
'''

'\nsweep_id = wandb.sweep(sweep_config, project="transliteration-sweep")\nwandb.agent(sweep_id, function=train_model, count=100)\nwandb.finish()\n'

In [18]:
best_config = {
    "emb_dim": 32,
    "hidden_dim": 256,
    "cell_type": "LSTM",
    "enc_layers": 3,
    "dec_layers": 2,
    "dropout": 0,
    "beam_size": 3,
    "lr": 0.001}


In [19]:
def best_train_model(config=None):
    with wandb.init(config=config):
        config = wandb.config

        encoder = Encoder(len(src2idx), config.emb_dim, config.hidden_dim,
                          config.cell_type, config.enc_layers, config.dropout)
        decoder = Decoder(len(trg2idx), config.emb_dim, config.hidden_dim,
                          config.cell_type, config.dec_layers, config.dropout)
        model = Seq2Seq(encoder, decoder, DEVICE, sos_idx=trg2idx["<sos>"]).to(DEVICE)

        optimizer = optim.Adam(model.parameters(), lr=config.lr)
        criterion = nn.CrossEntropyLoss(ignore_index=trg2idx["<pad>"])
        beam_size = config.get("beam_size", 3)

        for epoch in range(1, 10):
            model.train()
            total_loss = 0
            train_correct = 0
            train_total = 0

            for src_word, trg_word in pairs:
                src_tensor = tensor_from_word(src_word, src2idx)
                trg_tensor = tensor_from_word(trg_word, trg2idx, add_eos=True)

                optimizer.zero_grad()
                output = model(src_tensor, trg_tensor, teacher_forcing_ratio=config.get("teacher_forcing_ratio", 0.5))
                loss = criterion(output.view(-1, output.size(-1)), trg_tensor.view(-1))
                loss.backward()
                optimizer.step()
                total_loss += loss.item()

                pred_tokens = output.argmax(-1).view(-1)
                true_tokens = trg_tensor.view(-1)
                mask = true_tokens != trg2idx["<pad>"]
                correct = (pred_tokens == true_tokens) & mask
                train_correct += correct.sum().item()
                train_total += mask.sum().item()

            model.eval()
            val_loss = 0
            val_correct = 0
            val_total = 0
            exact_match_count = 0

            with torch.no_grad():
                for src_word, trg_word in val_pairs:
                    src_tensor = tensor_from_word(src_word, src2idx)
                    trg_tensor = tensor_from_word(trg_word, trg2idx, add_eos=True)

                    output = model(src_tensor, trg_tensor, teacher_forcing_ratio=0.0)
                    loss = criterion(output.view(-1, output.size(-1)), trg_tensor.view(-1))
                    val_loss += loss.item()

                    pred_tokens = output.argmax(-1).view(-1)
                    true_tokens = trg_tensor.view(-1)
                    mask = true_tokens != trg2idx["<pad>"]
                    correct = (pred_tokens == true_tokens) & mask
                    val_correct += correct.sum().item()
                    val_total += mask.sum().item()

                    # Beam search for exact match accuracy
                    pred_str = beam_search(model, src_tensor, beam_size=beam_size)
                    if pred_str == trg_word:
                        exact_match_count += 1

            train_acc = train_correct / train_total
            val_acc = val_correct / val_total
            exact_match = exact_match_count / len(val_pairs)

            print(f"Epoch {epoch:2d} | "
                  f"Train Loss: {total_loss / len(pairs):.4f} | "
                  f"Train Acc: {train_acc:.4f} | "
                  f"Val Loss: {val_loss / len(val_pairs):.4f} | "
                  f"Val Acc: {val_acc:.4f} | "
                  f"Val Exact Match: {exact_match:.4f} | "
                  f"Beam Size: {beam_size}")

            wandb.log({
                "epoch": epoch,
                "train_loss": total_loss / len(pairs),
                "val_loss": val_loss / len(val_pairs),
                "train_accuracy": train_acc,
                "val_accuracy": val_acc,
                "val_exact_match": exact_match,
                "beam_size": beam_size
            })

        return model  


In [20]:
model = best_train_model(config=best_config)


Epoch  1 | Train Loss: 1.8568 | Train Acc: 0.4796 | Val Loss: 1.3234 | Val Acc: 0.6003 | Val Exact Match: 0.1258 | Beam Size: 3
Epoch  2 | Train Loss: 0.7910 | Train Acc: 0.7616 | Val Loss: 1.0493 | Val Acc: 0.6928 | Val Exact Match: 0.1941 | Beam Size: 3
Epoch  3 | Train Loss: 0.5265 | Train Acc: 0.8390 | Val Loss: 1.0341 | Val Acc: 0.7188 | Val Exact Match: 0.2618 | Beam Size: 3
Epoch  4 | Train Loss: 0.3847 | Train Acc: 0.8822 | Val Loss: 1.0757 | Val Acc: 0.7310 | Val Exact Match: 0.3030 | Beam Size: 3
Epoch  5 | Train Loss: 0.2949 | Train Acc: 0.9099 | Val Loss: 1.1332 | Val Acc: 0.7359 | Val Exact Match: 0.2756 | Beam Size: 3
Epoch  6 | Train Loss: 0.2452 | Train Acc: 0.9254 | Val Loss: 1.1698 | Val Acc: 0.7464 | Val Exact Match: 0.3195 | Beam Size: 3
Epoch  7 | Train Loss: 0.2098 | Train Acc: 0.9348 | Val Loss: 1.1734 | Val Acc: 0.7433 | Val Exact Match: 0.3250 | Beam Size: 3
Epoch  8 | Train Loss: 0.1874 | Train Acc: 0.9417 | Val Loss: 1.1979 | Val Acc: 0.7461 | Val Exact Match

0,1
beam_size,▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▄▅▅▆▇█
train_accuracy,▁▅▆▇▇████
train_loss,█▄▂▂▂▁▁▁▁
val_accuracy,▁▅▇▇▇████
val_exact_match,▁▃▅▆▆▇▇▇█
val_loss,█▁▁▂▃▄▄▅▇

0,1
beam_size,3.0
epoch,9.0
train_accuracy,0.9479
train_loss,0.16738
val_accuracy,0.74454
val_exact_match,0.35157
val_loss,1.30046


In [21]:

test_df = pd.read_csv("/kaggle/input/telugu-lexicon/te.translit.sampled.test.tsv", sep="\t", header=None)
test_df.columns = ["target", "source", "label"]  # adjust if only 2 columns
test_df = test_df.dropna(subset=["source", "target"])
test_df["source"] = test_df["source"].astype(str)
test_df["target"] = test_df["target"].astype(str)
test_pairs = list(zip(test_df["source"], test_df["target"]))


In [22]:
print(len(test_pairs))

5747


In [23]:
model.eval()
test_correct = 0
test_total = 0
exact_match_count = 0
beam_size = best_config["beam_size"]

with torch.no_grad():
    for src_word, trg_word in test_pairs:
        src_tensor = tensor_from_word(src_word, src2idx)
        trg_tensor = tensor_from_word(trg_word, trg2idx, add_eos=True)

        output = model(src_tensor, trg_tensor, teacher_forcing_ratio=0.0)

        # Character-level accuracy
        pred_tokens = output.argmax(-1).view(-1)
        true_tokens = trg_tensor.view(-1)
        mask = true_tokens != trg2idx["<pad>"]
        correct = (pred_tokens == true_tokens) & mask
        test_correct += correct.sum().item()
        test_total += mask.sum().item()

        # Word-level exact match via beam search
        pred_str = beam_search(model, src_tensor, beam_size=beam_size)
        if pred_str == trg_word:
            exact_match_count += 1

test_char_acc = test_correct / test_total
test_exact_match = exact_match_count / len(test_pairs)

print(f"Test Char Accuracy: {test_char_acc:.4f}")
print(f"Test Exact Match:  {test_exact_match:.4f}")


Test Char Accuracy: 0.7460
Test Exact Match:  0.3518


In [24]:
from tabulate import tabulate

def display_predictions(model, test_pairs, beam_size=3, max_samples=10):
    model.eval()
    table = []

    with torch.no_grad():
        sampled_pairs = random.sample(test_pairs, k=min(max_samples, len(test_pairs)))
        for src_word, trg_word in sampled_pairs:
            src_tensor = tensor_from_word(src_word, src2idx)
            pred_str = beam_search(model, src_tensor, beam_size=beam_size)
            match = "✅" if pred_str == trg_word else "❌"
            table.append([src_word, pred_str, trg_word, match])

    headers = ["Input (Latin)", "Predicted (Telugu)", "Target (Telugu)", "Match"]
    print(tabulate(table, headers=headers, tablefmt="fancy_grid"))


In [26]:
display_predictions(model, test_pairs, beam_size=best_config["beam_size"], max_samples=10)


╒═════════════════╤══════════════════════╤═══════════════════╤═════════╕
│ Input (Latin)   │ Predicted (Telugu)   │ Target (Telugu)   │ Match   │
╞═════════════════╪══════════════════════╪═══════════════════╪═════════╡
│ bible           │ బిబీలు                  │ బైబిల్               │ ❌      │
├─────────────────┼──────────────────────┼───────────────────┼─────────┤
│ kuuragaayalanu  │ కూరాగయాలను               │ కూరగాయలను            │ ❌      │
├─────────────────┼──────────────────────┼───────────────────┼─────────┤
│ agency          │ అజెన్నీ                 │ ఏజన్సీ              │ ❌      │
├─────────────────┼──────────────────────┼───────────────────┼─────────┤
│ dhaampatya      │ దాంపత్య                 │ దాంపత్య              │ ✅      │
├─────────────────┼──────────────────────┼───────────────────┼─────────┤
│ prasaaraala     │ ప్రసారాల                │ ప్రసారాల             │ ✅      │
├─────────────────┼──────────────────────┼───────────────────┼─────────┤
│ satyaalu        │ సత్యాల