In [1]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Load the dataset
train_data = pd.read_csv('Dakshina Dataset\Dakshina Dataset\hi\lexicons\hi.translit.sampled.train.tsv')
val_data = pd.read_csv('Dakshina Dataset\Dakshina Dataset\hi\lexicons\hi.translit.sampled.dev.tsv')
test_data = pd.read_csv('Dakshina Dataset\Dakshina Dataset\hi\lexicons\hi.translit.sampled.test.tsv')

# Define the preprocessing function
def preprocess(text):
    # TODO: implement preprocessing
    return text

# Define the dataset class
class CustomDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        x = preprocess(self.data['x'][index])
        y = preprocess(self.data['y'][index])
        return x, y

# Create the datasets and dataloaders
train_dataset = CustomDataset(train_data)
val_dataset = CustomDataset(val_data)
test_dataset = CustomDataset(test_data)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)


In [3]:
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)

    def forward(self, x):
        embedded = self.embedding(x)
        output, hidden = self.lstm(embedded)
        return output, hidden

class Decoder(nn.Module):
    def __init__(self, output_size, hidden_size, num_layers):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell):
        x = x.unsqueeze(1)
        embedded = self.embedding(x)
        output, (hidden, cell) = self.lstm(embedded, (hidden, cell))
        prediction = self.fc(output.squeeze(1))
        return prediction, hidden, cell





In [8]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_forcing_ratio=0.5):
        batch_size = source.shape[0]
        target_length = target.shape[1]
        target_vocab_size = self.decoder.fc.out_features

        outputs = torch.zeros(batch_size, target_length, target_vocab_size).to(device)

        encoder_output, hidden = self.encoder(source)
        hidden = hidden[:self.decoder.num_layers]
        cell = torch.zeros_like(hidden)

        x = target[:, 0]

        for t in range(1, target_length):
            output, hidden, cell = self.decoder(x, hidden, cell)
            outputs[:, t] = output
            top1 = output.argmax(1)
            if random.random() < teacher_forcing_ratio:
                x = target[:, t]
            else:
                x = top1

        return outputs


In [9]:
import random
import numpy as np

# Define the hyperparameters
input_embedding_sizes = [16, 64]
num_encoder_layers = [1, 3]
num_decoder_layers = [1, 3]
hidden_sizes = [16, 64]

# Define the training function
def train(model, dataloader, optimizer, criterion):
    model.train()
    total_loss = 0
    for i, (x, y) in enumerate(dataloader):
        optimizer.zero_grad()
        source = torch.tensor([char_to_int[c] for c in x]).to(device)
        target = torch.tensor([char_to_int[c] for c in y]).to(device)
        output = model(source, target)
        loss = criterion(output[:, 1:].reshape(-1, len(char_to_int)), target[:, 1:].reshape(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(dataloader)

# Define the evaluation function
def evaluate(model, dataloader, criterion):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for i, (x, y) in enumerate(dataloader):
            source = torch.tensor([char_to_int[c] for c in x]).to(device)
            target = torch.tensor([char_to_int[c] for c in y]).to(device)
            output = model(source, target, teacher_forcing_ratio=0)
            loss = criterion(output[:, 1:].reshape(-1, len(char_to_int)), target[:, 1:].reshape(-1))
            total_loss += loss.item()
    return total_loss / len(dataloader)


In [14]:
from torch import optim


# Define the hyperparameter settings
hyperparameters = [(e, n1, n2, h) for e in input_embedding_sizes for n1 in num_encoder_layers for n2 in num_decoder_layers for h in hidden_sizes]

# Define the training data and validation data
model = Seq2Seq(len(char_to_int), input_embedding_size, hidden_size, len(char_to_int), num_encoder_layers, num_decoder_layers)

# Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss(ignore_index=0)
optimizer = optim.Adam(model.parameters())

# Train the model with different hyperparameters
results = []
for i, (input_embedding_size, num_encoder_layers, num_decoder_layers, hidden_size) in enumerate(hyperparameters):
    print(f'Training model {i+1}/{len(hyperparameters)}')
    model = Seq2Seq(len(char_to_int), input_embedding_size, hidden_size, len(char_to_int), num_encoder_layers, num_decoder_layers)
    model.to(device)
    optimizer = optim.Adam(model.parameters())
    train_losses = []
    val_losses = []
    for epoch in range(10):
        train_loss = train(model, train_loader, optimizer, criterion)
        val_loss = evaluate(model, val_loader, criterion)
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        print(f'Epoch {epoch+1}, Train loss: {train_loss:.4f}, Val loss: {val_loss:.4f}')
    results.append({
        'input_embedding_size': input_embedding_size,
        'num_encoder_layers': num_encoder_layers,
        'num_decoder_layers': num_decoder_layers,
        'hidden_size': hidden_size,
        'train_losses': train_losses,
        'val_losses': val_losses
    })




NameError: name 'char_to_int' is not defined

In [None]:
import matplotlib.pyplot as plt

# Plot the results
def plot_results(results, hyperparameter_name):