### Relevant Imports


In [1]:
import torch 
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split

import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter(log_dir = 'logs')
import re

from structure.transformer import Transformer
from structure.Dataset import English_Hindi_Dataset

from sub_modules.embedding import Language_Embedding
from sub_modules.masks import get_masks

  from .autonotebook import tqdm as notebook_tqdm


### Initializations


In [2]:
# Read data
read_max = 7_00_000 ######

# basics
batch_size = 512
sequence_length = 100
d_model = 512
num_of_sentences = 3_00_000
# transfomer
num_encoder_decoder_layers = 6
num_heads = 8
hidden_layers = 2048

dropout_ff = 0.3
dropout_attn = 0.2


### Dataset


In [3]:
dataset = English_Hindi_Dataset('Dataset/train.en/train.en', 
                                    'Dataset/train.hi/train.hi',
                                    num_of_sentences = num_of_sentences,
                                    max_sequence_length = sequence_length,
                                    read_max = read_max)

en_vocab_size = len(set(dataset.en_vocab))
hi_vocab_size = len(set(dataset.hi_vocab))

assert len(dataset) == num_of_sentences, f"Dataset is of length: {len(dataset)} but required sample :{num_of_sentences}"


Total unique characters: English-> 97 Hindi-> 174
	Dataset Cleaned
	Dataset Tokenized and Pading is Done


### Embeddings


In [4]:
# embeddings
embeddings = Language_Embedding(en_vocab_size, hi_vocab_size, d_model)

### Data Loader


In [5]:
dataset_size = len(dataset)
train_size = int(0.8 * dataset_size)
val_size = dataset_size - int(0.8 * dataset_size)

train_dataset, val_dataset = random_split(dataset, [train_size,val_size])




### Model Initializations


In [6]:
# GPU for training
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f" Using: {device}")

model = Transformer(
    num_encoder_decoder_layers=num_encoder_decoder_layers,
    d_model=d_model,
    sequence_length=sequence_length,
    hidden_layers=hidden_layers,
    num_heads=num_heads,
    hi_voab_size=hi_vocab_size,
    dropout_ff=dropout_ff,
    dropout_attn=dropout_attn
).to(device)


# Loss
criterian = nn.CrossEntropyLoss(ignore_index= dataset.hindi_to_index[dataset.PADDING_TOKEN], reduction ='none')

# Parameter Initialization
for param in model.parameters():
    if param.dim()>1:
        nn.init.xavier_uniform_(param)
        
# optimizer 
optim = torch.optim.Adam(model.parameters(), lr= 1e-4)


 Using: cuda


### Model Training and Evaluation


In [7]:
model_save_path = "saved_models"  # Specify your directory to save models
os.makedirs(model_save_path, exist_ok=True)  # Create directory if it doesn't exist


def get_latest_model_checkpoint(model_save_path):
    model_files = os.listdir(model_save_path)
    model_epochs = [int(re.findall(r'model_epoch_(\d+).pt', file)[0]) for file in model_files if file.endswith('.pt')]
    
    if len(model_epochs)>0:
        latest_epoch = max(model_epochs)
        model_save_file = os.path.join(model_save_path, f"model_epoch_{latest_epoch}.pt")
        return latest_epoch, model_save_file
    else:
        return None, None
    
latest_epoch, model_save_file = get_latest_model_checkpoint(model_save_path)

if model_save_file:
    print(f"Loading model from {model_save_file}")
    model.load_state_dict(torch.load(model_save_file))
    current_epoch = latest_epoch + 1
else:
    print("No saved model found. Training from scratch.")
    current_epoch = 0

No saved model found. Training from scratch.


##### Training


In [8]:
best_val_loss = float('inf')
total_epochs = 100

for epoch in range(current_epoch, total_epochs + 1):
    print(f"Epoch -> {epoch}")
    total_loss = 0
    train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
    train_data_iterator = iter(train_data_loader)

    for batch_num, batch in enumerate(tqdm(train_data_iterator, desc=f'Epoch {epoch}/{total_epochs}', unit='batch')):
        model.train()
        en_batch, hi_batch = batch
        en_batch = en_batch.to(device)
        hi_batch = hi_batch.to(device)

        ds_mask, es_mask, edc_mask = get_masks(dataset, en_batch, hi_batch)
        ds_mask, es_mask, edc_mask = ds_mask.to(device), es_mask.to(device), edc_mask.to(device)

        optim.zero_grad()

        en_batch_embedded, hi_batch_embedded = embeddings(en_batch, hi_batch)
        en_batch_embedded, hi_batch_embedded = en_batch_embedded.to(device), hi_batch_embedded.to(device)
        hi_prediction = model(en_batch_embedded, hi_batch_embedded, ds_mask, es_mask, edc_mask)

        # Prepare labels
        labels_untoken = [dataset.untokenize(hi_batch[index], dataset.index_to_hindi) for index in range(len(hi_batch))]
        labels = [dataset.tokenize(labels_untoken[index], dataset.hindi_to_index, start_token=False, end_token=True) for index in range(len(hi_batch))]
        labels = torch.stack(labels).to(device)

        # Calculate loss
        loss = criterian(
            hi_prediction.view(-1, hi_vocab_size),
            labels.view(-1)
        )

        # Mask padding tokens
        valid_indices = (labels.view(-1) != dataset.hindi_to_index[dataset.PADDING_TOKEN])
        loss = loss[valid_indices].mean()  # Calculate the mean loss over valid indices

        total_loss += loss.item()

        loss.backward()
        
        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optim.step()

        # Log loss periodically
        if batch_num % 300 == 0:
            writer.add_scalar('Loss/Batch', loss.item(), epoch * len(train_data_iterator) + batch_num)
    
    avg_loss = total_loss / (batch_num + 1)
    writer.add_scalar('Loss/Epoch', avg_loss, epoch)
    print(f"\t\tEpoch [{epoch + 1}/{total_epochs}], training Loss: {avg_loss:.4f}")
 
    # Validation Loop
    model.eval()
    val_loss = 0
    val_data_loader = DataLoader(val_dataset, batch_size=batch_size,shuffle=False, pin_memory=True)
    val_data_iterator = iter(val_data_loader)
    with torch.no_grad():
        for val_batch_num, val_batch in enumerate(tqdm(val_data_iterator, desc=f'Validation Epoch {epoch }/{total_epochs}', unit='batch')):
            en_val_batch, hi_val_batch = val_batch
            en_val_batch = en_val_batch.to(device)
            hi_val_batch = hi_val_batch.to(device)
            
            ds_val_mask, es_val_mask, edc_val_mask = get_masks(dataset, en_val_batch, hi_val_batch)
            ds_val_mask, es_val_mask, edc_val_mask = ds_val_mask.to(device), es_val_mask.to(device), edc_val_mask.to(device)
            
            en_val_embedded, hi_val_embedded = embeddings(en_val_batch, hi_val_batch)
            en_val_embedded, hi_val_embedded = en_val_embedded.to(device), hi_val_embedded.to(device)
            
            hi_val_prediction = model(en_val_embedded, hi_val_embedded, ds_val_mask, es_val_mask, edc_val_mask)
            
            val_labels = [dataset.untokenize(hi_val_batch[index], dataset.index_to_hindi) for index in range(len(hi_val_batch))]
            val_labels = [dataset.tokenize(val_labels[index], dataset.hindi_to_index, start_token=False, end_token=True) for index in range(len(hi_val_batch))]
            val_labels = torch.stack(val_labels) 
            
            val_loss_batch = criterian(
                hi_val_prediction.view(-1, hi_vocab_size).to(device),
                val_labels.view(-1).to(device)
            ).to(device)
            
            valid_val_indices = torch.where(val_labels.view(-1) == dataset.hindi_to_index[dataset.PADDING_TOKEN], False, True)
            val_loss_batch = val_loss_batch.sum() / valid_val_indices.sum()
            
            val_loss += val_loss_batch.item()
    
    avg_val_loss = val_loss / (val_batch_num + 1)  # Average validation loss for the epoch
    writer.add_scalar('Loss/Validation_Epoch', avg_val_loss, epoch)


    ####### Print Epoch Losses #######
    print(f"\t\tEpoch [{epoch}/{total_epochs}], Validation Loss: {avg_val_loss:.4f}")
    
    print('\n')
    # Save Model
    model_save_file = os.path.join(model_save_path, f"model_epoch_{epoch }.pt")
    torch.save(model.state_dict(), model_save_file)

writer.close()

Epoch -> 0


Epoch 0/100: 100%|██████████| 469/469 [32:33<00:00,  4.17s/batch]


		Epoch [1/100], training Loss: 2.5267


Validation Epoch 0/100: 100%|██████████| 118/118 [04:19<00:00,  2.20s/batch]


		Epoch [0/100], Validation Loss: 2.0021


Epoch -> 1


Epoch 1/100: 100%|██████████| 469/469 [27:01<00:00,  3.46s/batch]


		Epoch [2/100], training Loss: 2.0047


Validation Epoch 1/100: 100%|██████████| 118/118 [04:32<00:00,  2.31s/batch]


		Epoch [1/100], Validation Loss: 1.7467


Epoch -> 2


Epoch 2/100: 100%|██████████| 469/469 [25:53<00:00,  3.31s/batch]


		Epoch [3/100], training Loss: 1.8267


Validation Epoch 2/100: 100%|██████████| 118/118 [04:20<00:00,  2.21s/batch]


		Epoch [2/100], Validation Loss: 1.6216


Epoch -> 3


Epoch 3/100: 100%|██████████| 469/469 [25:40<00:00,  3.28s/batch]


		Epoch [4/100], training Loss: 1.7205


Validation Epoch 3/100: 100%|██████████| 118/118 [04:21<00:00,  2.21s/batch]


		Epoch [3/100], Validation Loss: 1.5443


Epoch -> 4


Epoch 4/100: 100%|██████████| 469/469 [25:38<00:00,  3.28s/batch]


		Epoch [5/100], training Loss: 1.6458


Validation Epoch 4/100: 100%|██████████| 118/118 [04:27<00:00,  2.27s/batch]


		Epoch [4/100], Validation Loss: 1.4824


Epoch -> 5


Epoch 5/100: 100%|██████████| 469/469 [25:53<00:00,  3.31s/batch]


		Epoch [6/100], training Loss: 1.5884


Validation Epoch 5/100: 100%|██████████| 118/118 [04:23<00:00,  2.24s/batch]


		Epoch [5/100], Validation Loss: 1.4423


Epoch -> 6


Epoch 6/100: 100%|██████████| 469/469 [26:54<00:00,  3.44s/batch]


		Epoch [7/100], training Loss: 1.5414


Validation Epoch 6/100: 100%|██████████| 118/118 [05:43<00:00,  2.91s/batch]


		Epoch [6/100], Validation Loss: 1.4016


Epoch -> 7


Epoch 7/100: 100%|██████████| 469/469 [29:41<00:00,  3.80s/batch]


		Epoch [8/100], training Loss: 1.5024


Validation Epoch 7/100: 100%|██████████| 118/118 [05:05<00:00,  2.59s/batch]


		Epoch [7/100], Validation Loss: 1.3700


Epoch -> 8


Epoch 8/100: 100%|██████████| 469/469 [27:44<00:00,  3.55s/batch]


		Epoch [9/100], training Loss: 1.4675


Validation Epoch 8/100: 100%|██████████| 118/118 [04:22<00:00,  2.22s/batch]


		Epoch [8/100], Validation Loss: 1.3451


Epoch -> 9


Epoch 9/100: 100%|██████████| 469/469 [28:29<00:00,  3.64s/batch]


		Epoch [10/100], training Loss: 1.4343


Validation Epoch 9/100: 100%|██████████| 118/118 [06:08<00:00,  3.12s/batch]


		Epoch [9/100], Validation Loss: 1.3210


Epoch -> 10


Epoch 10/100: 100%|██████████| 469/469 [33:29<00:00,  4.29s/batch]


		Epoch [11/100], training Loss: 1.4027


Validation Epoch 10/100: 100%|██████████| 118/118 [05:29<00:00,  2.79s/batch]


		Epoch [10/100], Validation Loss: 1.2948


Epoch -> 11


Epoch 11/100: 100%|██████████| 469/469 [27:48<00:00,  3.56s/batch]


		Epoch [12/100], training Loss: 1.3714


Validation Epoch 11/100: 100%|██████████| 118/118 [04:20<00:00,  2.21s/batch]


		Epoch [11/100], Validation Loss: 1.2618


Epoch -> 12


Epoch 12/100: 100%|██████████| 469/469 [26:26<00:00,  3.38s/batch]


		Epoch [13/100], training Loss: 1.3393


Validation Epoch 12/100: 100%|██████████| 118/118 [05:36<00:00,  2.85s/batch]


		Epoch [12/100], Validation Loss: 1.2320


Epoch -> 13


Epoch 13/100: 100%|██████████| 469/469 [25:57<00:00,  3.32s/batch]


		Epoch [14/100], training Loss: 1.3073


Validation Epoch 13/100: 100%|██████████| 118/118 [04:33<00:00,  2.32s/batch]


		Epoch [13/100], Validation Loss: 1.1985


Epoch -> 14


Epoch 14/100: 100%|██████████| 469/469 [27:10<00:00,  3.48s/batch]


		Epoch [15/100], training Loss: 1.2748


Validation Epoch 14/100: 100%|██████████| 118/118 [04:20<00:00,  2.21s/batch]


		Epoch [14/100], Validation Loss: 1.1758


Epoch -> 15


Epoch 15/100: 100%|██████████| 469/469 [25:37<00:00,  3.28s/batch]


		Epoch [16/100], training Loss: 1.2408


Validation Epoch 15/100: 100%|██████████| 118/118 [04:19<00:00,  2.20s/batch]


		Epoch [15/100], Validation Loss: 1.1326


Epoch -> 16


Epoch 16/100: 100%|██████████| 469/469 [27:39<00:00,  3.54s/batch]


		Epoch [17/100], training Loss: 1.2072


Validation Epoch 16/100: 100%|██████████| 118/118 [06:42<00:00,  3.41s/batch]


		Epoch [16/100], Validation Loss: 1.1029


Epoch -> 17


Epoch 17/100: 100%|██████████| 469/469 [29:38<00:00,  3.79s/batch]


		Epoch [18/100], training Loss: 1.1738


Validation Epoch 17/100: 100%|██████████| 118/118 [06:45<00:00,  3.43s/batch]


		Epoch [17/100], Validation Loss: 1.0785


Epoch -> 18


Epoch 18/100: 100%|██████████| 469/469 [26:19<00:00,  3.37s/batch]


		Epoch [19/100], training Loss: 1.1432


Validation Epoch 18/100: 100%|██████████| 118/118 [06:37<00:00,  3.37s/batch]


		Epoch [18/100], Validation Loss: 1.0511


Epoch -> 19


Epoch 19/100: 100%|██████████| 469/469 [27:19<00:00,  3.50s/batch]


		Epoch [20/100], training Loss: 1.1135


Validation Epoch 19/100: 100%|██████████| 118/118 [05:06<00:00,  2.60s/batch]


		Epoch [19/100], Validation Loss: 1.0217


Epoch -> 20


Epoch 20/100: 100%|██████████| 469/469 [31:12<00:00,  3.99s/batch]


		Epoch [21/100], training Loss: 1.0867


Validation Epoch 20/100: 100%|██████████| 118/118 [06:20<00:00,  3.22s/batch]


		Epoch [20/100], Validation Loss: 1.0014


Epoch -> 21


Epoch 21/100: 100%|██████████| 469/469 [33:11<00:00,  4.25s/batch]


		Epoch [22/100], training Loss: 1.0617


Validation Epoch 21/100: 100%|██████████| 118/118 [06:16<00:00,  3.19s/batch]


		Epoch [21/100], Validation Loss: 0.9797


Epoch -> 22


Epoch 22/100: 100%|██████████| 469/469 [27:36<00:00,  3.53s/batch]


		Epoch [23/100], training Loss: 1.0383


Validation Epoch 22/100: 100%|██████████| 118/118 [04:35<00:00,  2.33s/batch]


		Epoch [22/100], Validation Loss: 0.9638


Epoch -> 23


Epoch 23/100: 100%|██████████| 469/469 [31:23<00:00,  4.02s/batch]


		Epoch [24/100], training Loss: 1.0168


Validation Epoch 23/100: 100%|██████████| 118/118 [04:21<00:00,  2.21s/batch]


		Epoch [23/100], Validation Loss: 0.9481


Epoch -> 24


Epoch 24/100: 100%|██████████| 469/469 [27:36<00:00,  3.53s/batch]


		Epoch [25/100], training Loss: 0.9967


Validation Epoch 24/100: 100%|██████████| 118/118 [04:18<00:00,  2.19s/batch]


		Epoch [24/100], Validation Loss: 0.9273


Epoch -> 25


Epoch 25/100: 100%|██████████| 469/469 [27:53<00:00,  3.57s/batch]


		Epoch [26/100], training Loss: 0.9782


Validation Epoch 25/100: 100%|██████████| 118/118 [04:13<00:00,  2.15s/batch]


		Epoch [25/100], Validation Loss: 0.9166


Epoch -> 26


Epoch 26/100: 100%|██████████| 469/469 [25:32<00:00,  3.27s/batch]


		Epoch [27/100], training Loss: 0.9609


Validation Epoch 26/100: 100%|██████████| 118/118 [04:13<00:00,  2.14s/batch]


		Epoch [26/100], Validation Loss: 0.9020


Epoch -> 27


Epoch 27/100: 100%|██████████| 469/469 [25:33<00:00,  3.27s/batch]


		Epoch [28/100], training Loss: 0.9448


Validation Epoch 27/100: 100%|██████████| 118/118 [04:14<00:00,  2.16s/batch]


		Epoch [27/100], Validation Loss: 0.8953


Epoch -> 28


Epoch 28/100: 100%|██████████| 469/469 [30:50<00:00,  3.95s/batch]


		Epoch [29/100], training Loss: 0.9296


Validation Epoch 28/100: 100%|██████████| 118/118 [06:27<00:00,  3.29s/batch]


		Epoch [28/100], Validation Loss: 0.8781


Epoch -> 29


Epoch 29/100: 100%|██████████| 469/469 [29:52<00:00,  3.82s/batch]


		Epoch [30/100], training Loss: 0.9152


Validation Epoch 29/100: 100%|██████████| 118/118 [04:16<00:00,  2.18s/batch]


		Epoch [29/100], Validation Loss: 0.8684


Epoch -> 30


Epoch 30/100: 100%|██████████| 469/469 [25:44<00:00,  3.29s/batch]


		Epoch [31/100], training Loss: 0.9020


Validation Epoch 30/100: 100%|██████████| 118/118 [04:13<00:00,  2.15s/batch]


		Epoch [30/100], Validation Loss: 0.8575


Epoch -> 31


Epoch 31/100: 100%|██████████| 469/469 [26:04<00:00,  3.34s/batch]


		Epoch [32/100], training Loss: 0.8897


Validation Epoch 31/100: 100%|██████████| 118/118 [06:28<00:00,  3.29s/batch]


		Epoch [31/100], Validation Loss: 0.8479


Epoch -> 32


Epoch 32/100: 100%|██████████| 469/469 [27:28<00:00,  3.52s/batch]


		Epoch [33/100], training Loss: 0.8782


Validation Epoch 32/100: 100%|██████████| 118/118 [04:12<00:00,  2.14s/batch]


		Epoch [32/100], Validation Loss: 0.8400


Epoch -> 33


Epoch 33/100: 100%|██████████| 469/469 [26:20<00:00,  3.37s/batch]


		Epoch [34/100], training Loss: 0.8671


Validation Epoch 33/100: 100%|██████████| 118/118 [06:28<00:00,  3.29s/batch]


		Epoch [33/100], Validation Loss: 0.8306


Epoch -> 34


Epoch 34/100: 100%|██████████| 469/469 [28:08<00:00,  3.60s/batch]


		Epoch [35/100], training Loss: 0.8559


Validation Epoch 34/100: 100%|██████████| 118/118 [04:13<00:00,  2.14s/batch]


		Epoch [34/100], Validation Loss: 0.8217


Epoch -> 35


Epoch 35/100: 100%|██████████| 469/469 [25:34<00:00,  3.27s/batch]


		Epoch [36/100], training Loss: 0.8456


Validation Epoch 35/100: 100%|██████████| 118/118 [04:48<00:00,  2.44s/batch]


		Epoch [35/100], Validation Loss: 0.8137


Epoch -> 36


Epoch 36/100: 100%|██████████| 469/469 [27:05<00:00,  3.47s/batch]


		Epoch [37/100], training Loss: 0.8361


Validation Epoch 36/100: 100%|██████████| 118/118 [04:12<00:00,  2.14s/batch]


		Epoch [36/100], Validation Loss: 0.8082


Epoch -> 37


Epoch 37/100: 100%|██████████| 469/469 [26:33<00:00,  3.40s/batch]


		Epoch [38/100], training Loss: 0.8278


Validation Epoch 37/100: 100%|██████████| 118/118 [06:29<00:00,  3.30s/batch]


		Epoch [37/100], Validation Loss: 0.8013


Epoch -> 38


Epoch 38/100: 100%|██████████| 469/469 [27:53<00:00,  3.57s/batch]


		Epoch [39/100], training Loss: 0.8181


Validation Epoch 38/100: 100%|██████████| 118/118 [04:13<00:00,  2.15s/batch]


		Epoch [38/100], Validation Loss: 0.7955


Epoch -> 39


Epoch 39/100: 100%|██████████| 469/469 [29:19<00:00,  3.75s/batch]


		Epoch [40/100], training Loss: 0.8103


Validation Epoch 39/100: 100%|██████████| 118/118 [06:27<00:00,  3.29s/batch]


		Epoch [39/100], Validation Loss: 0.7937


Epoch -> 40


Epoch 40/100: 100%|██████████| 469/469 [34:42<00:00,  4.44s/batch]


		Epoch [41/100], training Loss: 0.8017


Validation Epoch 40/100: 100%|██████████| 118/118 [05:48<00:00,  2.95s/batch]


		Epoch [40/100], Validation Loss: 0.7844


Epoch -> 41


Epoch 41/100: 100%|██████████| 469/469 [25:38<00:00,  3.28s/batch]


		Epoch [42/100], training Loss: 0.7949


Validation Epoch 41/100: 100%|██████████| 118/118 [04:13<00:00,  2.15s/batch]


		Epoch [41/100], Validation Loss: 0.7819


Epoch -> 42


Epoch 42/100: 100%|██████████| 469/469 [25:38<00:00,  3.28s/batch]


		Epoch [43/100], training Loss: 0.7872


Validation Epoch 42/100: 100%|██████████| 118/118 [04:13<00:00,  2.15s/batch]


		Epoch [42/100], Validation Loss: 0.7734


Epoch -> 43


Epoch 43/100: 100%|██████████| 469/469 [30:17<00:00,  3.88s/batch]


		Epoch [44/100], training Loss: 0.7798


Validation Epoch 43/100: 100%|██████████| 118/118 [05:17<00:00,  2.69s/batch]


		Epoch [43/100], Validation Loss: 0.7679


Epoch -> 44


Epoch 44/100: 100%|██████████| 469/469 [25:46<00:00,  3.30s/batch]


		Epoch [45/100], training Loss: 0.7731


Validation Epoch 44/100: 100%|██████████| 118/118 [04:13<00:00,  2.14s/batch]


		Epoch [44/100], Validation Loss: 0.7650


Epoch -> 45


Epoch 45/100: 100%|██████████| 469/469 [25:39<00:00,  3.28s/batch]


		Epoch [46/100], training Loss: 0.7668


Validation Epoch 45/100: 100%|██████████| 118/118 [04:13<00:00,  2.15s/batch]


		Epoch [45/100], Validation Loss: 0.7604


Epoch -> 46


Epoch 46/100: 100%|██████████| 469/469 [25:39<00:00,  3.28s/batch]


		Epoch [47/100], training Loss: 0.7602


Validation Epoch 46/100: 100%|██████████| 118/118 [04:13<00:00,  2.15s/batch]


		Epoch [46/100], Validation Loss: 0.7538


Epoch -> 47


Epoch 47/100: 100%|██████████| 469/469 [25:45<00:00,  3.30s/batch]


		Epoch [48/100], training Loss: 0.7551


Validation Epoch 47/100: 100%|██████████| 118/118 [04:17<00:00,  2.18s/batch]


		Epoch [47/100], Validation Loss: 0.7529


Epoch -> 48


Epoch 48/100: 100%|██████████| 469/469 [25:41<00:00,  3.29s/batch]


		Epoch [49/100], training Loss: 0.7483


Validation Epoch 48/100: 100%|██████████| 118/118 [04:13<00:00,  2.15s/batch]


		Epoch [48/100], Validation Loss: 0.7495


Epoch -> 49


Epoch 49/100: 100%|██████████| 469/469 [30:17<00:00,  3.88s/batch]


		Epoch [50/100], training Loss: 0.7428


Validation Epoch 49/100: 100%|██████████| 118/118 [06:29<00:00,  3.30s/batch]


		Epoch [49/100], Validation Loss: 0.7473


Epoch -> 50


Epoch 50/100: 100%|██████████| 469/469 [34:30<00:00,  4.41s/batch]


		Epoch [51/100], training Loss: 0.7373


Validation Epoch 50/100: 100%|██████████| 118/118 [06:28<00:00,  3.30s/batch]


		Epoch [50/100], Validation Loss: 0.7424


Epoch -> 51


Epoch 51/100: 100%|██████████| 469/469 [28:01<00:00,  3.58s/batch]


		Epoch [52/100], training Loss: 0.7325


Validation Epoch 51/100: 100%|██████████| 118/118 [04:21<00:00,  2.21s/batch]


		Epoch [51/100], Validation Loss: 0.7396


Epoch -> 52


Epoch 52/100: 100%|██████████| 469/469 [32:38<00:00,  4.18s/batch]


		Epoch [53/100], training Loss: 0.7275


Validation Epoch 52/100: 100%|██████████| 118/118 [04:15<00:00,  2.17s/batch]


		Epoch [52/100], Validation Loss: 0.7400


Epoch -> 53


Epoch 53/100: 100%|██████████| 469/469 [34:24<00:00,  4.40s/batch]


		Epoch [54/100], training Loss: 0.7232


Validation Epoch 53/100: 100%|██████████| 118/118 [04:36<00:00,  2.34s/batch]


		Epoch [53/100], Validation Loss: 0.7302


Epoch -> 54


Epoch 54/100: 100%|██████████| 469/469 [34:45<00:00,  4.45s/batch]


		Epoch [55/100], training Loss: 0.7186


Validation Epoch 54/100: 100%|██████████| 118/118 [06:28<00:00,  3.29s/batch]


		Epoch [54/100], Validation Loss: 0.7337


Epoch -> 55


Epoch 55/100: 100%|██████████| 469/469 [34:42<00:00,  4.44s/batch]


		Epoch [56/100], training Loss: 0.7147


Validation Epoch 55/100: 100%|██████████| 118/118 [06:28<00:00,  3.29s/batch]


		Epoch [55/100], Validation Loss: 0.7314


Epoch -> 56


Epoch 56/100: 100%|██████████| 469/469 [34:43<00:00,  4.44s/batch]


		Epoch [57/100], training Loss: 0.7085


Validation Epoch 56/100: 100%|██████████| 118/118 [06:28<00:00,  3.30s/batch]


		Epoch [56/100], Validation Loss: 0.7258


Epoch -> 57


Epoch 57/100: 100%|██████████| 469/469 [34:39<00:00,  4.43s/batch]


		Epoch [58/100], training Loss: 0.7030


Validation Epoch 57/100: 100%|██████████| 118/118 [06:28<00:00,  3.29s/batch]


		Epoch [57/100], Validation Loss: 0.7232


Epoch -> 58


Epoch 58/100: 100%|██████████| 469/469 [34:34<00:00,  4.42s/batch]


		Epoch [59/100], training Loss: 0.6991


Validation Epoch 58/100: 100%|██████████| 118/118 [06:29<00:00,  3.30s/batch]


		Epoch [58/100], Validation Loss: 0.7192


Epoch -> 59


Epoch 59/100: 100%|██████████| 469/469 [26:15<00:00,  3.36s/batch]


		Epoch [60/100], training Loss: 0.6955


Validation Epoch 59/100: 100%|██████████| 118/118 [04:14<00:00,  2.15s/batch]


		Epoch [59/100], Validation Loss: 0.7156


Epoch -> 60


Epoch 60/100: 100%|██████████| 469/469 [26:35<00:00,  3.40s/batch]


		Epoch [61/100], training Loss: 0.6915


Validation Epoch 60/100: 100%|██████████| 118/118 [06:29<00:00,  3.30s/batch]


		Epoch [60/100], Validation Loss: 0.7126


Epoch -> 61


Epoch 61/100: 100%|██████████| 469/469 [25:56<00:00,  3.32s/batch]


		Epoch [62/100], training Loss: 0.6874


Validation Epoch 61/100: 100%|██████████| 118/118 [04:13<00:00,  2.15s/batch]


		Epoch [61/100], Validation Loss: 0.7077


Epoch -> 62


Epoch 62/100: 100%|██████████| 469/469 [25:41<00:00,  3.29s/batch]


		Epoch [63/100], training Loss: 0.6837


Validation Epoch 62/100: 100%|██████████| 118/118 [04:14<00:00,  2.16s/batch]


		Epoch [62/100], Validation Loss: 0.7136


Epoch -> 63


Epoch 63/100: 100%|██████████| 469/469 [25:41<00:00,  3.29s/batch]


		Epoch [64/100], training Loss: 0.6797


Validation Epoch 63/100: 100%|██████████| 118/118 [04:14<00:00,  2.16s/batch]


		Epoch [63/100], Validation Loss: 0.7089


Epoch -> 64


Epoch 64/100: 100%|██████████| 469/469 [25:45<00:00,  3.30s/batch]


		Epoch [65/100], training Loss: 0.6762


Validation Epoch 64/100: 100%|██████████| 118/118 [04:15<00:00,  2.17s/batch]


		Epoch [64/100], Validation Loss: 0.7088


Epoch -> 65


Epoch 65/100: 100%|██████████| 469/469 [29:52<00:00,  3.82s/batch]


		Epoch [66/100], training Loss: 0.6726


Validation Epoch 65/100: 100%|██████████| 118/118 [06:27<00:00,  3.28s/batch]


		Epoch [65/100], Validation Loss: 0.7069


Epoch -> 66


Epoch 66/100: 100%|██████████| 469/469 [33:49<00:00,  4.33s/batch]


		Epoch [67/100], training Loss: 0.6691


Validation Epoch 66/100: 100%|██████████| 118/118 [06:26<00:00,  3.28s/batch]


		Epoch [66/100], Validation Loss: 0.7052


Epoch -> 67


Epoch 67/100: 100%|██████████| 469/469 [28:16<00:00,  3.62s/batch]


		Epoch [68/100], training Loss: 0.6658


Validation Epoch 67/100: 100%|██████████| 118/118 [06:28<00:00,  3.29s/batch]


		Epoch [67/100], Validation Loss: 0.7009


Epoch -> 68


Epoch 68/100: 100%|██████████| 469/469 [27:01<00:00,  3.46s/batch]


		Epoch [69/100], training Loss: 0.6626


Validation Epoch 68/100: 100%|██████████| 118/118 [04:12<00:00,  2.14s/batch]


		Epoch [68/100], Validation Loss: 0.7042


Epoch -> 69


Epoch 69/100: 100%|██████████| 469/469 [29:26<00:00,  3.77s/batch]


		Epoch [70/100], training Loss: 0.6603


Validation Epoch 69/100: 100%|██████████| 118/118 [04:13<00:00,  2.15s/batch]


		Epoch [69/100], Validation Loss: 0.6994


Epoch -> 70


Epoch 70/100: 100%|██████████| 469/469 [25:36<00:00,  3.28s/batch]


		Epoch [71/100], training Loss: 0.6580


Validation Epoch 70/100: 100%|██████████| 118/118 [04:12<00:00,  2.14s/batch]


		Epoch [70/100], Validation Loss: 0.6987


Epoch -> 71


Epoch 71/100: 100%|██████████| 469/469 [25:13<00:00,  3.23s/batch]


		Epoch [72/100], training Loss: 0.6548


Validation Epoch 71/100: 100%|██████████| 118/118 [04:12<00:00,  2.14s/batch]


		Epoch [71/100], Validation Loss: 0.7066


Epoch -> 72


Epoch 72/100: 100%|██████████| 469/469 [25:31<00:00,  3.27s/batch]


		Epoch [73/100], training Loss: 0.6513


Validation Epoch 72/100: 100%|██████████| 118/118 [04:13<00:00,  2.15s/batch]


		Epoch [72/100], Validation Loss: 0.6973


Epoch -> 73


Epoch 73/100: 100%|██████████| 469/469 [25:38<00:00,  3.28s/batch]


		Epoch [74/100], training Loss: 0.6468


Validation Epoch 73/100: 100%|██████████| 118/118 [04:12<00:00,  2.14s/batch]


		Epoch [73/100], Validation Loss: 0.6923


Epoch -> 74


Epoch 74/100: 100%|██████████| 469/469 [25:34<00:00,  3.27s/batch]


		Epoch [75/100], training Loss: 0.6443


Validation Epoch 74/100: 100%|██████████| 118/118 [04:13<00:00,  2.15s/batch]


		Epoch [74/100], Validation Loss: 0.6929


Epoch -> 75


Epoch 75/100: 100%|██████████| 469/469 [25:47<00:00,  3.30s/batch]


		Epoch [76/100], training Loss: 0.6420


Validation Epoch 75/100: 100%|██████████| 118/118 [04:14<00:00,  2.16s/batch]


		Epoch [75/100], Validation Loss: 0.6938


Epoch -> 76


Epoch 76/100: 100%|██████████| 469/469 [25:57<00:00,  3.32s/batch]


		Epoch [77/100], training Loss: 0.6393


Validation Epoch 76/100: 100%|██████████| 118/118 [06:28<00:00,  3.29s/batch]


		Epoch [76/100], Validation Loss: 0.6940


Epoch -> 77


Epoch 77/100: 100%|██████████| 469/469 [34:27<00:00,  4.41s/batch]


		Epoch [78/100], training Loss: 0.6351


Validation Epoch 77/100: 100%|██████████| 118/118 [05:47<00:00,  2.94s/batch]


		Epoch [77/100], Validation Loss: 0.6887


Epoch -> 78


Epoch 78/100: 100%|██████████| 469/469 [29:07<00:00,  3.73s/batch]


		Epoch [79/100], training Loss: 0.6318


Validation Epoch 78/100: 100%|██████████| 118/118 [06:37<00:00,  3.37s/batch]


		Epoch [78/100], Validation Loss: 0.6871


Epoch -> 79


Epoch 79/100: 100%|██████████| 469/469 [31:06<00:00,  3.98s/batch]


		Epoch [80/100], training Loss: 0.6320


Validation Epoch 79/100: 100%|██████████| 118/118 [04:12<00:00,  2.14s/batch]


		Epoch [79/100], Validation Loss: 0.6950


Epoch -> 80


Epoch 80/100: 100%|██████████| 469/469 [31:00<00:00,  3.97s/batch]


		Epoch [81/100], training Loss: 0.6275


Validation Epoch 80/100: 100%|██████████| 118/118 [04:11<00:00,  2.13s/batch]


		Epoch [80/100], Validation Loss: 0.6896


Epoch -> 81


Epoch 81/100: 100%|██████████| 469/469 [25:29<00:00,  3.26s/batch]


		Epoch [82/100], training Loss: 0.6253


Validation Epoch 81/100: 100%|██████████| 118/118 [04:22<00:00,  2.23s/batch]


		Epoch [81/100], Validation Loss: 0.6891


Epoch -> 82


Epoch 82/100: 100%|██████████| 469/469 [25:45<00:00,  3.29s/batch]


		Epoch [83/100], training Loss: 0.6220


Validation Epoch 82/100: 100%|██████████| 118/118 [04:12<00:00,  2.14s/batch]


		Epoch [82/100], Validation Loss: 0.6835


Epoch -> 83


Epoch 83/100: 100%|██████████| 469/469 [25:59<00:00,  3.33s/batch]


		Epoch [84/100], training Loss: 0.6204


Validation Epoch 83/100: 100%|██████████| 118/118 [06:06<00:00,  3.10s/batch]


		Epoch [83/100], Validation Loss: 0.6838


Epoch -> 84


Epoch 84/100: 100%|██████████| 469/469 [26:59<00:00,  3.45s/batch]


		Epoch [85/100], training Loss: 0.6169


Validation Epoch 84/100: 100%|██████████| 118/118 [04:18<00:00,  2.19s/batch]


		Epoch [84/100], Validation Loss: 0.6850


Epoch -> 85


Epoch 85/100: 100%|██████████| 469/469 [25:47<00:00,  3.30s/batch]


		Epoch [86/100], training Loss: 0.6180


Validation Epoch 85/100: 100%|██████████| 118/118 [04:26<00:00,  2.26s/batch]


		Epoch [85/100], Validation Loss: 0.6848


Epoch -> 86


Epoch 86/100: 100%|██████████| 469/469 [26:24<00:00,  3.38s/batch]


		Epoch [87/100], training Loss: 0.6138


Validation Epoch 86/100: 100%|██████████| 118/118 [04:13<00:00,  2.15s/batch]


		Epoch [86/100], Validation Loss: 0.6845


Epoch -> 87


Epoch 87/100: 100%|██████████| 469/469 [24:57<00:00,  3.19s/batch]


		Epoch [88/100], training Loss: 0.6098


Validation Epoch 87/100: 100%|██████████| 118/118 [04:12<00:00,  2.14s/batch]


		Epoch [87/100], Validation Loss: 0.6862


Epoch -> 88


Epoch 88/100: 100%|██████████| 469/469 [31:41<00:00,  4.05s/batch]


		Epoch [89/100], training Loss: 0.6080


Validation Epoch 88/100: 100%|██████████| 118/118 [06:27<00:00,  3.28s/batch]


		Epoch [88/100], Validation Loss: 0.6806


Epoch -> 89


Epoch 89/100: 100%|██████████| 469/469 [32:28<00:00,  4.16s/batch]


		Epoch [90/100], training Loss: 0.6068


Validation Epoch 89/100: 100%|██████████| 118/118 [04:12<00:00,  2.14s/batch]


		Epoch [89/100], Validation Loss: 0.6816


Epoch -> 90


Epoch 90/100: 100%|██████████| 469/469 [32:15<00:00,  4.13s/batch]


		Epoch [91/100], training Loss: 0.6036


Validation Epoch 90/100: 100%|██████████| 118/118 [06:26<00:00,  3.28s/batch]


		Epoch [90/100], Validation Loss: 0.6798


Epoch -> 91


Epoch 91/100: 100%|██████████| 469/469 [31:14<00:00,  4.00s/batch]


		Epoch [92/100], training Loss: 0.5998


Validation Epoch 91/100: 100%|██████████| 118/118 [04:11<00:00,  2.14s/batch]


		Epoch [91/100], Validation Loss: 0.6805


Epoch -> 92


Epoch 92/100: 100%|██████████| 469/469 [29:07<00:00,  3.73s/batch]


		Epoch [93/100], training Loss: 0.5978


Validation Epoch 92/100: 100%|██████████| 118/118 [04:13<00:00,  2.15s/batch]


		Epoch [92/100], Validation Loss: 0.6801


Epoch -> 93


Epoch 93/100: 100%|██████████| 469/469 [25:43<00:00,  3.29s/batch]


		Epoch [94/100], training Loss: 0.5968


Validation Epoch 93/100: 100%|██████████| 118/118 [04:12<00:00,  2.14s/batch]


		Epoch [93/100], Validation Loss: 0.6788


Epoch -> 94


Epoch 94/100: 100%|██████████| 469/469 [24:53<00:00,  3.18s/batch]


		Epoch [95/100], training Loss: 0.5958


Validation Epoch 94/100: 100%|██████████| 118/118 [04:12<00:00,  2.14s/batch]


		Epoch [94/100], Validation Loss: 0.6824


Epoch -> 95


Epoch 95/100: 100%|██████████| 469/469 [32:07<00:00,  4.11s/batch]


		Epoch [96/100], training Loss: 0.5921


Validation Epoch 95/100: 100%|██████████| 118/118 [06:26<00:00,  3.28s/batch]


		Epoch [95/100], Validation Loss: 0.6804


Epoch -> 96


Epoch 96/100: 100%|██████████| 469/469 [32:03<00:00,  4.10s/batch]


		Epoch [97/100], training Loss: 0.5900


Validation Epoch 96/100: 100%|██████████| 118/118 [04:11<00:00,  2.13s/batch]


		Epoch [96/100], Validation Loss: 0.6831


Epoch -> 97


Epoch 97/100: 100%|██████████| 469/469 [24:55<00:00,  3.19s/batch]


		Epoch [98/100], training Loss: 0.5883


Validation Epoch 97/100: 100%|██████████| 118/118 [04:12<00:00,  2.14s/batch]


		Epoch [97/100], Validation Loss: 0.6759


Epoch -> 98


Epoch 98/100: 100%|██████████| 469/469 [27:39<00:00,  3.54s/batch]


		Epoch [99/100], training Loss: 0.5866


Validation Epoch 98/100: 100%|██████████| 118/118 [04:11<00:00,  2.13s/batch]


		Epoch [98/100], Validation Loss: 0.6758


Epoch -> 99


Epoch 99/100: 100%|██████████| 469/469 [24:53<00:00,  3.18s/batch]


		Epoch [100/100], training Loss: 0.5851


Validation Epoch 99/100: 100%|██████████| 118/118 [04:12<00:00,  2.14s/batch]


		Epoch [99/100], Validation Loss: 0.6784


Epoch -> 100


Epoch 100/100: 100%|██████████| 469/469 [28:58<00:00,  3.71s/batch]


		Epoch [101/100], training Loss: 0.5825


Validation Epoch 100/100: 100%|██████████| 118/118 [04:15<00:00,  2.17s/batch]


		Epoch [100/100], Validation Loss: 0.6771




##### Evaluation


In [9]:
def translate(en_sentence):
    model.eval()
    en_sentence = (en_sentence,)
    hi_sentence = ("",)

    en_token = dataset.tokenize(en_sentence[0], dataset.english_to_index, start_token=False, end_token=False).unsqueeze(0).to(device)
    hi_token = dataset.tokenize(hi_sentence[0], dataset.hindi_to_index, start_token=True, end_token=False).unsqueeze(0).to(device)
    
    for word_counter in range(dataset.max_sequence_length):
        # print(f"Processing for {word_counter + 1} token")
    
        ds_mask, es_mask, edc_mask = get_masks(dataset, en_token, hi_token)
        ds_mask, es_mask, edc_mask = ds_mask.to(device), es_mask.to(device), edc_mask.to(device)
        
        en_embedded, hi_embedded = embeddings(en_token, hi_token)
        en_embedded, hi_embedded =  en_embedded.to(device), hi_embedded.to(device)
        
        predictions = model(en_embedded,
                            hi_embedded,
                            ds_mask, es_mask, edc_mask)
        next_token_prob_distribution = predictions[0][word_counter]
        next_token_index = torch.argmax(next_token_prob_distribution).item()
        next_token = dataset.index_to_hindi[next_token_index]
        
        if next_token == dataset.END_TOKEN:
            break
        hi_sentence = (hi_sentence[0] + next_token, )
        hi_token = dataset.tokenize(hi_sentence[0], dataset.hindi_to_index, start_token=True, end_token=False).unsqueeze(0).to(device)
        # print(f"\t\t\t Predicted till now: {hi_sentence[0]}")
    
    return hi_sentence[0]
    

In [10]:
en = dataset.untokenize(dataset[0][0], dataset.index_to_english)
hi = dataset.untokenize(dataset[0][1], dataset.index_to_hindi)
translation = translate(en)


In [11]:
print(f"en sentence : {en}")
print(f"actual translation : {hi}")
print(f"predicted translation : {translation}")

en sentence : In reply, Pakistan got off to a solid start.
actual translation : जिसके जवाब में पाक ने अच्छी शुरुआत की थी.
predicted translation : जवाब में पाकिस्तान ने एक ठोस शुरुआत की है।


In [12]:
line1 = "Hello, How are you?"
line2 = "This is a beautiful day to go out."
line3 = "India is situated on the right side of pakistan"
lines = [line1, line2, line3]

In [13]:
translations = []
for line in lines:
    translations.append(translate(line))

In [14]:
for index, (en, hi) in enumerate(zip(lines, translations)):
    print(f"{en} -> {hi}")

Hello, How are you? -> हैलो, तुम कैसे हो?
This is a beautiful day to go out. -> यह दिन बाहर जाने का बहुत खूबसूरत है।
India is situated on the right side of pakistan -> भारत पाकिस्तान के दाहिने तरफ से स्थित है।


#### Save dictionaries

In [15]:
import json

dict = {
    'en_vocab' : dataset.en_vocab,
    'hi_vocab' : dataset.hi_vocab,

    'en_to_index' : dataset.english_to_index,
    'index_to_en' : dataset.index_to_english,

    'hi_to_index' : dataset.hindi_to_index,
    'index_to_hi' : dataset.index_to_hindi,

}


# Save to a pickle file
with open('dicts.json', 'w') as f:
    json.dump(dict, f)
