In [1]:
!pip install -q torch==2.0.1 torchtext==0.15.2


In [2]:
!python -m spacy download fr_core_news_sm

Collecting fr-core-news-sm==3.7.0
  Downloading https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.7.0/fr_core_news_sm-3.7.0-py3-none-any.whl (16.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.3/16.3 MB[0m [31m59.5 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('fr_core_news_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [3]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

cp: cannot stat 'kaggle.json': No such file or directory


In [4]:
!kaggle datasets download -d devicharith/language-translation-englishfrench

Dataset URL: https://www.kaggle.com/datasets/devicharith/language-translation-englishfrench
License(s): CC0-1.0
language-translation-englishfrench.zip: Skipping, found more recently modified local copy (use --force to force download)


In [5]:
import zipfile
zip_ref = zipfile.ZipFile('/content/language-translation-englishfrench.zip', 'r')
zip_ref.extractall('/content')
zip_ref.close()

In [6]:
import pandas as pd
import torch
import torch.nn as nn
import spacy
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import GloVe
from torchtext.datasets import AG_NEWS
from torch.utils.data import DataLoader
from torchtext.transforms import VocabTransform, PadTransform
from torchtext.vocab import build_vocab_from_iterator
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
import random
from sklearn.model_selection import train_test_split

In [7]:
df = pd.read_csv('/content/eng_-french.csv')
df.head()

Unnamed: 0,English words/sentences,French words/sentences
0,Hi.,Salut!
1,Run!,Cours !
2,Run!,Courez !
3,Who?,Qui ?
4,Wow!,Ça alors !


In [8]:
def get_rows(i,j):
  return df.iloc[i:j, :]

df = get_rows(120000,150000)

In [9]:
df.shape

(30000, 2)

In [11]:
df.rename(columns={'English words/sentences':'English','French words/sentences':'French'},inplace=True)

In [12]:
max_length = df['French'].apply(lambda x: len(x.split())).max()

print(f"The maximum length of a sentence in the 'english' column is: {max_length}")

The maximum length of a sentence in the 'english' column is: 18


In [13]:
english_texts = df["English"].tolist()
french_summaries = df["French"].tolist()
train_en, test_en, train_fr, test_fr = train_test_split(english_texts, french_summaries, test_size=0.2, random_state=42)


In [14]:
import os
import urllib.request

def download_fasttext_vectors(url, dest_path):

    os.makedirs(os.path.dirname(dest_path), exist_ok=True)

    if not os.path.exists(dest_path):
        print(f"Downloading {url}...")
        urllib.request.urlretrieve(url, dest_path)
        print("Download completed.")
    else:
        print(f"{dest_path} already exists.")


english_vectors_url = "https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.vec.gz"
french_vectors_url = "https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fr.300.vec.gz"


english_vectors_path = ".vector_cache/cc.en.300.vec.gz"
french_vectors_path = ".vector_cache/cc.fr.300.vec.gz"


download_fasttext_vectors(english_vectors_url, english_vectors_path)
download_fasttext_vectors(french_vectors_url, french_vectors_path)

.vector_cache/cc.en.300.vec.gz already exists.
.vector_cache/cc.fr.300.vec.gz already exists.


In [15]:
import gzip
import shutil

def extract_gz(source_path, dest_path):
    with gzip.open(source_path, 'rb') as f_in:
        with open(dest_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)


extract_gz(".vector_cache/cc.en.300.vec.gz", ".vector_cache/cc.en.300.vec")
extract_gz(".vector_cache/cc.fr.300.vec.gz", ".vector_cache/cc.fr.300.vec")


In [16]:
from torchtext.vocab import Vectors

english_vectors_path = ".vector_cache/cc.en.300.vec"
french_vectors_path = ".vector_cache/cc.fr.300.vec"


english_vectors = Vectors(name=english_vectors_path)
french_vectors = Vectors(name=french_vectors_path)


In [17]:
spacy_en = spacy.load("en_core_web_sm")
spacy_fr = spacy.load("fr_core_news_sm")

In [18]:
def tokenize_en(text):
    return [tok.text.lower() for tok in spacy_en.tokenizer(text)]

def tokenize_fr(text):
    return [tok.text.lower() for tok in spacy_fr.tokenizer(text)]

tokenizer_en = get_tokenizer(tokenize_en)
tokenizer_fr = get_tokenizer(tokenize_fr)
def yield_tokens(data_iter, tokenizer):
    for text in data_iter:
        yield tokenizer(text)
all_data = english_texts + french_summaries
vocab_en = build_vocab_from_iterator(yield_tokens(english_texts, tokenize_en), specials=["<unk>", "<pad>"])
vocab_fr = build_vocab_from_iterator(yield_tokens(french_summaries, tokenize_fr), specials=["<unk>", "<pad>"])
vocab_en.set_default_index(vocab_en["<unk>"])
vocab_fr.set_default_index(vocab_fr["<unk>"])

In [19]:
class TranslationDataset(torch.utils.data.Dataset):
    def __init__(self, english_texts, french_texts):
        self.english_texts = english_texts
        self.french_texts = french_texts

    def __getitem__(self, idx):
        english_text = self.english_texts[idx]
        french_text = self.french_texts[idx]
        english_tensor = torch.tensor([vocab_en[token] for token in tokenize_en(english_text)], dtype=torch.long)
        french_tensor = torch.tensor([vocab_fr[token] for token in tokenize_fr(french_text)], dtype=torch.long)
        return english_tensor, french_tensor

    def __len__(self):
        return len(self.english_texts)

In [22]:
import torch.nn.functional as F

def collate_fn(batch):
    english_batch, french_batch = zip(*batch)

    max_length = max(max(len(seq) for seq in english_batch), max(len(seq) for seq in french_batch))


    english_batch = [F.pad(seq, (0, max_length - len(seq))) for seq in english_batch]
    french_batch = [F.pad(seq, (0, max_length - len(seq))) for seq in french_batch]

    return torch.stack(english_batch), torch.stack(french_batch)

In [23]:
train_dataset = TranslationDataset(train_en, train_fr)
test_dataset = TranslationDataset(test_en, test_fr)


train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, collate_fn=collate_fn)

In [24]:
def create_embedding_matrix(vocab, vectors, embedding_dim):
    embedding_matrix = torch.zeros((len(vocab), embedding_dim))

    for idx, token in enumerate(vocab.get_itos()):
        if token in vectors.stoi:
            embedding_matrix[idx] = vectors[token]
        else:
            embedding_matrix[idx] = torch.randn(embedding_dim)

    return embedding_matrix


embedding_dim = english_vectors.dim
english_embedding_matrix = create_embedding_matrix(vocab_en, english_vectors, embedding_dim)
french_embedding_matrix = create_embedding_matrix(vocab_fr, french_vectors, embedding_dim)


class Encoder(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, num_layers, p, pretrained_embeddings):
        super(Encoder, self).__init__()
        self.dropout = nn.Dropout(p)
        self.hidden_size = hidden_size
        self.num_layers = num_layers


        self.embedding = nn.Embedding(input_size, embedding_size)
        self.embedding.weight.data.copy_(pretrained_embeddings)


        self.embedding.weight.requires_grad = False

        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=p)

    def forward(self, x):
        embedding = self.dropout(self.embedding(x))
        outputs, (hidden, cell) = self.rnn(embedding)
        return hidden, cell


class Decoder(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers, p, pretrained_embeddings):
        super(Decoder, self).__init__()
        self.dropout = nn.Dropout(p)
        self.hidden_size = hidden_size
        self.num_layers = num_layers


        self.embedding = nn.Embedding(input_size, embedding_size)
        self.embedding.weight.data.copy_(pretrained_embeddings)


        self.embedding.weight.requires_grad = False

        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=p)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell):
        x = x.unsqueeze(0)
        embedding = self.dropout(self.embedding(x))
        outputs, (hidden, cell) = self.rnn(embedding, (hidden, cell))
        predictions = self.fc(outputs)
        predictions = predictions.squeeze(0)
        return predictions, hidden, cell


class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_force_ratio=0.5):
        batch_size = source.shape[1]
        target_len = target.shape[0]
        target_vocab_size = len(vocab_fr)

        outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(device)
        hidden, cell = self.encoder(source)
        x = target[0]

        for t in range(1, target_len):
            output, hidden, cell = self.decoder(x, hidden, cell)
            outputs[t] = output
            best_guess = output.argmax(1)
            x = target[t] if random.random() < teacher_force_ratio else best_guess

        return outputs

In [25]:
input_size_encoder = len(vocab_en)
input_size_decoder = len(vocab_fr)
output_size = len(vocab_fr)
embedding_size = embedding_dim
hidden_size = 512
num_layers = 2
dropout = 0.5


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


english_embedding_matrix = english_embedding_matrix.to(device)
french_embedding_matrix = french_embedding_matrix.to(device)


encoder = Encoder(input_size_encoder, embedding_size, hidden_size, num_layers, dropout, english_embedding_matrix).to(device)
decoder = Decoder(input_size_decoder, embedding_size, hidden_size, output_size, num_layers, dropout, french_embedding_matrix).to(device)
model = Seq2Seq(encoder, decoder).to(device)


criterion = nn.CrossEntropyLoss(ignore_index=vocab_fr['<pad>']).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)


num_epochs = 120
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0

    for english_batch, french_batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}"):

        english_batch = english_batch.to(device)
        french_batch = french_batch.to(device)


        optimizer.zero_grad()
        output = model(english_batch, french_batch)


        output = output[1:].reshape(-1, output.shape[2])
        target = french_batch[1:].reshape(-1)


        loss = criterion(output, target)
        loss.backward()


        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        optimizer.step()


        epoch_loss += loss.item()


    avg_epoch_loss = epoch_loss / len(train_loader)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_epoch_loss:.4f}")


Using device: cuda


Epoch 1/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.51it/s]


Epoch [1/120], Loss: 4.7711


Epoch 2/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.75it/s]


Epoch [2/120], Loss: 4.5715


Epoch 3/120: 100%|██████████| 3000/3000 [01:09<00:00, 43.38it/s]


Epoch [3/120], Loss: 4.4492


Epoch 4/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.58it/s]


Epoch [4/120], Loss: 4.3580


Epoch 5/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.49it/s]


Epoch [5/120], Loss: 4.2782


Epoch 6/120: 100%|██████████| 3000/3000 [01:12<00:00, 41.10it/s]


Epoch [6/120], Loss: 4.2302


Epoch 7/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.49it/s]


Epoch [7/120], Loss: 4.1868


Epoch 8/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.83it/s]


Epoch [8/120], Loss: 4.1528


Epoch 9/120: 100%|██████████| 3000/3000 [01:13<00:00, 41.00it/s]


Epoch [9/120], Loss: 4.1206


Epoch 10/120: 100%|██████████| 3000/3000 [01:12<00:00, 41.45it/s]


Epoch [10/120], Loss: 4.0886


Epoch 11/120: 100%|██████████| 3000/3000 [01:13<00:00, 40.90it/s]


Epoch [11/120], Loss: 4.0559


Epoch 12/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.91it/s]


Epoch [12/120], Loss: 4.0254


Epoch 13/120: 100%|██████████| 3000/3000 [01:08<00:00, 44.03it/s]


Epoch [13/120], Loss: 4.0040


Epoch 14/120: 100%|██████████| 3000/3000 [01:08<00:00, 44.11it/s]


Epoch [14/120], Loss: 3.9691


Epoch 15/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.39it/s]


Epoch [15/120], Loss: 3.9536


Epoch 16/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.54it/s]


Epoch [16/120], Loss: 3.9182


Epoch 17/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.24it/s]


Epoch [17/120], Loss: 3.9036


Epoch 18/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.57it/s]


Epoch [18/120], Loss: 3.8875


Epoch 19/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.80it/s]


Epoch [19/120], Loss: 3.8626


Epoch 20/120: 100%|██████████| 3000/3000 [01:08<00:00, 44.04it/s]


Epoch [20/120], Loss: 3.8417


Epoch 21/120: 100%|██████████| 3000/3000 [01:08<00:00, 44.06it/s]


Epoch [21/120], Loss: 3.8357


Epoch 22/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.34it/s]


Epoch [22/120], Loss: 3.8180


Epoch 23/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.55it/s]


Epoch [23/120], Loss: 3.7948


Epoch 24/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.31it/s]


Epoch [24/120], Loss: 3.7814


Epoch 25/120: 100%|██████████| 3000/3000 [01:09<00:00, 43.02it/s]


Epoch [25/120], Loss: 3.7768


Epoch 26/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.14it/s]


Epoch [26/120], Loss: 3.7664


Epoch 27/120: 100%|██████████| 3000/3000 [01:06<00:00, 44.82it/s]


Epoch [27/120], Loss: 3.7418


Epoch 28/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.02it/s]


Epoch [28/120], Loss: 3.7351


Epoch 29/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.04it/s]


Epoch [29/120], Loss: 3.7292


Epoch 30/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.93it/s]


Epoch [30/120], Loss: 3.7133


Epoch 31/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.97it/s]


Epoch [31/120], Loss: 3.6993


Epoch 32/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.71it/s]


Epoch [32/120], Loss: 3.6971


Epoch 33/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.76it/s]


Epoch [33/120], Loss: 3.6786


Epoch 34/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.50it/s]


Epoch [34/120], Loss: 3.6802


Epoch 35/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.13it/s]


Epoch [35/120], Loss: 3.6593


Epoch 36/120: 100%|██████████| 3000/3000 [01:06<00:00, 44.84it/s]


Epoch [36/120], Loss: 3.6516


Epoch 37/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.05it/s]


Epoch [37/120], Loss: 3.6492


Epoch 38/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.17it/s]


Epoch [38/120], Loss: 3.6408


Epoch 39/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.10it/s]


Epoch [39/120], Loss: 3.6368


Epoch 40/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.15it/s]


Epoch [40/120], Loss: 3.6247


Epoch 41/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.07it/s]


Epoch [41/120], Loss: 3.6148


Epoch 42/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.11it/s]


Epoch [42/120], Loss: 3.6116


Epoch 43/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.25it/s]


Epoch [43/120], Loss: 3.6150


Epoch 44/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.05it/s]


Epoch [44/120], Loss: 3.5939


Epoch 45/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.20it/s]


Epoch [45/120], Loss: 3.5845


Epoch 46/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.18it/s]


Epoch [46/120], Loss: 3.5835


Epoch 47/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.25it/s]


Epoch [47/120], Loss: 3.5662


Epoch 48/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.98it/s]


Epoch [48/120], Loss: 3.5692


Epoch 49/120: 100%|██████████| 3000/3000 [01:11<00:00, 41.74it/s]


Epoch [49/120], Loss: 3.5620


Epoch 50/120: 100%|██████████| 3000/3000 [01:10<00:00, 42.36it/s]


Epoch [50/120], Loss: 3.5505


Epoch 51/120: 100%|██████████| 3000/3000 [01:11<00:00, 41.88it/s]


Epoch [51/120], Loss: 3.5594


Epoch 52/120: 100%|██████████| 3000/3000 [01:12<00:00, 41.36it/s]


Epoch [52/120], Loss: 3.5512


Epoch 53/120: 100%|██████████| 3000/3000 [01:11<00:00, 41.80it/s]


Epoch [53/120], Loss: 3.5425


Epoch 54/120: 100%|██████████| 3000/3000 [01:10<00:00, 42.41it/s]


Epoch [54/120], Loss: 3.5316


Epoch 55/120: 100%|██████████| 3000/3000 [01:11<00:00, 41.99it/s]


Epoch [55/120], Loss: 3.5357


Epoch 56/120: 100%|██████████| 3000/3000 [01:12<00:00, 41.52it/s]


Epoch [56/120], Loss: 3.5295


Epoch 57/120: 100%|██████████| 3000/3000 [01:13<00:00, 41.06it/s]


Epoch [57/120], Loss: 3.5282


Epoch 58/120: 100%|██████████| 3000/3000 [01:15<00:00, 39.67it/s]


Epoch [58/120], Loss: 3.5210


Epoch 59/120: 100%|██████████| 3000/3000 [01:15<00:00, 39.58it/s]


Epoch [59/120], Loss: 3.5130


Epoch 60/120: 100%|██████████| 3000/3000 [01:15<00:00, 39.57it/s]


Epoch [60/120], Loss: 3.5099


Epoch 61/120: 100%|██████████| 3000/3000 [01:16<00:00, 39.44it/s]


Epoch [61/120], Loss: 3.5030


Epoch 62/120: 100%|██████████| 3000/3000 [01:15<00:00, 39.70it/s]


Epoch [62/120], Loss: 3.5027


Epoch 63/120: 100%|██████████| 3000/3000 [01:16<00:00, 39.46it/s]


Epoch [63/120], Loss: 3.5071


Epoch 64/120: 100%|██████████| 3000/3000 [01:15<00:00, 39.80it/s]


Epoch [64/120], Loss: 3.4971


Epoch 65/120: 100%|██████████| 3000/3000 [01:16<00:00, 39.33it/s]


Epoch [65/120], Loss: 3.4975


Epoch 66/120: 100%|██████████| 3000/3000 [01:14<00:00, 40.04it/s]


Epoch [66/120], Loss: 3.4904


Epoch 67/120: 100%|██████████| 3000/3000 [01:09<00:00, 43.32it/s]


Epoch [67/120], Loss: 3.4865


Epoch 68/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.86it/s]


Epoch [68/120], Loss: 3.4845


Epoch 69/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.92it/s]


Epoch [69/120], Loss: 3.4788


Epoch 70/120: 100%|██████████| 3000/3000 [01:06<00:00, 44.93it/s]


Epoch [70/120], Loss: 3.4729


Epoch 71/120: 100%|██████████| 3000/3000 [01:05<00:00, 45.57it/s]


Epoch [71/120], Loss: 3.4748


Epoch 72/120: 100%|██████████| 3000/3000 [01:05<00:00, 45.50it/s]


Epoch [72/120], Loss: 3.4645


Epoch 73/120: 100%|██████████| 3000/3000 [01:05<00:00, 45.59it/s]


Epoch [73/120], Loss: 3.4567


Epoch 74/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.72it/s]


Epoch [74/120], Loss: 3.4606


Epoch 75/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.56it/s]


Epoch [75/120], Loss: 3.4596


Epoch 76/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.35it/s]


Epoch [76/120], Loss: 3.4577


Epoch 77/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.66it/s]


Epoch [77/120], Loss: 3.4506


Epoch 78/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.76it/s]


Epoch [78/120], Loss: 3.4483


Epoch 79/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.01it/s]


Epoch [79/120], Loss: 3.4480


Epoch 80/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.12it/s]


Epoch [80/120], Loss: 3.4444


Epoch 81/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.19it/s]


Epoch [81/120], Loss: 3.4407


Epoch 82/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.10it/s]


Epoch [82/120], Loss: 3.4389


Epoch 83/120: 100%|██████████| 3000/3000 [01:08<00:00, 44.04it/s]


Epoch [83/120], Loss: 3.4312


Epoch 84/120: 100%|██████████| 3000/3000 [01:06<00:00, 44.81it/s]


Epoch [84/120], Loss: 3.4273


Epoch 85/120: 100%|██████████| 3000/3000 [01:06<00:00, 44.92it/s]


Epoch [85/120], Loss: 3.4405


Epoch 86/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.03it/s]


Epoch [86/120], Loss: 3.4290


Epoch 87/120: 100%|██████████| 3000/3000 [01:06<00:00, 44.98it/s]


Epoch [87/120], Loss: 3.4274


Epoch 88/120: 100%|██████████| 3000/3000 [01:06<00:00, 44.94it/s]


Epoch [88/120], Loss: 3.4223


Epoch 89/120: 100%|██████████| 3000/3000 [01:06<00:00, 45.04it/s]


Epoch [89/120], Loss: 3.4219


Epoch 90/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.78it/s]


Epoch [90/120], Loss: 3.4215


Epoch 91/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.89it/s]


Epoch [91/120], Loss: 3.4309


Epoch 92/120: 100%|██████████| 3000/3000 [01:08<00:00, 44.04it/s]


Epoch [92/120], Loss: 3.4199


Epoch 93/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.45it/s]


Epoch [93/120], Loss: 3.4159


Epoch 94/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.41it/s]


Epoch [94/120], Loss: 3.4106


Epoch 95/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.94it/s]


Epoch [95/120], Loss: 3.4137


Epoch 96/120: 100%|██████████| 3000/3000 [01:09<00:00, 43.41it/s]


Epoch [96/120], Loss: 3.4070


Epoch 97/120: 100%|██████████| 3000/3000 [01:08<00:00, 44.02it/s]


Epoch [97/120], Loss: 3.4055


Epoch 98/120: 100%|██████████| 3000/3000 [01:10<00:00, 42.69it/s]


Epoch [98/120], Loss: 3.3970


Epoch 99/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.64it/s]


Epoch [99/120], Loss: 3.4001


Epoch 100/120: 100%|██████████| 3000/3000 [01:07<00:00, 44.31it/s]


Epoch [100/120], Loss: 3.3986


Epoch 101/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.97it/s]


Epoch [101/120], Loss: 3.4014


Epoch 102/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.93it/s]


Epoch [102/120], Loss: 3.3954


Epoch 103/120: 100%|██████████| 3000/3000 [01:09<00:00, 43.40it/s]


Epoch [103/120], Loss: 3.4017


Epoch 104/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.69it/s]


Epoch [104/120], Loss: 3.3940


Epoch 105/120: 100%|██████████| 3000/3000 [01:08<00:00, 43.90it/s]


Epoch [105/120], Loss: 3.3935


Epoch 106/120: 100%|██████████| 3000/3000 [01:09<00:00, 43.05it/s]


Epoch [106/120], Loss: 3.3908


Epoch 107/120: 100%|██████████| 3000/3000 [01:12<00:00, 41.41it/s]


Epoch [107/120], Loss: 3.3851


Epoch 108/120: 100%|██████████| 3000/3000 [01:12<00:00, 41.41it/s]


Epoch [108/120], Loss: 3.3898


Epoch 109/120: 100%|██████████| 3000/3000 [01:12<00:00, 41.35it/s]


Epoch [109/120], Loss: 3.3794


Epoch 110/120: 100%|██████████| 3000/3000 [01:12<00:00, 41.60it/s]


Epoch [110/120], Loss: 3.3857


Epoch 111/120: 100%|██████████| 3000/3000 [01:12<00:00, 41.37it/s]


Epoch [111/120], Loss: 3.3757


Epoch 112/120: 100%|██████████| 3000/3000 [01:16<00:00, 39.22it/s]


Epoch [112/120], Loss: 3.3863


Epoch 113/120: 100%|██████████| 3000/3000 [01:12<00:00, 41.45it/s]


Epoch [113/120], Loss: 3.3778


Epoch 114/120: 100%|██████████| 3000/3000 [01:12<00:00, 41.50it/s]


Epoch [114/120], Loss: 3.3682


Epoch 115/120: 100%|██████████| 3000/3000 [01:20<00:00, 37.16it/s]


Epoch [115/120], Loss: 3.3716


Epoch 116/120: 100%|██████████| 3000/3000 [01:19<00:00, 37.91it/s]


Epoch [116/120], Loss: 3.3657


Epoch 117/120: 100%|██████████| 3000/3000 [01:24<00:00, 35.54it/s]


Epoch [117/120], Loss: 3.3728


Epoch 118/120: 100%|██████████| 3000/3000 [01:16<00:00, 39.13it/s]


Epoch [118/120], Loss: 3.3678


Epoch 119/120: 100%|██████████| 3000/3000 [01:13<00:00, 40.87it/s]


Epoch [119/120], Loss: 3.3685


Epoch 120/120: 100%|██████████| 3000/3000 [01:12<00:00, 41.48it/s]

Epoch [120/120], Loss: 3.3718





In [26]:
def evaluate_model(model, data_loader, criterion):
    model.eval()
    epoch_loss = 0

    with torch.no_grad():
        for english_batch, french_batch in data_loader:
            english_batch = english_batch.to(device)
            french_batch = french_batch.to(device)


            output = model(english_batch, french_batch)


            output = output[1:].reshape(-1, output.shape[2])
            target = french_batch[1:].reshape(-1)


            loss = criterion(output, target)
            epoch_loss += loss.item()

    avg_loss = epoch_loss / len(data_loader)
    return avg_loss


test_loss = evaluate_model(model, test_loader, criterion)
print(f"Test Loss: {test_loss:.4f}")

Test Loss: 4.2269


In [27]:
from torchtext.data.metrics import bleu_score

def calculate_bleu(data_loader, model, vocab_fr, max_len=50):
    model.eval()
    targets = []
    outputs = []

    with torch.no_grad():
        for english_batch, french_batch in data_loader:
            english_batch = english_batch.to(device)
            french_batch = french_batch.to(device)


            output = model(english_batch, french_batch, 0)


            output = output.argmax(2)
            for i in range(output.shape[0]):
                predicted_tokens = [vocab_fr.get_itos()[token] for token in output[i].cpu().numpy()]
                target_tokens = [vocab_fr.get_itos()[token] for token in french_batch[i].cpu().numpy()]


                predicted_tokens = [token for token in predicted_tokens if token not in ['<sos>', '<eos>', '<pad>']]
                target_tokens = [token for token in target_tokens if token not in ['<sos>', '<eos>', '<pad>']]

                outputs.append(predicted_tokens)
                targets.append([target_tokens])


    bleu = bleu_score(outputs, targets)
    return bleu


bleu = calculate_bleu(test_loader, model, vocab_fr)
print(f"BLEU Score: {bleu:.4f}")


BLEU Score: 0.2574
