<a href="https://colab.research.google.com/github/dhanush852/intro_to_deeplearning/blob/main/HW4/RTML4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **1**

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import random

# Sample dataset of English-French sentence pairs (trimmed for brevity)
data_pairs = [
    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur")
]

def split_dataset(data_pairs, split_ratio=0.8):
    random.shuffle(data_pairs)
    split_point = int(len(data_pairs) * split_ratio)
    train_pairs = data_pairs[:split_point]
    val_pairs = data_pairs[split_point:]
    return train_pairs, val_pairs

# Vocabulary Construction
def construct_vocab(pairs):
    vocab = {"SOS": 0, "EOS": 1}
    for eng, fr in pairs:
        for word in eng.split() + fr.split():
            if word not in vocab:
                vocab[word] = len(vocab)
    return vocab, {idx: word for word, idx in vocab.items()}

vocab, idx_to_word = construct_vocab(data_pairs)

# Dataset and DataLoader
class TranslationDataset(Dataset):
    def __init__(self, pairs, vocab):
        self.pairs = pairs
        self.vocab = vocab

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        eng, fr = self.pairs[idx]
        eng_idx = [self.vocab[word] for word in eng.split()] + [self.vocab["EOS"]]
        fr_idx = [self.vocab[word] for word in fr.split()] + [self.vocab["EOS"]]
        return torch.tensor(eng_idx, dtype=torch.long), torch.tensor(fr_idx, dtype=torch.long)

# Split the dataset into training and validation sets
train_pairs, val_pairs = split_dataset(data_pairs)
train_dataset = TranslationDataset(train_pairs, vocab)
val_dataset = TranslationDataset(val_pairs, vocab)

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

# Model Definitions
class Encoder(nn.Module):
    def __init__(self, vocab_size, hidden_size):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size)

class Decoder(nn.Module):
    def __init__(self, hidden_size, vocab_size):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, vocab_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = torch.relu(embedded)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size)

# Initialize models, optimizers, and loss function
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder = Encoder(len(vocab), 256).to(device)
decoder = Decoder(256, len(vocab)).to(device)
encoder_optimizer = optim.SGD(encoder.parameters(), lr=0.01)
decoder_optimizer = optim.SGD(decoder.parameters(), lr=0.01)
criterion = nn.NLLLoss()




In [8]:
# Training Function
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion):
    encoder_hidden = encoder.initHidden().to(device)

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei].to(device), encoder_hidden)

    decoder_input = torch.tensor([[vocab["SOS"]]], device=device)

    decoder_hidden = encoder_hidden

    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        topv, topi = decoder_output.topk(1)
        decoder_input = topi.squeeze().detach()

        loss += criterion(decoder_output, target_tensor[di].to(device))
        if decoder_input.item() == vocab["EOS"]:
            break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length



In [9]:
# Validation Function
# Simplified Training Function Example
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=10):
    encoder_hidden = encoder.initHidden().to(device)

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    loss = 0

    # Encode
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei].unsqueeze(0), encoder_hidden)

    decoder_input = torch.tensor([[vocab["SOS"]]], device=device)

    # Assume teacher forcing for simplicity
    decoder_hidden = encoder_hidden
    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        loss += criterion(decoder_output, target_tensor[di].unsqueeze(0))
        decoder_input = target_tensor[di]  # Teacher forcing

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [10]:


# Main Training Loop Adjustments
def validate(encoder, decoder, dataloader, criterion):
    encoder.eval()
    decoder.eval()
    total_loss = 0
    correct_tokens = 0
    total_tokens = 0

    with torch.no_grad():
        for input_tensor, target_tensor in dataloader:
            input_tensor, target_tensor = input_tensor[0].to(device), target_tensor[0].to(device)
            encoder_hidden = encoder.initHidden().to(device)

            # Encoding
            for ei in range(input_tensor.size(0)):
                encoder_output, encoder_hidden = encoder(input_tensor[ei].unsqueeze(0), encoder_hidden)

            decoder_input = torch.tensor([[vocab["SOS"]]], device=device)
            decoder_hidden = encoder_hidden

            loss = 0

            # Decoding
            for di in range(target_tensor.size(0)):
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                topv, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze().detach()

                loss += criterion(decoder_output, target_tensor[di].unsqueeze(0)).item()

                # Calculate accuracy
                if decoder_input.item() == target_tensor[di]:
                    correct_tokens += 1
                total_tokens += 1

                if decoder_input.item() == vocab["EOS"]:
                    break

            total_loss += loss / target_tensor.size(0)

    avg_loss = total_loss / len(dataloader)
    accuracy = correct_tokens / total_tokens
    return avg_loss, accuracy

In [11]:
# Main training loop
for epoch in range(100):
    total_loss = 0
    for input_tensor, target_tensor in train_loader:
        input_tensor, target_tensor = input_tensor.to(device), target_tensor.to(device)
        loss = train(input_tensor.squeeze(), target_tensor.squeeze(), encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
        total_loss += loss

    avg_train_loss = total_loss / len(train_loader)
    print(f'Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}')

# After all epochs, evaluate on the validation set
val_loss, val_accuracy = validate(encoder, decoder, val_loader, criterion)
print(f'Average Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}')


Epoch 1, Train Loss: 5.0234
Epoch 2, Train Loss: 4.2484
Epoch 3, Train Loss: 3.9252
Epoch 4, Train Loss: 3.6648
Epoch 5, Train Loss: 3.4008
Epoch 6, Train Loss: 3.1267
Epoch 7, Train Loss: 2.8201
Epoch 8, Train Loss: 2.5398
Epoch 9, Train Loss: 2.2649
Epoch 10, Train Loss: 1.9895
Epoch 11, Train Loss: 1.7290
Epoch 12, Train Loss: 1.5162
Epoch 13, Train Loss: 1.3323
Epoch 14, Train Loss: 1.1786
Epoch 15, Train Loss: 1.0612
Epoch 16, Train Loss: 0.9582
Epoch 17, Train Loss: 0.8712
Epoch 18, Train Loss: 0.8074
Epoch 19, Train Loss: 0.7496
Epoch 20, Train Loss: 0.6983
Epoch 21, Train Loss: 0.6491
Epoch 22, Train Loss: 0.6052
Epoch 23, Train Loss: 0.5631
Epoch 24, Train Loss: 0.5242
Epoch 25, Train Loss: 0.4874
Epoch 26, Train Loss: 0.4452
Epoch 27, Train Loss: 0.4129
Epoch 28, Train Loss: 0.3790
Epoch 29, Train Loss: 0.3426
Epoch 30, Train Loss: 0.3108
Epoch 31, Train Loss: 0.2831
Epoch 32, Train Loss: 0.2539
Epoch 33, Train Loss: 0.2290
Epoch 34, Train Loss: 0.2071
Epoch 35, Train Loss: 0

In [12]:
def validate_and_show_examples(encoder, decoder, dataloader, criterion, idx_to_word, num_examples=5):
    encoder.eval()
    decoder.eval()
    total_loss = 0
    correct_tokens = 0
    total_tokens = 0
    examples = []

    with torch.no_grad():
        for i, (input_tensor, target_tensor) in enumerate(dataloader):
            input_tensor, target_tensor = input_tensor[0].to(device), target_tensor[0].to(device)
            encoder_hidden = encoder.initHidden().to(device)

            # Encoding
            for ei in range(input_tensor.size(0)):
                encoder_output, encoder_hidden = encoder(input_tensor[ei].unsqueeze(0), encoder_hidden)

            decoder_input = torch.tensor([[vocab["SOS"]]], device=device)
            decoder_hidden = encoder_hidden

            loss = 0
            predicted_tokens = []

            # Decoding
            for di in range(target_tensor.size(0)):
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                topv, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze().detach()

                if decoder_input.item() == vocab["EOS"]:
                    break

                # Collect predictions
                predicted_tokens.append(decoder_input.item())

                loss += criterion(decoder_output, target_tensor[di].unsqueeze(0)).item()

                # Calculate accuracy
                if decoder_input.item() == target_tensor[di]:
                    correct_tokens += 1
                total_tokens += 1

            total_loss += loss / target_tensor.size(0)

            # Save examples
            if i < num_examples:
                input_sentence = ' '.join([idx_to_word[t.item()] for t in input_tensor if t.item() not in [vocab['SOS'], vocab['EOS']]])
                target_sentence = ' '.join([idx_to_word[t.item()] for t in target_tensor if t.item() not in [vocab['SOS'], vocab['EOS']]])
                predicted_sentence = ' '.join([idx_to_word[t] for t in predicted_tokens if t not in [vocab['SOS'], vocab['EOS']]])
                examples.append((input_sentence, target_sentence, predicted_sentence))

    avg_loss = total_loss / len(dataloader)
    accuracy = correct_tokens / total_tokens

    # Print examples
    for input_sentence, target_sentence, predicted_sentence in examples:
        print(f'Input: {input_sentence}, Target: {target_sentence}, Predicted: {predicted_sentence}')

    return avg_loss, accuracy


In [13]:
val_loss, val_accuracy = validate_and_show_examples(encoder, decoder, val_loader, criterion, idx_to_word, num_examples=5)
print(f'Average Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}')


Input: She sings a song, Target: Elle chante une chanson, Predicted: Elle rêve de voler
Input: We watch movies on Fridays, Target: Nous regardons des films le vendredi, Predicted: Nous apprenons quelque chose de nouveau chaque
Input: She is happy, Target: Elle est heureuse, Predicted: Le chat dort
Input: He is hungry, Target: Il a faim, Predicted: Il chante dans le
Input: We cook dinner together, Target: Nous cuisinons le dîner ensemble, Predicted: Nous prenons le petit déjeuner ensemble
Average Validation Loss: 6.6437, Accuracy: 0.1646


### **2**

In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

english_to_french = [
    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur")
]

# Special tokens for the start and end of sequences
SOS_token = 0  # Start Of Sequence Token
EOS_token = 1  # End Of Sequence Token
max_length = 12

# Preparing the character to index mapping and vice versa for English and French
def build_vocab(sentences):
    vocab = {"SOS": SOS_token, "EOS": EOS_token}
    index = 2  # Start indexing from 2 since 0 and 1 are reserved for SOS and EOS
    for pair in sentences:
        for sentence in pair:
            for word in sentence.split():
                if word not in vocab:
                    vocab[word] = index
                    index += 1
    return vocab




In [22]:
vocab = build_vocab(english_to_french)  # Returns a dictionary
dataset = TranslationDataset(english_to_french, vocab, vocab)  # Pass the same vocab for both if shared
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)


english_vocab = build_vocab(english_to_french)
french_vocab = english_vocab

char_to_index_english = {"SOS": SOS_token, "EOS": EOS_token, **{char: i+2 for i, char in enumerate(sorted(list(english_vocab)))}}
index_to_char_english = {i: char for char, i in char_to_index_english.items()}

char_to_index_french = {"SOS": SOS_token, "EOS": EOS_token, **{char: i+2 for i, char in enumerate(sorted(list(french_vocab)))}}
index_to_char_french = {i: char for char, i in char_to_index_french.items()}

class TranslationDataset(Dataset):
    def __init__(self, dataset, char_to_index_english, char_to_index_french):
        self.dataset = dataset
        self.char_to_index_english = char_to_index_english
        self.char_to_index_french = char_to_index_french

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        english_sentence, french_sentence = self.dataset[idx]
        english_tensor = torch.tensor([self.char_to_index_english[word] for word in english_sentence.split()] + [EOS_token], dtype=torch.long)
        french_tensor = torch.tensor([self.char_to_index_french[word] for word in french_sentence.split()] + [EOS_token], dtype=torch.long)
        return english_tensor, french_tensor


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)



In [25]:

class AttnDecoder(nn.Module):
    def __init__(self, hidden_size, output_size, max_length=12, dropout_p=0.1):
        super(AttnDecoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = torch.softmax(self.attn(torch.cat((embedded[0], hidden[0]), dim=1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output, hidden = self.gru(output, hidden)
        output = torch.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

encoder = Encoder(input_size=len(char_to_index_english), hidden_size=256).to(device)
decoder = AttnDecoder(hidden_size=256, output_size=len(char_to_index_french), max_length=12).to(device)

encoder_optimizer = optim.SGD(encoder.parameters(), lr=0.01)
decoder_optimizer = optim.SGD(decoder.parameters(), lr=0.01)
criterion = nn.NLLLoss()

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=12):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden

    for di in range(target_length):
        decoder_output, decoder_hidden, _ = decoder(decoder_input, decoder_hidden, encoder_outputs)
        topv, topi = decoder_output.topk(1)
        decoder_input = topi.squeeze().detach()

        loss += criterion(decoder_output, target_tensor[di].unsqueeze(0))
        if decoder_input.item() == EOS_token:
            break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

# Training loop
n_epochs = 100
for epoch in range(n_epochs):
    total_loss = 0
    for input_tensor, target_tensor in dataloader:
        input_tensor = input_tensor[0].to(device)
        target_tensor = target_tensor[0].to(device)
        loss = train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
        total_loss += loss
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {total_loss / len(dataloader)}')

def evaluate_and_show_examples(encoder, decoder, dataloader, index_to_char_french, index_to_char_english, n_examples=5):
    encoder.eval()
    decoder.eval()

    examples = []

    with torch.no_grad():
        for i, (input_tensor, target_tensor) in enumerate(dataloader):
            if i >= n_examples: break
            input_tensor = input_tensor[0].to(device)
            target_tensor = target_tensor[0].to(device)

            encoder_hidden = encoder.initHidden()
            input_length = input_tensor.size(0)
            encoder_outputs = torch.zeros(12, encoder.hidden_size, device=device)

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
                encoder_outputs[ei] += encoder_output[0, 0]

            decoder_input = torch.tensor([[SOS_token]], device=device)
            decoder_hidden = encoder_hidden

            decoded_words = []

            for di in range(12):
                decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
                topv, topi = decoder_output.data.topk(1)
                if topi.item() == EOS_token:
                    decoded_words.append('<EOS>')
                    break
                else:
                    decoded_words.append(index_to_char_french[topi.item()])

                decoder_input = topi.squeeze().detach()

            input_sentence = ' '.join([index_to_char_english[t.item()] for t in input_tensor if t.item() not in [SOS_token, EOS_token]])
            target_sentence = ' '.join([index_to_char_french[t.item()] for t in target_tensor if t.item() not in [SOS_token, EOS_token]])
            predicted_sentence = ' '.join(decoded_words)
            examples.append((input_sentence, target_sentence, predicted_sentence))

    for example in examples:
        print(f'Input: {example[0]}, Target: {example[1]}, Predicted: {example[2]}')

# Assuming `dataloader`, `index_to_char_french`, and `index_to_char_english` are defined
evaluate_and_show_examples(encoder, decoder, dataloader, index_to_char_french, index_to_char_english)

Epoch 0, Loss: 3.4905259095179955
Epoch 10, Loss: 2.5898041025482974
Epoch 20, Loss: 1.268367848119105
Epoch 30, Loss: 0.2854001055010642
Epoch 40, Loss: 0.07485782148170979
Epoch 50, Loss: 0.03699374344231381
Epoch 60, Loss: 0.0250416412319465
Epoch 70, Loss: 0.018200209976959737
Epoch 80, Loss: 0.01460494970235842
Epoch 90, Loss: 0.012086998224043395
Input: You brille bruyamment build bureau, Target: aboie bus by bébé café cake, Predicted: aboie bus by bébé café cake <EOS>
Input: Ils soccer soir, Target: La are soleil, Predicted: La are soleil <EOS>
Input: You examens enfants something, Target: aboie falls chien something, Predicted: aboie falls chien something <EOS>
Input: You examens exams faim faisant, Target: aboie falls famille every family, Predicted: aboie falls famille every family <EOS>
Input: Ils hungry Le parlons, Target: La joke climbs party, Predicted: La joke climbs party <EOS>


### **3**

In [38]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset


# Sample dataset of English-French sentence pairs
english_to_french = [
    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur")
]

french_to_english = [(french, english) for (english, french) in english_to_french]

# Special tokens for the start and end of sequences
SOS_token = 0  # Start Of Sequence Token
EOS_token = 1  # End Of Sequence Token
max_length = 12


In [39]:
# Preparing the character to index mapping and vice versa for English and French
def build_vocab(sentences):
    vocab = set()
    for pair in sentences:
        english_sentence, french_sentence = pair
        for word in english_sentence.split():
            vocab.add(word)
        for word in french_sentence.split():
            vocab.add(word)
    return vocab

english_vocab = build_vocab(english_to_french)
french_vocab = english_vocab

In [40]:
char_to_index_english = {"SOS": SOS_token, "EOS": EOS_token, **{char: i+2 for i, char in enumerate(sorted(list(english_vocab)))}}
index_to_char_english = {i: char for char, i in char_to_index_english.items()}

char_to_index_french = {"SOS": SOS_token, "EOS": EOS_token, **{char: i+2 for i, char in enumerate(sorted(list(french_vocab)))}}
index_to_char_french = {i: char for char, i in char_to_index_french.items()}


In [41]:
class EnglishFrenchDataset(Dataset):
    """Custom Dataset class for handling English-French sentence pairs."""
    def __init__(self, dataset, char_to_index_english, char_to_index_french):
        self.dataset = dataset
        self.char_to_index_english = char_to_index_english
        self.char_to_index_french = char_to_index_french

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        english_sentence, french_sentence = self.dataset[idx]
        english_tensor = torch.tensor([self.char_to_index_english[word] for word in english_sentence.split()] + [EOS_token], dtype=torch.long)
        french_tensor = torch.tensor([self.char_to_index_french[word] for word in french_sentence.split()] + [EOS_token], dtype=torch.long)
        return english_tensor, french_tensor

english_french_dataset = EnglishFrenchDataset(french_to_english, char_to_index_french, char_to_index_english)
dataloader = DataLoader(english_french_dataset, batch_size=1, shuffle=True)

In [42]:
# Initialize encoder and decoder
input_size_english = len(char_to_index_english)
input_size_french = len(char_to_index_french)
hidden_size = 256  # You can adjust this as needed

class Encoder(nn.Module):
    """Encoder that processes the input sequence and returns its hidden states."""
    def __init__(self, input_size, hidden_size):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.lstm(embedded, hidden)
        return output, hidden

    def initHidden(self):
        return (torch.zeros(1, 1, self.hidden_size, device=device),
                torch.zeros(1, 1, self.hidden_size, device=device))

In [43]:
class AttnDecoder(nn.Module):
    """Decoder with attention mechanism."""
    def __init__(self, hidden_size, output_size, max_length=12, dropout_p=0.1):
        super(AttnDecoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.lstm = nn.LSTM(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = torch.softmax(
            self.attn(torch.cat((embedded[0], hidden[0][0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = torch.relu(output)
        output, hidden = self.lstm(output, hidden)

        output = torch.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return (torch.zeros(1, 1, self.hidden_size, device=device),
                torch.zeros(1, 1, self.hidden_size, device=device))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define loss function and optimizer
learning_rate = 0.01
criterion = nn.NLLLoss()
encoder_english = Encoder(input_size=input_size_english, hidden_size=hidden_size).to(device)
decoder_french = AttnDecoder(hidden_size=hidden_size, output_size=input_size_french).to(device)
encoder_optimizer = optim.SGD(encoder_english.parameters(), lr=learning_rate)
decoder_optimizer = optim.SGD(decoder_french.parameters(), lr=learning_rate)

In [44]:
# Training loop
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=12):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei].unsqueeze(0), encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden

    for di in range(target_length):
        decoder_output, decoder_hidden, decoder_attention = decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        topv, topi = decoder_output.topk(1)
        decoder_input = topi.squeeze().detach()

        loss += criterion(decoder_output, target_tensor[di].unsqueeze(0))
        if decoder_input.item() == EOS_token:
            break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [37]:
n_epochs = 100

for epoch in range(n_epochs):
    total_loss = 0
    for input_tensor_english, target_tensor_french in dataloader:
        input_tensor_english = input_tensor_english[0].to(device)
        target_tensor_french = target_tensor_french[0].to(device)

        loss = train(input_tensor_english, target_tensor_french, encoder_english, decoder_french, encoder_optimizer, decoder_optimizer, criterion)
        total_loss += loss

    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {total_loss / len(dataloader)}')

def evaluate_and_show_examples(encoder, decoder, dataloader, criterion, n_examples=5):
    encoder.eval()
    decoder.eval()

    total_loss = 0
    total_sentences = 0
    correct_predictions = 0

    with torch.no_grad():
        for i, (input_tensor_english, target_tensor_french) in enumerate(dataloader):
            input_tensor_english = input_tensor_english[0].to(device)
            target_tensor_french = target_tensor_french[0].to(device)

            encoder_hidden = encoder.initHidden()
            input_length = input_tensor_english.size(0)
            target_length = target_tensor_french.size(0)

            encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

            loss = 0

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor_english[ei].unsqueeze(0), encoder_hidden)
                encoder_outputs[ei] = encoder_output[0, 0]

            decoder_input = torch.tensor([[SOS_token]], device=device)
            decoder_hidden = encoder_hidden

            predicted_indices = []

            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                topv, topi = decoder_output.topk(1)
                predicted_indices.append(topi.item())
                decoder_input = topi.squeeze().detach()

                loss += criterion(decoder_output, target_tensor_french[di].unsqueeze(0))
                if decoder_input.item() == EOS_token:
                    break

            total_loss += loss.item() / target_length
            total_sentences += 1

            if predicted_indices == target_tensor_french.tolist():
                correct_predictions += 1

            if i < n_examples:
                predicted_string = ' '.join([index_to_char_french[index] for index in predicted_indices if index not in (SOS_token, EOS_token)])
                target_string = ' '.join([index_to_char_french[index.item()] for index in target_tensor_french if index.item() not in (SOS_token, EOS_token)])
                input_string = ' '.join([index_to_char_english[index.item()] for index in input_tensor_english if index.item() not in (SOS_token, EOS_token)])

                print(f'Input: {input_string}, Target: {target_string}, Predicted: {predicted_string}')

        average_loss = total_loss / len(dataloader)
        accuracy = correct_predictions / total_sentences
        print(f'Evaluation Loss: {average_loss}, Accuracy: {accuracy}')

# Perform evaluation
evaluate_and_show_examples(encoder_english, decoder_french, dataloader, criterion)

Epoch 0, Loss: 4.010834950197605
Epoch 10, Loss: 2.902479066772083
Epoch 20, Loss: 2.1684347780678257
Epoch 30, Loss: 0.9655787520926165
Epoch 40, Loss: 0.21072806847928552
Epoch 50, Loss: 0.0795301086859675
Epoch 60, Loss: 0.04852596628505577
Epoch 70, Loss: 0.0339684262887869
Epoch 80, Loss: 0.02606688066867762
Epoch 90, Loss: 0.020884563858536154
Input: Le chat dort, Target: The cat is sleeping, Predicted: The cat is sleeping
Input: Les enfants jouent dans le parc, Target: The children play in the park, Predicted: The children play in the park
Input: Nous apprenons quelque chose de nouveau chaque jour, Target: We learn something new every day, Predicted: We learn something new every day
Input: Le chien aboie bruyamment, Target: The dog barks loudly, Predicted: The dog barks loudly
Input: Le vent souffle doucement, Target: The wind blows gently, Predicted: The wind blows gently
Evaluation Loss: 0.016661303533366707, Accuracy: 1.0
