<a href="https://colab.research.google.com/github/UmaNagirireddi/RTML/blob/main/HW4/Uma_RTML4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

# Sample dataset of English-French sentence pairs
english_to_french = [
    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur")
]

# Start and End of Sequence Tokens
START_TOKEN = 0
END_TOKEN = 1
sentence_max_length = 12

def prepare_vocab(sentences):
    vocab_set = set()
    for en, fr in sentences:
        vocab_set.update(en.split(), fr.split())
    return vocab_set

vocab = prepare_vocab(english_to_french) # sentence_pairs needs to be defined or passed to this function

en_fr_vocab = {word: idx+2 for idx, word in enumerate(sorted(vocab))}
en_fr_vocab["START"] = START_TOKEN
en_fr_vocab["END"] = END_TOKEN

idx_to_word = {idx: word for word, idx in en_fr_vocab.items()}

class SentencePairsDataset(Dataset):
    def __init__(self, pairs, vocab):
        self.pairs = pairs
        self.vocab = vocab

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        en, fr = self.pairs[idx]
        en_indices = [self.vocab[word] for word in en.split()] + [END_TOKEN]
        fr_indices = [self.vocab[word] for word in fr.split()] + [END_TOKEN]
        return torch.tensor(en_indices, dtype=torch.long), torch.tensor(fr_indices, dtype=torch.long)

dataset = SentencePairsDataset(english_to_french, en_fr_vocab) # sentence_pairs needs to be a list of (en, fr) tuples
loader = DataLoader(dataset, batch_size=1, shuffle=True)

class Seq2SeqEncoder(nn.Module):
    def __init__(self, vocab_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size)

    def forward(self, input_seq, hidden_state):
        embedded = self.embedding(input_seq).view(1, 1, -1)
        output, hidden_state = self.rnn(embedded, hidden_state)
        return output, hidden_state

    def initialize_hidden(self):
        return (torch.zeros(1, 1, self.hidden_size, device=compute_device),
            torch.zeros(1, 1, self.hidden_size, device=compute_device))

class Seq2SeqDecoder(nn.Module):
    def __init__(self, hidden_size, vocab_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, vocab_size)

    def forward(self, input_seq, hidden_state):
        embedded = self.embedding(input_seq).view(1, 1, -1)
        output, hidden_state = self.rnn(embedded, hidden_state)
        output = torch.log_softmax(self.out(output[0]), dim=1)
        return output, hidden_state

compute_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

encoder = Seq2SeqEncoder(len(en_fr_vocab), 256).to(compute_device)
decoder = Seq2SeqDecoder(256, len(en_fr_vocab)).to(compute_device)

optim_encoder = optim.SGD(encoder.parameters(), lr=0.01)
optim_decoder = optim.SGD(decoder.parameters(), lr=0.01)
loss_fn = nn.NLLLoss()

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, loss_fn, max_length=sentence_max_length):
    encoder_hidden = encoder.initialize_hidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei].unsqueeze(0), encoder_hidden)

    decoder_input = torch.tensor([[START_TOKEN]], device=compute_device)
    decoder_hidden = encoder_hidden

    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        topv, topi = decoder_output.topk(1)
        decoder_input = topi.squeeze().detach()

        loss += loss_fn(decoder_output, target_tensor[di].unsqueeze(0))
        if decoder_input.item() == END_TOKEN:
            break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

# Training loop
for epoch in range(1, 101):
    total_loss = 0
    for input_tensor, target_tensor in loader:
        input_tensor = input_tensor[0].to(compute_device)
        target_tensor = target_tensor[0].to(compute_device)

        loss = train(input_tensor, target_tensor, encoder, decoder, optim_encoder, optim_decoder, loss_fn)
        total_loss += loss

    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {total_loss / len(loader)}')

def evaluate_accuracy(output, target):
    _, predicted = torch.max(output, 1)
    correct = (predicted == target).float().sum().item()
    return correct, target.size(0)

def evaluate(encoder, decoder, loader, loss_fn, max_length=sentence_max_length):
    encoder.eval()
    decoder.eval()

    total_loss = 0
    total_correct = 0
    total_tokens = 0

    with torch.no_grad():
        for input_tensor, target_tensor in loader:
            input_tensor = input_tensor[0].to(compute_device)
            target_tensor = target_tensor[0].to(compute_device)
            encoder_hidden = encoder.initialize_hidden()

            input_length = input_tensor.size(0)
            target_length = target_tensor.size(0)

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[ei].unsqueeze(0), encoder_hidden)

            decoder_input = torch.tensor([[START_TOKEN]], device=compute_device)
            decoder_hidden = encoder_hidden

            loss = 0

            for di in range(target_length):
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                topv, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze().detach()

                correct, total = evaluate_accuracy(decoder_output, target_tensor[di].unsqueeze(0))
                total_correct += correct
                total_tokens += total

                loss += loss_fn(decoder_output, target_tensor[di].unsqueeze(0))
                if decoder_input.item() == END_TOKEN:
                    break

            total_loss += loss.item() / target_length

    average_loss = total_loss / len(loader)
    accuracy = total_correct / total_tokens * 100
    print(f'Validation Loss: {average_loss}, Accuracy: {accuracy:.2f}%')

# Evaluation call
evaluate(encoder, decoder, loader, loss_fn)


Epoch 10, Loss: 2.915621383203425
Epoch 20, Loss: 2.274224304759706
Epoch 30, Loss: 1.6384521263551524
Epoch 40, Loss: 0.9227884099719894
Epoch 50, Loss: 0.29093866994106465
Epoch 60, Loss: 0.11089227169911853
Epoch 70, Loss: 0.061343240319309475
Epoch 80, Loss: 0.036692526695089864
Epoch 90, Loss: 0.02747195854259321
Epoch 100, Loss: 0.021966910504889012
Validation Loss: 0.021341064021258568, Accuracy: 100.00%


In [None]:
def translate(input_tensor, encoder, decoder, max_length=sentence_max_length):
    with torch.no_grad():
        input_length = input_tensor.size(0)
        encoder_hidden = encoder.initialize_hidden()

        for ei in range(input_length):
            _, encoder_hidden = encoder(input_tensor[ei].unsqueeze(0), encoder_hidden)

        decoder_input = torch.tensor([[START_TOKEN]], device=compute_device)  # SOS
        decoder_hidden = encoder_hidden

        decoded_words = []

        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == END_TOKEN:
                break
            else:
                decoded_words.append(idx_to_word[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words

def translate_and_show(encoder, decoder, dataset, n=5):
    for i in range(n):
        pair = dataset[i]
        print('>', ' '.join([idx_to_word[index.item()] for index in pair[0] if index.item() not in (START_TOKEN, END_TOKEN)]))
        print('=', ' '.join([idx_to_word[index.item()] for index in pair[1] if index.item() not in (START_TOKEN, END_TOKEN)]))
        output_words = translate(pair[0].to(compute_device), encoder, decoder)
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

# Example of how to call the function after training
translate_and_show(encoder, decoder, dataset)

> I am cold
= J'ai froid
< J'ai froid

> You are tired
= Tu es fatigué
< Tu es fatigué

> He is hungry
= Il a faim
< Il a faim

> She is happy
= Elle est heureuse
< Elle est heureuse

> We are friends
= Nous sommes amis
< Nous sommes amis



**Prob: 2**

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

# Sample dataset of English-French sentence pairs
sentence_pairs = [
    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur")
]

# Constants for sequence tokens
START_TOKEN = 0
END_TOKEN = 1
MAX_LENGTH = 12

# Vocabulary building function
def construct_vocab(sentences):
    vocabulary = set()
    for eng_sentence, fr_sentence in sentences:
        vocabulary.update(eng_sentence.split(), fr_sentence.split())
    return vocabulary

# Build vocabularies
vocab = construct_vocab(sentence_pairs)
eng_to_index = {"SOS": START_TOKEN, "EOS": END_TOKEN, **{word: i+2 for i, word in enumerate(sorted(vocab))}}
index_to_eng = {index: word for word, index in eng_to_index.items()}
fr_to_index = eng_to_index
index_to_fr = index_to_eng


In [7]:
# Custom dataset class for translation pairs
class TranslationDataset(Dataset):
    def __init__(self, pairs, source_vocab, target_vocab):
        self.pairs = pairs
        self.source_vocab = source_vocab
        self.target_vocab = target_vocab

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        source_sentence, target_sentence = self.pairs[idx]
        source_indices = [self.source_vocab[word] for word in source_sentence.split()] + [END_TOKEN]
        target_indices = [self.target_vocab[word] for word in target_sentence.split()] + [END_TOKEN]
        return torch.tensor(source_indices, dtype=torch.long), torch.tensor(target_indices, dtype=torch.long)

# Encoder and Decoder classes
class SequenceEncoder(nn.Module):
    def __init__(self, vocab_size, hidden_dim):
        super(SequenceEncoder, self).__init__()
        self.hidden_dim = hidden_dim
        self.embedding = nn.Embedding(vocab_size, hidden_dim)
        self.lstm = nn.LSTM(hidden_dim, hidden_dim)

    def forward(self, input_seq, hidden_state):
        embedded_seq = self.embedding(input_seq).view(1, 1, -1)
        output, hidden_state = self.lstm(embedded_seq, hidden_state)
        return output, hidden_state

    def init_hidden(self):
        return (torch.zeros(1, 1, self.hidden_dim, device = device),
                torch.zeros(1, 1, self.hidden_dim, device = device))

class AttentionDecoder(nn.Module):
    def __init__(self, hidden_dim, output_size, dropout_p=0.1):
        super(AttentionDecoder, self).__init__()
        self.hidden_dim = hidden_dim
        self.output_size = output_size
        self.dropout_p = dropout_p

        self.embedding = nn.Embedding(output_size, hidden_dim)
        self.attention = nn.Linear(hidden_dim * 2, MAX_LENGTH)
        self.attention_combine = nn.Linear(hidden_dim * 2, hidden_dim)
        self.dropout = nn.Dropout(dropout_p)
        self.lstm = nn.LSTM(hidden_dim, hidden_dim)
        self.output_layer = nn.Linear(hidden_dim, output_size)

    def forward(self, input_seq, hidden_state, encoder_outputs):
        embedded = self.embedding(input_seq).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = torch.softmax(self.attention(torch.cat((embedded[0], hidden_state[0][0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attention_combine(output).unsqueeze(0)

        output = torch.relu(output)
        output, hidden_state = self.lstm(output, hidden_state)
        output = torch.log_softmax(self.output_layer(output[0]), dim=1)
        return output, hidden_state, attn_weights

    def init_hidden(self):
        return (torch.zeros(1, 1, self.hidden_dim, device = device),
                torch.zeros(1, 1, self.hidden_dim, device = device))



In [8]:
# Split sentence pairs into training and validation sets
train_pairs, val_pairs = train_test_split(sentence_pairs, test_size=0.1, random_state=42)

# Initialize Datasets and DataLoaders for both sets
train_dataset = TranslationDataset(train_pairs, eng_to_index, fr_to_index)
val_dataset = TranslationDataset(val_pairs, eng_to_index, fr_to_index)

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model, optimizer, and loss function initialization
encoder = SequenceEncoder(vocab_size=len(eng_to_index), hidden_dim=256).to(device)
decoder = AttentionDecoder(hidden_dim=256, output_size=len(fr_to_index), dropout_p=0.1).to(device)
enc_optimizer = optim.SGD(encoder.parameters(), lr=0.01)
dec_optimizer = optim.SGD(decoder.parameters(), lr=0.01)
loss_criterion = nn.NLLLoss()

# Function to calculate accuracy
def calculate_accuracy(predicted, actual):
    predicted_words = predicted.split(' ')
    actual_words = actual.split(' ')
    correct = sum(p == a for p, a in zip(predicted_words, actual_words))
    total = len(actual_words)
    return correct / total

# Training and validation function
def train_and_validate(encoder, decoder, train_loader, val_loader, encoder_optimizer, decoder_optimizer, criterion, num_epochs=100, max_length=MAX_LENGTH):
    for epoch in range(1, num_epochs + 1):
        total_loss = 0
        encoder.train()
        decoder.train()
        for input_tensor, target_tensor in train_loader:
            input_tensor, target_tensor = input_tensor.to(device), target_tensor.to(device)
            loss = train(input_tensor[0], target_tensor[0], encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
            total_loss += loss

        if epoch % 10 == 0:
            encoder.eval()
            decoder.eval()
            total_val_loss = 0
            total_accuracy = 0
            with torch.no_grad():
                for input_tensor, target_tensor in val_loader:
                    input_tensor, target_tensor = input_tensor.to(device), target_tensor.to(device)
                    predicted_sentence = evaluate(encoder, decoder, ' '.join([index_to_eng[i.item()] for i in input_tensor[0] if i.item() in index_to_eng]), max_length=MAX_LENGTH)
                    actual_sentence = ' '.join([index_to_fr[i.item()] for i in target_tensor[0] if i.item() in index_to_fr])
                    total_accuracy += calculate_accuracy(predicted_sentence, actual_sentence)
                    loss = train(input_tensor[0], target_tensor[0], encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, is_train=False)
                    total_val_loss += loss

            avg_val_loss = total_val_loss / len(val_loader)
            avg_accuracy = total_accuracy / len(val_loader)
            print(f'Epoch {epoch}, Train Loss: {total_loss / len(train_loader)}, Val Loss: {avg_val_loss}, Accuracy: {avg_accuracy}')


In [9]:
# Adjust the train function to include an is_train check
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, is_train=True, max_length=MAX_LENGTH):
    encoder_hidden = encoder.init_hidden()

    if is_train:
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_dim, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei].unsqueeze(0), encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[START_TOKEN]], device=device)
    decoder_hidden = encoder_hidden

    for di in range(target_length):
        decoder_output, decoder_hidden, _ = decoder(decoder_input, decoder_hidden, encoder_outputs)
        topv, topi = decoder_output.topk(1)
        decoder_input = topi.squeeze().detach()

        loss += criterion(decoder_output, target_tensor[di].unsqueeze(0))
        if decoder_input.item() == END_TOKEN:
            break

    if is_train:
        loss.backward()
        encoder_optimizer.step()
        decoder_optimizer.step()

    return loss.item() / target_length

# Evaluate function
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = [eng_to_index[word] for word in sentence.split(' ')] + [END_TOKEN]
        input_tensor = torch.tensor(input_tensor, dtype=torch.long).view(-1, 1).to(device)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.init_hidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_dim, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[START_TOKEN]], device=device)
        decoder_hidden = encoder_hidden

        decoded_words = []

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == END_TOKEN:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(index_to_fr[topi.item()])

            decoder_input = topi.squeeze().detach()

        return ' '.join(decoded_words)

# Now, call the train_and_validate function
train_and_validate(encoder, decoder, train_loader, val_loader, enc_optimizer, dec_optimizer, loss_criterion, num_epochs=100)

Epoch 10, Train Loss: 2.952254098531759, Val Loss: 3.325564131708372, Accuracy: 0.05208333333333333
Epoch 20, Train Loss: 2.3581260853864583, Val Loss: 4.322417713346935, Accuracy: 0.06994047619047619
Epoch 30, Train Loss: 1.5105917891387637, Val Loss: 5.139921947604134, Accuracy: 0.11904761904761904
Epoch 40, Train Loss: 0.49480762722672117, Val Loss: 6.094888183900289, Accuracy: 0.15967261904761904
Epoch 50, Train Loss: 0.17384931147475607, Val Loss: 6.729513594650087, Accuracy: 0.14404761904761904
Epoch 60, Train Loss: 0.07944897687401556, Val Loss: 7.219422692344302, Accuracy: 0.15967261904761904
Epoch 70, Train Loss: 0.047157358018411887, Val Loss: 7.546819506372724, Accuracy: 0.14404761904761904
Epoch 80, Train Loss: 0.03321122660639919, Val Loss: 7.713553614275796, Accuracy: 0.14404761904761904
Epoch 90, Train Loss: 0.02567318403514495, Val Loss: 7.855818131424131, Accuracy: 0.14404761904761904
Epoch 100, Train Loss: 0.020704050147993786, Val Loss: 7.977764657565526, Accuracy: 0

In [10]:
def evaluate_and_show_examples(n_examples=5):
    for i in range(n_examples):
        pair = sentence_pairs[i]
        print(f'> {pair[0]}')
        print(f'= {pair[1]}')
        output_sentence = evaluate(encoder, decoder, pair[0])
        print(f'< {output_sentence}\n')

# Call the evaluate_and_show_examples function to see some translations
evaluate_and_show_examples()

> I am cold
= J'ai froid
< Le chat dort bruyamment <EOS>

> You are tired
= Tu es fatigué
< Tu es fatigué <EOS>

> He is hungry
= Il a faim
< Il a faim <EOS>

> She is happy
= Elle est heureuse
< Elle est heureuse <EOS>

> We are friends
= Nous sommes amis
< Nous aimons la musique <EOS>



**Problem: 3**

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import random

# Dataset placeholder
sentence_pairs = [
    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur")
]

START_TOKEN, END_TOKEN = 0, 1
MAX_LENGTH = 12

def build_vocab(pairs):
    vocab = {"SOS", "EOS"}
    for eng, fr in pairs:
        vocab.update(eng.split(), fr.split())
    return vocab



In [13]:
vocab = build_vocab(sentence_pairs)
word_to_index = {word: i for i, word in enumerate(sorted(vocab))}
index_to_word = {i: word for word, i in word_to_index.items()}

class SentencePairsDataset(Dataset):
    def __init__(self, pairs, word_to_index):
        self.pairs = pairs
        self.word_to_index = word_to_index

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        source_sentence, target_sentence = self.pairs[idx]
        source_tensor = torch.tensor([self.word_to_index[word] for word in source_sentence.split()] + [word_to_index["EOS"]], dtype=torch.long)
        target_tensor = torch.tensor([self.word_to_index[word] for word in target_sentence.split()] + [word_to_index["EOS"]], dtype=torch.long)
        return source_tensor, target_tensor

dataset = SentencePairsDataset(sentence_pairs, word_to_index)
loader = DataLoader(dataset, batch_size=1, shuffle=True)

# Separate a portion of data for validation
random.shuffle(sentence_pairs)
split = int(0.8 * len(sentence_pairs))  # 80-20 train-validation split
train_pairs = sentence_pairs[:split]
val_pairs = sentence_pairs[split:]

val_dataset = SentencePairsDataset(val_pairs, word_to_index)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)  # No need to shuffle for validation

class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size)

class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = torch.relu(embedded)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder = EncoderRNN(len(word_to_index), 256).to(device)
decoder = DecoderRNN(256, len(word_to_index)).to(device)
encoder_optimizer = optim.SGD(encoder.parameters(), lr=0.01)
decoder_optimizer = optim.SGD(decoder.parameters(), lr=0.01)
criterion = nn.NLLLoss()



In [14]:
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden().to(device)

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei].unsqueeze(0).to(device), encoder_hidden)

    decoder_input = torch.tensor([[START_TOKEN]], device=device)
    decoder_hidden = encoder_hidden

    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        topv, topi = decoder_output.topk(1)
        decoder_input = topi.squeeze().detach()

        loss += criterion(decoder_output, target_tensor[di].unsqueeze(0))
        if decoder_input.item() == END_TOKEN:
            break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

def evaluate(input_tensor, target_tensor, encoder, decoder, criterion):
    with torch.no_grad():
        encoder_hidden = encoder.initHidden().to(device)

        input_length = input_tensor.size(0)
        target_length = target_tensor.size(0)

        loss = 0

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei].unsqueeze(0).to(device), encoder_hidden)

        decoder_input = torch.tensor([[START_TOKEN]], device=device)
        decoder_hidden = encoder_hidden

        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()

            loss += criterion(decoder_output, target_tensor[di].unsqueeze(0))
            if decoder_input.item() == END_TOKEN:
                break

        return loss.item() / target_length

def evaluate_accuracy(input_tensor, target_tensor, encoder, decoder):
    with torch.no_grad():
        encoder_hidden = encoder.initHidden().to(device)

        input_length = input_tensor.size(0)
        target_length = target_tensor.size(0)

        correct = 0

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei].unsqueeze(0).to(device), encoder_hidden)

        decoder_input = torch.tensor([[START_TOKEN]], device=device)
        decoder_hidden = encoder_hidden

        predicted_words = []
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()
            predicted_words.append(index_to_word[decoder_input.item()])
            if decoder_input.item() == END_TOKEN:
                break

        predicted_sentence = ' '.join(predicted_words)
        target_sentence = ' '.join([index_to_word[idx.item()] for idx in target_tensor])

        if predicted_sentence == target_sentence:
            correct = 1

        return correct

for epoch in range(1, 101):
    total_loss = 0
    total_correct = 0
    total_samples = 0
    for input_tensor, target_tensor in loader:
        input_tensor, target_tensor = input_tensor.to(device), target_tensor.to(device)
        loss = train(input_tensor[0], target_tensor[0], encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
        total_loss += loss

        # Calculate accuracy
        correct = evaluate_accuracy(input_tensor[0], target_tensor[0], encoder, decoder)
        total_correct += correct
        total_samples += 1

    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Train Loss: {total_loss / len(loader)}, Accuracy: {total_correct / total_samples * 100}%')

        # Evaluate on validation set
        val_loss = 0
        val_correct = 0
        for input_tensor, target_tensor in val_loader:
            input_tensor, target_tensor = input_tensor.to(device), target_tensor.to(device)
            val_loss += evaluate(input_tensor[0], target_tensor[0], encoder, decoder, criterion)
            val_correct += evaluate_accuracy(input_tensor[0], target_tensor[0], encoder, decoder)
        print(f'Validation Loss: {val_loss / len(val_loader)}, Accuracy: {val_correct / len(val_loader) * 100}%')



Epoch 10, Train Loss: 3.366551143541653, Accuracy: 0.0%
Validation Loss: 3.0788799923090706, Accuracy: 0.0%
Epoch 20, Train Loss: 1.5717783343494607, Accuracy: 20.77922077922078%
Validation Loss: 1.2897491367799894, Accuracy: 18.75%
Epoch 30, Train Loss: 0.28148724442289147, Accuracy: 97.40259740259741%
Validation Loss: 0.24820503422487822, Accuracy: 93.75%
Epoch 40, Train Loss: 0.09173959422663817, Accuracy: 100.0%
Validation Loss: 0.09243703580328395, Accuracy: 100.0%
Epoch 50, Train Loss: 0.04982806209208325, Accuracy: 100.0%
Validation Loss: 0.050230314445844484, Accuracy: 100.0%
Epoch 60, Train Loss: 0.03359718456160922, Accuracy: 100.0%
Validation Loss: 0.034289718046045255, Accuracy: 100.0%
Epoch 70, Train Loss: 0.025141987841329833, Accuracy: 100.0%
Validation Loss: 0.02576245762563, Accuracy: 100.0%
Epoch 80, Train Loss: 0.019996151920823463, Accuracy: 100.0%
Validation Loss: 0.02053751915948288, Accuracy: 100.0%
Epoch 90, Train Loss: 0.01655442230372705, Accuracy: 100.0%
Vali