In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define vocabulary and translation pairs
english_words = ['hello', 'world', 'machine', 'learning', 'deep', 'neural', 'network', 'translate', 'language', 'model']
french_words = ['bonjour', 'monde', 'machine', 'apprentissage', 'profond', 'neuronal', 'réseau', 'traduire', 'langue', 'modèle']

# Create word to index mappings
en_word2idx = {word: idx for idx, word in enumerate(english_words)}
fr_word2idx = {word: idx for idx, word in enumerate(french_words)}
en_idx2word = {idx: word for word, idx in en_word2idx.items()}
fr_idx2word = {idx: word for word, idx in fr_word2idx.items()}

# Encoder-Decoder Model
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.hidden_size = hidden_size

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def init_hidden(self):
        return torch.zeros(1, 1, self.hidden_size)

class Decoder(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        self.hidden_size = hidden_size

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.gru(embedded, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

# Training the Model
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=10):
    encoder_hidden = encoder.init_hidden()
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    loss = 0
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
    decoder_input = torch.tensor([[0]])  # Start of sentence token
    decoder_hidden = encoder_hidden
    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        topv, topi = decoder_output.topk(1)
        decoder_input = topi.squeeze().detach()
        loss += criterion(decoder_output, target_tensor[di])
        if decoder_input.item() == 1:  # End of sentence token
            break
    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss.item() / target_length

# Prepare training data
def tensor_from_sentence(sentence, word2idx):
    indexes = [word2idx[word] for word in sentence.split()]
    indexes.append(1)  # End of sentence token
    return torch.tensor(indexes, dtype=torch.long).view(-1, 1)

# Hyperparameters
hidden_size = 256
learning_rate = 0.01
n_epochs = 1000

# Initialize models, optimizers, and loss function
encoder = Encoder(len(english_words), hidden_size)
decoder = Decoder(hidden_size, len(french_words))
encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
criterion = nn.NLLLoss()

# Training loop
for epoch in range(n_epochs):
    for en_word, fr_word in zip(english_words, french_words):
        input_tensor = tensor_from_sentence(en_word, en_word2idx)
        target_tensor = tensor_from_sentence(fr_word, fr_word2idx)
        loss = train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
    if epoch % 100 == 0:
        print(f'Epoch {epoch} Loss {loss:.4f}')

# Translate a word
def translate(word):
    with torch.no_grad():
        input_tensor = tensor_from_sentence(word, en_word2idx)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.init_hidden()
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        decoder_input = torch.tensor([[0]])  # Start of sentence token
        decoder_hidden = encoder_hidden
        translated_word = ''
        for di in range(10):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            if topi.item() == 1:
                break
            translated_word += fr_idx2word[topi.item()] + ' '
            decoder_input = topi.squeeze().detach()
        return translated_word.strip()

# Test the translation
test_words = ['hello', 'world', 'machine']
for word in test_words:
    translated = translate(word)
    print(f'{word} -> {translated}')


Epoch 0 Loss 1.2927
Epoch 100 Loss 0.0258
Epoch 200 Loss 0.0081
Epoch 300 Loss 0.0045
Epoch 400 Loss 0.0031
Epoch 500 Loss 0.0023
Epoch 600 Loss 0.0018
Epoch 700 Loss 0.0015
Epoch 800 Loss 0.0013
Epoch 900 Loss 0.0011
hello -> bonjour
world -> 
machine -> machine
