<a href="https://colab.research.google.com/github/archyyu/translation-from-RNN-to-transformer/blob/main/machine_translation_by_rnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt # for making figures
%matplotlib inline

# Set random seed for reproducibility
torch.manual_seed(42)

<torch._C.Generator at 0x7cc8e4348330>

In [2]:
hidden_size = 100
embedding_dim = 20
learning_rate = 1e-1
batch_size = 20
line_len = 30
shuffle = True

In [3]:
class Encoder(nn.Module):
  def __init__(self, vocab_size, embedding_dim, hidden_size):
    super(Encoder, self).__init__()
    self.hidden_size = hidden_size
    self.embedding = nn.Embedding(vocab_size, embedding_dim)
    self.i2h = nn.Linear(embedding_dim, hidden_size, bias=False)
    self.h2h = nn.Linear(hidden_size, hidden_size, bias=False)

    self.hb2 = nn.Parameter(torch.zeros(1, hidden_size))

  def forward(self, x):
    h = torch.zeros((1, 1, self.hidden_size))
    for i in range(x.shape[1]):
      t = self.embedding(x[:,i])
      h = torch.tanh(self.i2h(t) + self.h2h(h) + self.hb2)
    return h

class Decoder(nn.Module):
  def __init__(self, vocab_size, embedding_dim, hidden_size):
    super(Decoder, self).__init__()
    self.hidden_size = hidden_size
    self.embedding_dim = embedding_dim
    self.embedding = nn.Embedding(vocab_size, self.embedding_dim)
    self.i2h = nn.Linear(self.embedding_dim, self.hidden_size, bias=False)
    self.h2h = nn.Linear(self.hidden_size, self.hidden_size, bias=False)
    self.h2o = nn.Linear(self.hidden_size, vocab_size, bias=False)

    self.hb2 = nn.Parameter(torch.zeros(1, self.hidden_size))
    self.ob = nn.Parameter(torch.zeros(1, vocab_size))

  def forward(self, x, h):
    output = []
    for i in range(x.shape[1]):
      t = self.embedding(x[:,i])
      h = torch.tanh(self.i2h(t) + self.h2h(h) + self.hb2)
      y = self.h2o(h) + self.ob
      output.append(y)
    return output

In [4]:
class Seq2Seq(nn.Module):
  def __init__(self, source_vocab_size, target_vocab_size, embedding_dim, hidden_size):
    super(Seq2Seq, self).__init__()
    self.embedding_dim = embedding_dim
    self.hidden_size = hidden_size
    self.encoder = Encoder(source_vocab_size, self.embedding_dim, self.hidden_size)
    self.decoder = Decoder(target_vocab_size, self.embedding_dim, self.hidden_size)
  def forward(self, source, target):
    hidden_state = self.encoder(source)
    output = self.decoder(target, hidden_state)
    return torch.cat(output, dim=0)

In [5]:
url = "https://raw.githubusercontent.com/archyyu/publicResource/main/eng-fra.txt"
response = requests.get(url)
lines = response.text.split('\n')
en_lines = []
fr_lines = []

start_character = '<'
end_character = '>'
padding_character = '&'

for i in range(20000,40000):
  item = lines[i].split('\t')
  en_lines.append('<' + item[0] + '>')
  fr_lines.append('<' + item[1] + '>')

max_len_line_en = max([len(l) for l in en_lines])
max_len_line_fr = max([len(l) for l in fr_lines])

for i in range(len(en_lines)):
  if (len(en_lines[i]) < max_len_line_en):
    en_lines[i] = en_lines[i].ljust(max_len_line_en, padding_character)
  if (len(fr_lines[i]) < max_len_line_fr):
    fr_lines[i] = fr_lines[i].ljust(max_len_line_fr, padding_character)


source_vocab = set(''.join(en_lines))
target_vocab = set(''.join(fr_lines))

source_vocab_size = len(set(''.join(en_lines)))
target_vocab_size = len(set(''.join(fr_lines)))

source_char_to_ix = {ch: i for i, ch in enumerate(source_vocab)}
source_ix_to_char = {i: ch for i, ch in enumerate(source_vocab)}

target_char_to_ix = {ch: i for i, ch in enumerate(target_vocab)}
target_ix_to_char = {i: ch for i, ch in enumerate(target_vocab)}

In [6]:
en_data = []
fr_data = []
for i in range(len(en_lines)):
  e = torch.tensor([source_char_to_ix[ch] for ch in en_lines[i]], dtype=torch.long).view(1, -1)
  en_data.append(e)
  f = torch.tensor([target_char_to_ix[ch] for ch in fr_lines[i]], dtype=torch.long).view(1, -1)
  fr_data.append(f)

en_data = torch.cat(en_data, dim=0)
fr_data = torch.cat(fr_data, dim=0)

tensor([10, 84, 21, 32, 58, 21, 85, 52, 32, 45, 85, 21, 32, 58, 41, 85, 61, 32,
        74,  9, 46, 33, 78, 50, 21, 25,  5, 13, 54, 54, 54, 54, 54, 54, 54, 54,
        54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,
        54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,
        54, 54])

In [None]:
p = 10
input = en_data[p:p+batch_size]
input.shape

torch.Size([20, 25])

In [None]:
input

In [None]:
input[:,0]

tensor([8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8])

In [None]:
#training
import torch.optim as optim

num_epochs = 3

# Define your model, loss function, and optimizer
encoder = Encoder(source_vocab_size, embedding_dim, hidden_size)
model = Seq2Seq(source_vocab_size, target_vocab_size, embedding_dim, hidden_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
  for p in range(len(en_data) - batch_size - 1):

    source_batch = en_data[p:p+batch_size]
    target_batch = fr_data[p:p+batch_size]

    optimizer.zero_grad()

    # Forward pass
    output = model(source_batch, target_batch)

    loss = criterion(output.view(-1, target_vocab_size), target_batch.view(-1))


    loss.backward()
    optimizer.step()

    if p%100 == 0:
      # Print or log the training loss for each epoch
      print(f'p {p}, Loss: {loss.item()}')



p 0, Loss: 4.728506565093994
p 100, Loss: 1.7974867820739746
p 200, Loss: 2.522261142730713
p 300, Loss: 4.206422328948975
p 400, Loss: 2.211564779281616
p 500, Loss: 1.787145972251892
p 600, Loss: 4.528811454772949
p 700, Loss: 5.3500752449035645
p 800, Loss: 2.0990705490112305
p 900, Loss: 4.296488285064697
p 1000, Loss: 2.886258602142334
p 1100, Loss: 1.683642864227295
p 1200, Loss: 2.2459874153137207
p 1300, Loss: 2.221745252609253
p 1400, Loss: 2.273402452468872
p 1500, Loss: 1.7251325845718384
p 1600, Loss: 2.1542458534240723
p 1700, Loss: 1.8772292137145996
p 1800, Loss: 4.139653205871582
p 1900, Loss: 4.822388172149658
p 2000, Loss: 7.3139238357543945
p 2100, Loss: 2.6425466537475586
p 2200, Loss: 2.130760669708252
p 2300, Loss: 1.7104228734970093
p 2400, Loss: 1.7202826738357544
p 2500, Loss: 3.5414130687713623
p 2600, Loss: 4.9831862449646
p 2700, Loss: 2.7417476177215576
p 2800, Loss: 1.9318255186080933
p 2900, Loss: 2.483248233795166
p 3000, Loss: 3.4351084232330322
p 3100,