<a href="https://colab.research.google.com/github/archyyu/translation-from-RNN-to-transformer/blob/main/machine_translation_by_rnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import requests
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torch.nn.functional as F
import matplotlib.pyplot as plt # for making figures
%matplotlib inline

# Set random seed for reproducibility
torch.manual_seed(42)

<torch._C.Generator at 0x7b229c224130>

In [86]:
hidden_size = 100
embedding_dim = 20
learning_rate = 1e-1
batch_size = 50
line_len = 30
shuffle = True

In [87]:
url = "https://raw.githubusercontent.com/archyyu/publicResource/main/eng-fra.txt"
response = requests.get(url)
lines = response.text.split('\n')
en_lines = []
fr_lines = []

start_character = '<'
end_character = '>'
padding_character = '&'

for i in range(20000,40000):
  item = lines[i].split('\t')
  en_lines.append('<' + item[0] + '>')
  fr_lines.append('<' + item[1] + '>')

max_len_line_en = max([len(l) for l in en_lines])
max_len_line_fr = max([len(l) for l in fr_lines])

for i in range(len(en_lines)):
  if (len(en_lines[i]) < max_len_line_en):
    en_lines[i] = en_lines[i].ljust(max_len_line_en, padding_character)
  if (len(fr_lines[i]) < max_len_line_fr):
    fr_lines[i] = fr_lines[i].ljust(max_len_line_fr, padding_character)


source_vocab = set(''.join(en_lines))
target_vocab = set(''.join(fr_lines))

source_vocab_size = len(set(''.join(en_lines)))
target_vocab_size = len(set(''.join(fr_lines)))

source_char_to_ix = {ch: i for i, ch in enumerate(source_vocab)}
source_ix_to_char = {i: ch for i, ch in enumerate(source_vocab)}

target_char_to_ix = {ch: i for i, ch in enumerate(target_vocab)}
target_ix_to_char = {i: ch for i, ch in enumerate(target_vocab)}

In [None]:
en_lines

In [88]:
def line_to_tensor(line):
  result = []
  line_ten = torch.tensor([source_char_to_ix[ch] for ch in test_line], dtype=torch.long).view(1, -1)
  result.append(line_ten)
  return torch.cat(result, dim=0)

en_data = []
fr_data = []
for i in range(len(en_lines)):
  e = torch.tensor([source_char_to_ix[ch] for ch in en_lines[i]], dtype=torch.long).view(1, -1)
  en_data.append(e)
  f = torch.tensor([target_char_to_ix[ch] for ch in fr_lines[i]], dtype=torch.long).view(1, -1)
  fr_data.append(f)

en_data = torch.cat(en_data, dim=0)
fr_data = torch.cat(fr_data, dim=0)

In [93]:
class Encoder(nn.Module):
  def __init__(self, vocab_size, embedding_dim, hidden_size):
    super(Encoder, self).__init__()
    self.hidden_size = hidden_size
    self.embedding = nn.Embedding(vocab_size, embedding_dim)
    self.i2h = nn.Linear(embedding_dim, hidden_size, bias=False)
    self.h2h = nn.Linear(hidden_size, hidden_size, bias=False)
    self.hb2 = nn.Parameter(torch.zeros(1, hidden_size))

  def forward(self, x):
    h = torch.zeros((1, 1, self.hidden_size))
    for i in range(x.shape[1]):
      t = self.embedding(x[:,i])
      h = torch.tanh(self.i2h(t) + self.h2h(h) + self.hb2)
    return h


class Decoder(nn.Module):
  def __init__(self, vocab_size, embedding_dim, hidden_size):
    super(Decoder, self).__init__()
    self.hidden_size = hidden_size
    self.embedding_dim = embedding_dim
    self.embedding = nn.Embedding(vocab_size, self.embedding_dim)
    self.i2h = nn.Linear(self.embedding_dim, self.hidden_size, bias=False)
    self.h2h = nn.Linear(self.hidden_size, self.hidden_size, bias=False)
    self.h2o = nn.Linear(self.hidden_size, vocab_size, bias=False)

    self.hb2 = nn.Parameter(torch.zeros(1, self.hidden_size))
    self.ob = nn.Parameter(torch.zeros(1, vocab_size))

  def forward(self, x, h):
    output = []
    if x is None:
      x = torch.tensor([[target_char_to_ix[start_character]]],dtype=torch.long)
      for i in range(max_len_line_fr):
        t = self.embedding(x)
        h = torch.tanh(self.i2h(t) + self.h2h(h) + self.hb2)
        y = self.h2o(h) + self.ob
        p = nn.functional.softmax(y, dim=-1).detach().numpy().ravel()
        ix = np.random.choice(range(y.shape[-1]), p=p)
        # p = nn.functional.softmax(y, dim=-1)
        # ix = torch.argmax(p).item()
        if ix == target_char_to_ix[end_character]:
          break
        x = torch.tensor([[ix]], dtype=torch.long)
        output.append(x)
    else:
      for i in range(x.shape[1]):
        t = self.embedding(x[:,i])
        h = torch.tanh(self.i2h(t) + self.h2h(h) + self.hb2)
        y = self.h2o(h) + self.ob
        output.append(y)
    return output

class Seq2Seq(nn.Module):
  def __init__(self, source_vocab_size, target_vocab_size, embedding_dim, hidden_size):
    super(Seq2Seq, self).__init__()
    self.embedding_dim = embedding_dim
    self.hidden_size = hidden_size
    self.encoder = Encoder(source_vocab_size, self.embedding_dim, self.hidden_size)
    self.decoder = Decoder(target_vocab_size, self.embedding_dim, self.hidden_size)
  def forward(self, source, target):
    hidden_state = self.encoder(source)
    output = self.decoder(target, hidden_state)
    return torch.cat(output, dim=0)

# Define your model, loss function, and optimizer
model = Seq2Seq(source_vocab_size, target_vocab_size, embedding_dim, hidden_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
#training
import torch.optim as optim

num_epochs = 3

# Training loop
for epoch in range(num_epochs):
  for p in range(len(en_data) - batch_size - 1):

    source_batch = en_data[p:p+batch_size]
    target_batch = fr_data[p:p+batch_size]

    optimizer.zero_grad()

    # Forward pass
    output = model(source_batch, target_batch)

    loss = criterion(output.view(-1, target_vocab_size), target_batch.view(-1))

    loss.backward()
    optimizer.step()

    if p%100 == 0:
      # Print or log the training loss for each epoch
      print(f'p {p}, Loss: {loss.item()}')

    p += batch_size


p 0, Loss: 3.251887321472168
p 100, Loss: 2.9494118690490723
p 200, Loss: 2.3397650718688965
p 300, Loss: 2.3574161529541016
p 400, Loss: 3.7748372554779053
p 500, Loss: 5.193264007568359
p 600, Loss: 2.66845965385437
p 700, Loss: 2.1784262657165527
p 800, Loss: 1.7893831729888916
p 900, Loss: 1.9908764362335205
p 1000, Loss: 2.730186939239502
p 1100, Loss: 1.5725866556167603
p 1200, Loss: 2.3945751190185547
p 1300, Loss: 1.869131088256836
p 1400, Loss: 1.784009575843811
p 1500, Loss: 1.761156439781189
p 1600, Loss: 8.86120319366455
p 1700, Loss: 9.07079792022705
p 1800, Loss: 6.381490707397461
p 1900, Loss: 6.845973491668701
p 2000, Loss: 4.543936729431152
p 2100, Loss: 1.9161080121994019
p 2200, Loss: 1.7431401014328003
p 2300, Loss: 1.9001036882400513
p 2400, Loss: 2.060920000076294
p 2500, Loss: 2.929377317428589
p 2600, Loss: 2.716270923614502
p 2700, Loss: 2.191983461380005
p 2800, Loss: 2.3377885818481445
p 2900, Loss: 1.7934740781784058
p 3000, Loss: 6.534148216247559
p 3100, L

In [109]:
test_line = "<closer look!>"

input = line_to_tensor(test_line)
output = model(input, None).view(-1)

o = [target_ix_to_char[ch.item()] for ch in output]
print(''.join(o))


&&.&&&&&&&&&&& À&&&tÊt&&uhê&&&&&&l&&&&&&&& ÀV&aTr&&.i&&sTr&&&aU&&&&&&i&.V<
