https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html
https://github.com/bentrevett/pytorch-seq2seq/blob/main/1%20-%20Sequence%20to%20Sequence%20Learning%20with%20Neural%20Networks.ipynb


In [51]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import unicodedata
import re
import random

# Preparing data

In [52]:
SOS_token = 0
EOS_token = 1

class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {SOS_token: 'SOS', EOS_token: 'EOS'}
        self.n_words = 2    # for SOS and EOS tokens
        
    def add_sentence(self, sentence):
        for word in sentence.split(' '):
            self.add_word(word)
            
    def add_word(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [53]:
def unicode_to_ascii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters
def normalize_string(s):
    s = unicode_to_ascii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z!?]+", r" ", s)
    return s.strip()

In [54]:
def read_langs(lang1: str, lang2: str, reverse=False):
    lines = open("data/%s-%s.txt" % (lang1, lang2), encoding="utf-8").readlines()
    pairs = [[normalize_string(s) for s in l.split('\t')] for l in lines]
    
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)
        
    return input_lang, output_lang, pairs

In [55]:
input_lang, output_lang, pairs = read_langs('eng', 'fra')
for pair in pairs:
    input_lang.add_sentence(pair[0])
    output_lang.add_sentence(pair[1])

In [56]:
MAX_LENGTH = 100

# Neural network module

In [57]:
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size, dropout_p=0.1):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.dropout = nn.Dropout(dropout_p)
        
    def forward(self, x):
        embedded = self.dropout(self.embedding(x))
        output, hidden = self.gru(embedded)
        return output, hidden

In [58]:
class Decoder(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)
        
    def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):
        batch_size = encoder_outputs.size(0)
        decoder_input = torch.empty(batch_size, 1, dtype=torch.long, device=encoder_outputs.device).fill_(SOS_token)
        decoder_hidden = encoder_hidden
        decoder_outputs = []
        
        for i in range(MAX_LENGTH):
            decoder_output, decoder_hidden = self.forward_step(decoder_input, decoder_hidden)
            decoder_outputs.append(decoder_output)
            
            if target_tensor is not None:
                print(target_tensor[:, i].unsqueeze(1))
                decoder_input = target_tensor[:, i].unsqueeze(1)
            else:
                _, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze(-1).detach()
                
        decoder_outputs = torch.cat(decoder_outputs, dim=1)
        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1)
        
        return decoder_outputs, decoder_hidden, None
    
    def forward_step(self, input, hidden):
        output = self.embedding(input)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.out(output)
        
        return output, hidden

# Training and evaluation of this initial encoder-decoder

In [59]:
def indexes_from_sentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]

def tensor_from_sentence(lang, sentence):
    indexes = indexes_from_sentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long).view(-1, 1)

def tensors_from_pair(pair):
    input_tensor = tensor_from_sentence(input_lang, pair[0])
    target_tensor = tensor_from_sentence(output_lang, pair[1])
    return input_tensor, target_tensor

In [60]:
hidden_size = 128
encoder = Encoder(input_lang.n_words, hidden_size)
decoder = Decoder(hidden_size, output_lang.n_words)

In [61]:
criterion = nn.NLLLoss()

learning_rate = 0.01
encoder_optimizer = torch.optim.SGD(encoder.parameters(), lr=learning_rate)
decoder_optimizer = torch.optim.SGD(decoder.parameters(), lr=learning_rate)

for i in range(0, 1000):
    training_examples = random.choices(pairs, k=5000)
    
    for p in training_examples:
        i, t = tensors_from_pair(p)
        
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
    
        encoder_output, encoder_hidden = encoder(i)
        print(t)
        decoder_output, _, _ = decoder(encoder_output, encoder_hidden, t)
        
        loss = criterion(decoder_output, t)
        loss.backward()
        
        encoder_optimizer.step()
        decoder_optimizer.step()
    

tensor([[ 932],
        [ 629],
        [ 126],
        [ 212],
        [  62],
        [1407],
        [  37],
        [   1]])
tensor([[ 932],
        [ 629],
        [ 126],
        [ 212],
        [  62],
        [1407],
        [  37],
        [   1]])


RuntimeError: Expected hidden size (1, 8, 128), got [1, 7, 128]

In [62]:
input, target = tensors_from_pair(training_examples[0])
output, hidden = encoder(input)
output

output.size(0)
batch_size = output.size(0)
decoder_input = torch.empty(batch_size, 1, dtype=torch.long).fill_(SOS_token)
decoder_input

decoder(output, hidden)

(tensor([[[ -9.8524, -10.0098,  -9.6874,  ...,  -9.7908,  -9.8662, -10.2059],
          [ -9.6220, -10.1778,  -9.6987,  ...,  -9.8464, -10.0043, -10.2546],
          [ -9.8379, -10.1626,  -9.7662,  ...,  -9.8453, -10.0372, -10.2308],
          ...,
          [ -9.8341, -10.0373,  -9.8417,  ...,  -9.7817, -10.0032, -10.1979],
          [ -9.8205,  -9.9820,  -9.8593,  ...,  -9.9877, -10.0537, -10.2796],
          [ -9.8299,  -9.9611,  -9.8327,  ...,  -9.8110, -10.0717, -10.1962]],
 
         [[ -9.8067,  -9.9969,  -9.8850,  ...,  -9.9026,  -9.8638, -10.2110],
          [ -9.7569, -10.0952,  -9.8525,  ...,  -9.6568, -10.0844, -10.1839],
          [ -9.7408, -10.1989,  -9.8038,  ...,  -9.7969,  -9.7845, -10.1629],
          ...,
          [-10.0139, -10.3328,  -9.7442,  ...,  -9.6685, -10.2844, -10.2150],
          [-10.0139, -10.3328,  -9.7442,  ...,  -9.6685, -10.2844, -10.2150],
          [-10.0139, -10.3328,  -9.7442,  ...,  -9.6685, -10.2844, -10.2150]],
 
         [[ -9.8121, -10.149

In [None]:
[tensors_from_pair(p)[0] for p in training_examples]