<a href="https://colab.research.google.com/github/chang-heekim/Implementation_Deep_Learning_Paper/blob/main/Sequence_To_Sequence_Learning_With_Neural_Networks/Seq2Seq_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Download spaCy Language Models

In [None]:
# Download the spaCy English and German models under the shortcut names
# 'en' / 'de', which are the names passed to spacy.load() further down.
# NOTE(review): shortcut names appear to be a spaCy 2.x feature (the captured
# output shows 2.2.5) - confirm the pinned spaCy version before re-running.
!python -m spacy download en
!python -m spacy download de

Collecting en_core_web_sm==2.2.5
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.5/en_core_web_sm-2.2.5.tar.gz (12.0 MB)
[K     |████████████████████████████████| 12.0 MB 5.3 MB/s 
[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_sm')
[38;5;2m✔ Linking successful[0m
/usr/local/lib/python3.7/dist-packages/en_core_web_sm -->
/usr/local/lib/python3.7/dist-packages/spacy/data/en
You can now load the model via spacy.load('en')
Collecting de_core_news_sm==2.2.5
  Downloading https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-2.2.5/de_core_news_sm-2.2.5.tar.gz (14.9 MB)
[K     |████████████████████████████████| 14.9 MB 5.2 MB/s 
Building wheels for collected packages: de-core-news-sm
  Building wheel for de-core-news-sm (setup.py) ... [?25l[?25hdone
  Created wheel for de-core-news-sm: filename=de_core_news_sm-2.2.5-py3-none-any.whl size=14907055 sha256=be2ce35

# Load Necessary Libraries & Set Up Device, Hyperparameters

In [None]:
import numpy as np
import random
import math

import torch
from torch import nn, optim

import spacy
from torchtext.legacy.datasets import Multi30k
from torchtext.legacy.data import Field, BucketIterator

from torchsummary import summary

In [None]:
# Seed every RNG the notebook draws from so that Restart & Run All is
# repeatable: Python's `random` drives the teacher-forcing coin flips, and
# torch drives weight initialisation and dropout.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# Ask cuDNN for deterministic kernels; has no effect on CPU-only runs.
torch.backends.cudnn.deterministic = True

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
batch_size = 128  # sentence pairs per mini-batch
epochs = 20       # full passes over the training set

# Preprocessing Data

In [None]:
# Load the spaCy pipelines downloaded at the top of the notebook; their
# tokenizers back tokenize_en / tokenize_de below.
spacy_en = spacy.load('en')
spacy_de = spacy.load('de')

In [None]:
# Sanity check: show how the English tokenizer splits a short sentence,
# one "position: token" line per token.
tokenized = spacy_en.tokenizer('Good to see you.')
print('\n'.join(f'{idx}: {token}' for idx, token in enumerate(tokenized)))

0: Good
1: to
2: see
3: you
4: .


In [None]:
def tokenize_en(text):
    """Split English text into spaCy token strings and return them reversed.

    The last token of the sentence comes first in the returned list.
    """
    words = [token.text for token in spacy_en.tokenizer(text)]
    words.reverse()
    return words

def tokenize_de(text):
    """Split German text into spaCy token strings, keeping the original order."""
    words = []
    for token in spacy_de.tokenizer(text):
        words.append(token.text)
    return words

In [None]:
# Source (English) and target (German) fields. Both lower-case their tokens
# and wrap every sequence in <sos> ... <eos>.
# NOTE: tokenize_en returns tokens in reversed order, so SRC sequences are
# stored reversed; TRG sequences keep their natural order.
SRC = Field(tokenize=tokenize_en, init_token='<sos>', eos_token='<eos>', lower=True)
TRG = Field(tokenize=tokenize_de, init_token='<sos>', eos_token='<eos>', lower=True)

In [None]:
# Load the Multi30k train/validation/test splits, pairing the '.en' side with
# SRC and the '.de' side with TRG.
train_data, valid_data, test_data = Multi30k.splits(exts=('.en', '.de'), fields=(SRC,TRG))

In [None]:
# Report how many sentence pairs each split contains.
print(
    f'Number of training examples: {len(train_data.examples)}',
    f'Number of validation examples: {len(valid_data.examples)}',
    f'Number of testing examples: {len(test_data.examples)}',
    sep='\n',
)

Number of training examples: 29000
Number of validation examples: 1014
Number of testing examples: 1000


In [None]:
# Build both vocabularies from the training split only; min_freq=2 is passed
# so tokens seen fewer than two times are left out of the vocabulary.
for field in (SRC, TRG):
    field.build_vocab(train_data, min_freq=2)

print(
    f'length SRC: {len(SRC.vocab)}',
    f'length TRG: {len(TRG.vocab)}',
    sep='\n',
)

length SRC: 5893
length TRG: 7855


In [None]:
# One BucketIterator per split, all using the same batch size and the
# configured device.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=batch_size,
    device=device
)

In [None]:
# Peek at the first training batch: print the vocabulary index found at each
# time step of the first sentence (column 0) in the batch.
for batch in train_iterator:
    src, trg = batch.src, batch.trg
    for i, token_id in enumerate(src[:, 0]):
        print(f'Index {i}: {token_id.item()}')

    break

Index 0: 2
Index 1: 5
Index 2: 1324
Index 3: 1385
Index 4: 18
Index 5: 853
Index 6: 33
Index 7: 24
Index 8: 4
Index 9: 489
Index 10: 10
Index 11: 1211
Index 12: 1534
Index 13: 4
Index 14: 3
Index 15: 1
Index 16: 1
Index 17: 1
Index 18: 1
Index 19: 1
Index 20: 1
Index 21: 1
Index 22: 1
Index 23: 1
Index 24: 1
Index 25: 1
Index 26: 1
Index 27: 1
Index 28: 1
Index 29: 1
Index 30: 1
Index 31: 1
Index 32: 1
Index 33: 1
Index 34: 1


# Define Encoder, Decoder, Seq2Seq


In [None]:
class Encoder(nn.Module):
    """Multi-layer LSTM encoder that summarises a source batch into its final states.

    Args:
        input_dim: number of rows in the embedding table (source vocabulary size).
        embed_dim: size of each token embedding.
        hid_dim: LSTM hidden/cell state size.
        n_layers: number of stacked LSTM layers.
        dropout_prob: dropout applied to the embeddings and between LSTM layers.
    """

    def __init__(self, input_dim, embed_dim, hid_dim, n_layers, dropout_prob):
        super().__init__()

        self.embedding = nn.Embedding(num_embeddings=input_dim, embedding_dim=embed_dim)
        self.rnn = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hid_dim,
            num_layers=n_layers,
            dropout=dropout_prob,
        )
        self.dropout = nn.Dropout(p=dropout_prob)

    def forward(self, src):
        """Encode `src` and return the LSTM's final `(hidden, cell)` states.

        `src` holds token indices laid out as [src_len, batch_size] (the LSTM
        is not batch-first). Both returned tensors are
        [n_layers, batch_size, hid_dim]; the per-step outputs are discarded.
        """
        # [src_len, batch_size] -> [src_len, batch_size, embed_dim]
        embedded = self.embedding(src)
        embedded = self.dropout(embedded)

        # Only the final states are needed to initialise the decoder.
        _, final_state = self.rnn(embedded)
        hidden, cell = final_state
        return hidden, cell

In [None]:
class Decoder(nn.Module):
    """Multi-layer LSTM decoder that predicts one target token per call.

    Args:
        output_dim: size of the target vocabulary (also the width of the logits).
        emb_dim: size of each token embedding.
        hid_dim: LSTM hidden/cell state size.
        n_layers: number of stacked LSTM layers.
        dropout_prob: dropout applied to the embeddings and between LSTM layers.
    """

    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout_prob):
        super().__init__()
        # Kept as an attribute so Seq2Seq can size its output buffer.
        self.output_dim = output_dim

        self.embedding = nn.Embedding(num_embeddings=output_dim, embedding_dim=emb_dim)
        self.rnn = nn.LSTM(
            input_size=emb_dim,
            hidden_size=hid_dim,
            num_layers=n_layers,
            dropout=dropout_prob,
        )
        self.fc = nn.Linear(in_features=hid_dim, out_features=output_dim)
        self.dropout = nn.Dropout(p=dropout_prob)

    def forward(self, input, hidden, cell):
        """Run a single decoding step.

        Args:
            input: token indices for the current step, shape [batch_size].
            hidden, cell: LSTM states, each [n_layers, batch_size, hid_dim].

        Returns:
            `(pred, hidden, cell)` where `pred` holds unnormalised scores of
            shape [batch_size, output_dim] and the states are the updated ones.
        """
        # Add a length-1 time axis: [batch_size] -> [1, batch_size]
        step_tokens = input.unsqueeze(0)

        # [1, batch_size] -> [1, batch_size, emb_dim]
        embedded = self.dropout(self.embedding(step_tokens))

        # step_out: [1, batch_size, hid_dim]; states keep their shape.
        step_out, (hidden, cell) = self.rnn(embedded, (hidden, cell))

        # Drop the time axis again before projecting onto the vocabulary.
        pred = self.fc(step_out.squeeze(0))
        return pred, hidden, cell

In [None]:
class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes a target sequence step by step.

    Args:
        encoder: module mapping `src` to `(hidden, cell)` initial decoder states.
        decoder: module with an `output_dim` attribute whose call
            `decoder(input, hidden, cell)` returns `(scores, hidden, cell)`.
    """

    def __init__(self, encoder, decoder):
        super().__init__()

        self.encoder = encoder
        self.decoder = decoder

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        """Return per-step vocabulary scores for `trg`.

        Args:
            src: source token indices, passed straight to the encoder.
            trg: target token indices laid out as [trg_len, batch_size]; row 0
                is used as the first decoder input.
            teacher_forcing_ratio: probability, drawn once per time step for
                the whole batch, of feeding the ground-truth token instead of
                the model's own prediction. Pass 0 to decode without it.

        Returns:
            Tensor of shape [trg_len, batch_size, decoder.output_dim]. Row 0 is
            never written and stays all zeros, so callers should skip it when
            computing the loss.
        """
        hidden, cell = self.encoder(src)

        trg_len, batch_size = trg.shape[0], trg.shape[1]
        trg_vocab_size = self.decoder.output_dim
        # Allocate on the same device as the inputs rather than depending on a
        # notebook-level `device` global, so the module works wherever it is moved.
        outputs = torch.zeros(trg_len, batch_size, trg_vocab_size, device=trg.device)

        input = trg[0, :]

        for t in range(1, trg_len):
            output, hidden, cell = self.decoder(input, hidden, cell)
            outputs[t] = output

            # One coin flip per step decides the next input for every sentence
            # in the batch.
            teacher_force = random.random() < teacher_forcing_ratio
            input = trg[t] if teacher_force else output.argmax(1)

        return outputs

# Function to initialize model parameters

In [None]:
def init_weights(model):
    """Overwrite every parameter reachable from `model` with U(-0.08, 0.08) samples."""
    bound = 0.08
    for param in model.parameters():
        nn.init.uniform_(param.data, a=-bound, b=bound)

# Set Model, Optimizer, Loss

In [None]:
# Encoder and decoder share the same sizes: 256-d embeddings, 512-d LSTM
# state, 2 layers, dropout 0.5.
encoder = Encoder(
    input_dim=len(SRC.vocab),
    embed_dim=256,
    hid_dim=512,
    n_layers=2,
    dropout_prob=0.5,
)
decoder = Decoder(
    output_dim=len(TRG.vocab),
    emb_dim=256,
    hid_dim=512,
    n_layers=2,
    dropout_prob=0.5,
)
model = Seq2Seq(encoder, decoder).to(device)

# `apply` calls init_weights on the model and on each of its submodules.
model.apply(init_weights)

optimizer = optim.Adam(model.parameters(), lr=1e-3)
# Padding positions in the target do not contribute to the loss.
pad_idx = TRG.vocab.stoi[TRG.pad_token]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

In [None]:
# Train for `epochs` passes; after each pass report the mean per-batch loss and
# perplexity on the training and validation iterators.
for epoch in range(1, epochs + 1):
    # ---- training pass (dropout on, default teacher forcing) ----
    model.train()
    current_loss = 0.0
    print(f'[Epoch {epoch} / {epochs}] [', end='')
    for idx, batch in enumerate(train_iterator):
        src, trg = batch.src, batch.trg

        # Clear gradients left over from the previous step before this one.
        optimizer.zero_grad()
        output = model(src, trg)

        # Row 0 of `output` is never filled by the model (trg[0] only serves as
        # the first decoder input), so drop it and flatten the remaining
        # [steps, batch] positions into one axis for the loss.
        output = output[1:].view(-1, output.shape[-1])
        trg = trg[1:].view(-1)

        loss = criterion(output, trg)
        loss.backward()
        # Clip the global gradient norm to 1 before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()

        current_loss += loss.item()
        # Draw one progress tick every 10 batches.
        if idx % 10 == 0:
            print('-', end='')

    train_loss = current_loss / len(train_iterator)
    print(f'->] Train Loss: {train_loss}, Train PPL: {math.exp(train_loss):7.3f} ', end='')

    # ---- validation pass (dropout off, no teacher forcing, no gradients) ----
    model.eval()
    with torch.no_grad():
        val_loss = 0.0
        for batch in valid_iterator:
            src, trg = batch.src, batch.trg

            # Teacher-forcing ratio 0: the decoder consumes its own predictions.
            output = model(src, trg, 0)

            output = output[1:].view(-1, output.shape[-1])
            trg = trg[1:].view(-1)

            val_loss += criterion(output, trg).item()

        val_loss = val_loss / len(valid_iterator)
    print(f' Validation Loss: {val_loss}, Validation PPL: {math.exp(val_loss):7.3f}')

[Epoch 1 / 20] [------------------------>] Train Loss: 5.181033023128426, Train PPL: 177.866  Validation Loss: 5.0119956731796265, Validation PPL: 150.204
[Epoch 2 / 20] [------------------------>] Train Loss: 4.585800250721398, Train PPL:  98.082  Validation Loss: 4.906110525131226, Validation PPL: 135.113
[Epoch 3 / 20] [------------------------>] Train Loss: 4.291385308236277, Train PPL:  73.068  Validation Loss: 4.78971654176712, Validation PPL: 120.267
[Epoch 4 / 20] [------------------------>] Train Loss: 4.083556217244018, Train PPL:  59.356  Validation Loss: 4.633901119232178, Validation PPL: 102.915
[Epoch 5 / 20] [------------------------>] Train Loss: 3.928897341967679, Train PPL:  50.851  Validation Loss: 4.523829877376556, Validation PPL:  92.188
[Epoch 6 / 20] [------------------------>] Train Loss: 3.8191933012218726, Train PPL:  45.567  Validation Loss: 4.488210707902908, Validation PPL:  88.962
[Epoch 7 / 20] [------------------------>] Train Loss: 3.7105394344497884, 

# Evaluate Test Dataset

In [None]:
# Evaluate on the held-out test split: dropout off, no gradients, and no
# teacher forcing (the decoder consumes its own predictions).
model.eval()
with torch.no_grad():
    test_loss = 0.0
    for batch in test_iterator:
        src, trg = batch.src, batch.trg

        output = model(src, trg, 0)

        # Row 0 of `output` is never filled by the model, so skip it and
        # flatten the remaining positions for the loss.
        output = output[1:].view(-1, output.shape[-1])
        trg = trg[1:].view(-1)

        test_loss += criterion(output, trg).item()

    # Average over the number of *test* batches (the validation iterator's
    # length was used here before, which is only right if both match).
    test_loss = test_loss / len(test_iterator)
print(f' Test Loss: {test_loss}, Test PPL: {math.exp(test_loss):7.3f}')

 Test Loss: 8.87286627292633, Test PPL: 7135.705


# Function that translates a single sentence

In [None]:
def translate_sentence(sentence, src_field, trg_field, model, device, max_len=50):
    """Greedily translate one source sentence with a trained encoder-decoder.

    Args:
        sentence: either a raw string or a list of already-tokenized source
            tokens. A string is run through `src_field.preprocess`, i.e. the
            same tokenizer the field applied to the training data. A list is
            used as given (only lower-cased), so it must already be in the
            token order the model was trained on.
        src_field: source field providing `init_token`, `eos_token`, `vocab`
            and (for string input) `preprocess`.
        trg_field: target field providing `init_token`, `eos_token` and `vocab`.
        model: object exposing `eval()`, `encoder(src)` and
            `decoder(input, hidden, cell)`.
        device: device the input tensors are moved to.
        max_len: maximum number of tokens to generate.

    Returns:
        List of predicted target tokens, without the leading start token and
        without the end token. If `max_len` is reached before an end token is
        produced, all `max_len` generated tokens are returned.
    """
    model.eval()

    if isinstance(sentence, str):
        # Use the field's own preprocessing instead of loading spaCy again on
        # every call; this also keeps any token reordering done by the field's
        # tokenizer consistent between training and inference.
        tokens = src_field.preprocess(sentence)
    else:
        tokens = sentence
    tokens = [token.lower() for token in tokens]

    tokens = [src_field.init_token] + tokens + [src_field.eos_token]
    src_indexes = [src_field.vocab.stoi[token] for token in tokens]
    # Column vector: [src_len, 1], i.e. a batch containing one sentence.
    src_tensor = torch.LongTensor(src_indexes).unsqueeze(1).to(device)

    eos_idx = trg_field.vocab.stoi[trg_field.eos_token]
    trg_indexes = [trg_field.vocab.stoi[trg_field.init_token]]

    with torch.no_grad():
        hidden, cell = model.encoder(src_tensor)

        for _ in range(max_len):
            # Feed the most recent token back in as the next decoder input.
            trg_tensor = torch.LongTensor([trg_indexes[-1]]).to(device)
            output, hidden, cell = model.decoder(trg_tensor, hidden, cell)

            pred_token = output.argmax(1).item()
            # Stop at the end token without storing it, so the result never
            # needs a blind "[:-1]" that would eat a real token when decoding
            # stops because of max_len instead.
            if pred_token == eos_idx:
                break
            trg_indexes.append(pred_token)

    # Skip the start token at position 0.
    return [trg_field.vocab.itos[i] for i in trg_indexes[1:]]

# Translate Sentence

In [None]:
example_idx = 1

example = vars(test_data.examples[example_idx])

# SRC tokenizes with tokenize_en, which reverses the tokens, so the stored
# source is in the (reversed) order the model was trained on. Feed that order
# to the model and un-reverse a copy only for display.
src_model_order = example['src']
src = src_model_order[::-1]
trg = example['trg']

print(f'Source Sequence: {" ".join(src)}')
print(f'Target Sequence: {" ".join(trg)}')
print("Pred Sequence:", " ".join(translate_sentence(src_model_order, SRC, TRG, model, device)))

Source Sequence: man sitting using tool at a table in his home .
Target Sequence: ein sitzender mann , der an einem tisch in seinem haus mit einem werkzeug arbeitet .
Pred Sequence: ein mann singt in einem restaurant , während ein anderer mann auf dem . .
