# Language Translation

In [1]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, random_split, DataLoader

import torchtext

from torchsummary import summary

import spacy

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import time
import math
from PIL import Image
import glob
from IPython.display import display

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


HYPERPARAMETERS

In [3]:
BATCH_SIZE = 64
LR = 1e-4
NUM_EPOCHES = 3

## Preprocessing

In [4]:
nlp_english = spacy.load("en")

In [5]:
nlp_german = spacy.load("de")

In [6]:
def tokenizer_english(text):
    return [token.text for token in nlp_english.tokenizer(text)]

In [7]:
def tokenizer_german(text):
    return [token.text for token in nlp_german.tokenizer(text)]

In [8]:
tokenizer_english("Hi guys, my name Jeff")

['Hi', 'guys', ',', 'my', 'name', 'Jeff']

In [9]:
tokenizer_german("I dont know any German")

['I', 'dont', 'know', 'any', 'German']

In [10]:
ENGLISH = torchtext.data.Field(tokenize=tokenizer_english, lower=True, init_token="<sos>", eos_token="<eos>")

In [11]:
GERMAN = torchtext.data.Field(tokenize=tokenizer_german, lower=True, init_token="<sos>", eos_token="<eos>")

In [12]:
train, validation, test = torchtext.datasets.Multi30k.splits(exts=(".de", ".en"), fields=(GERMAN, ENGLISH))

In [13]:
ENGLISH.build_vocab(train, max_size=10000, min_freq=1)

In [14]:
GERMAN.build_vocab(train, max_size=10000, min_freq=1)

In [15]:
print("ENGLISH vocab_size: ", len(ENGLISH.vocab))
print("GERMAN vocab_size: ", len(GERMAN.vocab))

ENGLISH vocab_size:  9799
GERMAN vocab_size:  10004


In [16]:
train_dataloader, validation_dataloader, test_dataloader = torchtext.data.BucketIterator.splits(
    (train, validation, test),
    batch_size=BATCH_SIZE,
    sort_within_batch=True,
    sort_key=lambda x: len(x.src),
    device=device,
)

In [17]:
for batch_idx, data in enumerate(train_dataloader):
    print(batch_idx)
    print(data.src.transpose(-1, -2).size())
    print(data.trg.transpose(-1, -2).size())
    break

0
torch.Size([64, 19])
torch.Size([64, 24])


In [18]:
def german2english(model, german_sentence, device="cpu", max_len=100):
    tokens = [token.text.lower() for token in nlp_german(german_sentence)]
    tokens = ["<sos>"] + tokens + ["<eos>"]
    
    indexes = [GERMAN.vocab.stoi[token] for token in tokens]
    indexes_tensor = torch.LongTensor(indexes).unsqueeze(0).to(device)
    
    english_sentence = [ENGLISH.vocab.stoi["<sos>"]]
    
    for i in range(max_len):
        trg = torch.LongTensor(english_sentence).unsqueeze(0).to(device)

        with torch.no_grad():
            print(indexes_tensor.size(), trg.size())
            word = model(indexes_tensor, trg)
            print(word.size())

        print(word.argmax(-1))
        top = word.argmax(-1)[-1, :].item()
        english_sentence.append(top)

        if top == ENGLISH.vocab.stoi["<eos>"]:
            break

    english_sentence = [ENGLISH.vocab.itos[word] for word in english_sentence]
    
    return english_sentence[1:]

## Model

In [19]:
from models.transformer import Transformer

In [20]:
source_vocab_size = len(GERMAN.vocab)
target_vocab_size = len(ENGLISH.vocab)
embed_size = 512
num_head = 16
num_ff = 1024
encoder_layers = 3
decoder_layers = 3
hidden_size = 256
dropout = 0.2

In [21]:
model = Transformer(source_vocab_size, target_vocab_size, embed_size, num_head, num_ff, encoder_layers, decoder_layers, hidden_size, dropout=dropout, device=device).to(device)
model

Transformer(
  (encoder_embed): Embedding(10004, 512)
  (decoder_embed): Embedding(9799, 512)
  (encoder_positional_encoding): PositionalEncoding(
    (dropout): Dropout(p=0.2, inplace=False)
  )
  (decoder_positional_encoding): PositionalEncoding(
    (dropout): Dropout(p=0.2, inplace=False)
  )
  (encoders): ModuleList(
    (0): Transformer_Encoder(
      (Norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (Norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (multi_attention): MultiHeadAttention(
        (dropout_layer): Dropout(p=0.2, inplace=False)
        (Q): Linear(in_features=512, out_features=512, bias=True)
        (K): Linear(in_features=512, out_features=512, bias=True)
        (V): Linear(in_features=512, out_features=512, bias=True)
        (linear): Linear(in_features=512, out_features=512, bias=True)
      )
      (feed_forward): Sequential(
        (0): Linear(in_features=512, out_features=256, bias=True)
        (1): ReLU()
        (2)

In [22]:
def test(size):
    sample_in_x = torch.rand(BATCH_SIZE, 100).type(torch.LongTensor).to(device)
    sample_in_y = torch.rand(BATCH_SIZE, size).type(torch.LongTensor).to(device)
    sample_out = model(sample_in_x, sample_in_y)
    print("Dimenstions of Input Source Vector: ", sample_in_x.size())
    print("Dimenstions of Input Target Vector: ", sample_in_y.size())
    print("Dimenstions of Predicted Vector: ", sample_out.size())
    
test(90)
test(100)
test(110)

Dimenstions of Input Source Vector:  torch.Size([64, 100])
Dimenstions of Input Target Vector:  torch.Size([64, 90])
Dimenstions of Predicted Vector:  torch.Size([64, 90, 9799])
Dimenstions of Input Source Vector:  torch.Size([64, 100])
Dimenstions of Input Target Vector:  torch.Size([64, 100])
Dimenstions of Predicted Vector:  torch.Size([64, 100, 9799])
Dimenstions of Input Source Vector:  torch.Size([64, 100])
Dimenstions of Input Target Vector:  torch.Size([64, 110])
Dimenstions of Predicted Vector:  torch.Size([64, 110, 9799])


## Training

In [23]:
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss(ignore_index = ENGLISH.vocab.stoi["<pad>"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=10, verbose=True)

In [24]:
for epoch in range(1, NUM_EPOCHES+1):
    
    #model.eval()
    
    #print(german2english(model, "Ein Mann mit einem orangefarbenen Hut, der etwas anstarrt.", device=device)) # A man in an orange hat starring at something.
    
    model.train()
    
    epoch_train_loss = 0
    epoch_test_loss = 0  
    
    for batch_idx, data in enumerate(train_dataloader):
        x = data.src.transpose(-1, -2).to(device)
        y = data.trg.transpose(-1, -2).to(device)
        
        y_pred = model(x, y)
        
        y_pred = y_pred.reshape(-1, y_pred.size(2))
        y = y.reshape(-1)
        
        optimizer.zero_grad()
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()
        
        epoch_train_loss += loss.item()
        
    with torch.no_grad():
        model.eval()
        
        for batch_idx, data in enumerate(validation_dataloader):
            x = data.src.transpose(-1, -2).to(device)
            y = data.trg.transpose(-1, -2).to(device)
        
            y_pred = model(x, y)
            y_pred = y_pred.reshape(-1, y_pred.size(-1))
            y = y.reshape(-1)
        
            loss = criterion(y_pred, y)
        
            epoch_test_loss += loss.item()
    
    epoch_train_loss = epoch_train_loss / len(train_dataloader.dataset)
    epoch_test_loss = epoch_test_loss / len(validation_dataloader.dataset)
    
    scheduler.step(epoch_train_loss)
    
    print("-------------------------------------------------")
    print("Epoch: {} Train mean loss: {:.8f}".format(epoch, epoch_train_loss))
    print("       {} Test  mean loss: {:.8f}".format(epoch, epoch_test_loss))
    print("-------------------------------------------------")

-------------------------------------------------
Epoch: 1 Train mean loss: 0.13892030
       1 Test  mean loss: 0.13501639
-------------------------------------------------
-------------------------------------------------
Epoch: 2 Train mean loss: 0.13288257
       2 Test  mean loss: 0.13283425
-------------------------------------------------
-------------------------------------------------
Epoch: 3 Train mean loss: 0.13139114
       3 Test  mean loss: 0.13182968
-------------------------------------------------


## Test

In [25]:
german2english(model, "Ein Mann mit einem orangefarbenen Hut, der etwas anstarrt.", device=device) # A man in an orange hat starring at something.

torch.Size([1, 13]) torch.Size([1, 1])
torch.Size([1, 1, 9799])
tensor([[2]], device='cuda:0')
torch.Size([1, 13]) torch.Size([1, 2])
torch.Size([1, 2, 9799])
tensor([[2, 2]], device='cuda:0')


ValueError: only one element tensors can be converted to Python scalars

## Saving Model

In [None]:
#torch.save(model, "trained_models/language_translation_1.pt")