# Machine translation 

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

import spacy

import random
import math
import time

from torchtext.datasets import TranslationDataset, Multi30k #WMT14, IWSLT
from torchtext.data import Field, BucketIterator

import torch.nn.functional as F

In [3]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

class Encoder(nn.Module):
    """LSTM encoder: embeds source token ids and returns the final LSTM state.

    Args:
        input_dim: Number of rows in the embedding table (source vocabulary
            size).
        emb_dim: Size of each embedding vector.
        hidden_dim: Size of the LSTM hidden and cell states.
        n_layers: Number of stacked LSTM layers. The default mirrors
            ``Decoder`` so the returned state can be fed to it directly.
        dropout: Dropout probability applied to the embedded input.
    """

    def __init__(self, input_dim, emb_dim, hidden_dim, n_layers=3, dropout=0.5):
        # nn.Module.__init__ takes no arguments; the hyper-parameters belong
        # to this constructor, not to the base class.
        super().__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.emb_dim = emb_dim
        self.n_layers = n_layers

        self.embedding = nn.Embedding(self.input_dim, self.emb_dim)
        self.rnn = nn.LSTM(self.emb_dim, self.hidden_dim, self.n_layers)

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Encode a batch of source sequences.

        Args:
            x: Integer tensor of token ids. The LSTM is built with the default
                ``batch_first=False``, so the first axis is treated as time
                and the second as batch.

        Returns:
            Tuple ``(hidden, cell)`` holding the LSTM's final hidden and cell
            states; the per-step outputs are discarded.
        """
        embedded = self.dropout(self.embedding(x))
        _, (hidden, cell) = self.rnn(embedded)

        return hidden, cell

In [4]:
class Decoder(nn.Module):
    """LSTM decoder that predicts one target token per call.

    Args:
        output_dim: Number of rows in the embedding table and width of the
            output projection (target vocabulary size).
        emb_dim: Size of each embedding vector.
        hid_dim: Size of the LSTM hidden and cell states.
        n_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied to the embedded input.
    """

    def __init__(self, output_dim, emb_dim, hid_dim, n_layers=3, dropout=0.5):
        super().__init__()

        self.emb_dim = emb_dim
        self.hid_dim = hid_dim
        self.output_dim = output_dim
        self.n_layers = n_layers

        # ``self.dropout`` is only ever the nn.Dropout module; the raw float
        # is not stored separately.
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.LSTM(self.emb_dim, self.hid_dim, self.n_layers)
        self.out = nn.Linear(self.hid_dim, self.output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, hidden, cell):
        """Run a single decoding step.

        Args:
            x: Integer tensor of token ids for the current step; presumably
                one id per batch element (shape ``(batch,)``) — verify
                against the caller.
            hidden: LSTM hidden state from the previous step (or the encoder).
            cell: LSTM cell state from the previous step (or the encoder).

        Returns:
            Tuple ``(prediction, hidden, cell)`` where ``prediction`` is the
            output of the linear projection over ``output_dim`` scores and
            ``hidden``/``cell`` are the updated LSTM states.
        """
        # Add a leading length-1 axis so the LSTM sees a one-step sequence.
        step_input = x.unsqueeze(0)
        embedded = self.dropout(self.embedding(step_input))
        output, (hidden, cell) = self.rnn(embedded, (hidden, cell))

        # Drop the length-1 time axis again before projecting to scores.
        prediction = self.out(output.squeeze(0))

        return prediction, hidden, cell

In [5]:
class seq2seq(nn.Module):
    """Encoder-decoder wrapper that decodes step by step with teacher forcing.

    Args:
        encoder: Callable mapping ``src`` to a ``(hidden, cell)`` pair.
        decoder: Callable mapping ``(input, hidden, cell)`` to
            ``(prediction, hidden, cell)``; must expose ``output_dim``.
        device: Device on which the output tensor is allocated.
    """

    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        self._init_weights()

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        """Decode ``trg.shape[0] - 1`` steps and collect the per-step scores.

        Args:
            src: Source batch, passed unchanged to the encoder.
            trg: Target token ids; axis 0 is read as the sequence length and
                axis 1 as the batch size.
            teacher_forcing_ratio: Probability, drawn once per time step for
                the whole batch, of feeding the ground-truth token instead of
                the decoder's own top prediction.

        Returns:
            Tensor of shape ``(trg.shape[0], trg.shape[1], decoder.output_dim)``.
            Row 0 is never written and stays all zeros.
        """
        max_len, batch_size = trg.shape[0], trg.shape[1]
        trg_vocab_size = self.decoder.output_dim
        outputs = torch.zeros(
            max_len, batch_size, trg_vocab_size, device=self.device
        )

        hidden, cell = self.encoder(src)

        # The first decoder input is the first target row (presumably the
        # start-of-sequence tokens — confirm against the data pipeline).
        input_ = trg[0, :]

        for t in range(1, max_len):
            output, hidden, cell = self.decoder(input_, hidden, cell)
            outputs[t] = output
            teacher_force = random.random() < teacher_forcing_ratio
            top1 = output.argmax(1)
            input_ = trg[t] if teacher_force else top1

        return outputs

    def _init_weights(self, bound=0.08):
        """Re-initialise every parameter uniformly in ``[-bound, bound]``."""
        for param in self.parameters():
            nn.init.uniform_(param.data, -bound, bound)
        