In [None]:
import pickle
import random
import re
import string
import unicodedata
from io import open

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data_utils
from torch import optim
from torch.autograd import Variable

# Use the CUDA device when torch reports one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Data Loading

In [None]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    unpickling finishes (or fails), instead of being left to the garbage
    collector.
    """
    # SECURITY: pickle.load can execute arbitrary code while deserialising;
    # only use this on files from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


#Labels:
Titles = np.ravel(_load_pickle('Titles.pkl'))  # flattened to a 1-D array
dico_titles = _load_pickle('title_prop.pkl')

#Fasttext embeddings:
X_fasttext = _load_pickle('X_fasttext_ind.pkl')
X_fasttext_nogen = _load_pickle('X_fasttext_nogen_ind.pkl')

## Data preprocessing, tensorization

In [None]:
# Mini-batch size shared by both loaders.
BATCH_SIZE = 10

# TensorDataset indexes its arguments through the torch.Tensor API, so the
# loaded arrays are converted first (torch.as_tensor returns its argument
# unchanged when it is already a tensor).
# NOTE(review): assumes the pickled features and labels are numeric arrays
# with the same first dimension — confirm against the files.
titles_tensor = torch.as_tensor(Titles)

train = data_utils.TensorDataset(torch.as_tensor(X_fasttext), titles_tensor)
train_loader = data_utils.DataLoader(train, batch_size = BATCH_SIZE, shuffle = True)

train_nogen = data_utils.TensorDataset(torch.as_tensor(X_fasttext_nogen), titles_tensor)
trainnogen = train_nogen  # earlier spelling of the same dataset, kept as an alias
train_loader_nogen = data_utils.DataLoader(train_nogen, batch_size = BATCH_SIZE, shuffle = True)

## Models

In [None]:
class Encoder(nn.Module):
    """Single-layer (optionally bidirectional) GRU that encodes one step at a time.

    Args:
        input_size: number of features in each input vector.
        hidden_size: size of the GRU hidden state per direction.
        bidirectional: when True the GRU runs in both directions, so the
            per-step output has ``2 * hidden_size`` features.
    """

    def __init__(self, input_size, hidden_size, bidirectional = True):
        super().__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.bidirectional = bidirectional

        self.gru = nn.GRU(input_size, hidden_size, bidirectional = bidirectional)

    def forward(self, inputs, hidden):
        """Feed a single input vector through the GRU.

        Args:
            inputs: tensor with exactly ``input_size`` elements; it is viewed
                as a (seq_len=1, batch=1, input_size) sequence.
            hidden: GRU state of shape (num_directions, 1, hidden_size).

        Returns:
            ``(output, hidden)`` as returned by ``nn.GRU``: output has shape
            (1, 1, num_directions * hidden_size) and hidden keeps the shape of
            the state passed in.
        """
        output, hidden = self.gru(inputs.view(1, 1, self.input_size), hidden)
        return output, hidden

    def init_hidden(self):
        """Return an all-zero initial state of shape (num_directions, 1, hidden_size).

        nn.GRU takes a single hidden tensor (not an LSTM-style ``(h, c)``
        pair), so one tensor is returned. It is created on the same device as
        the module's parameters so it can be passed straight to ``forward``.
        """
        num_directions = 2 if self.bidirectional else 1
        param_device = next(self.parameters()).device
        return torch.zeros(num_directions, 1, self.hidden_size, device=param_device)

    # Alias for callers that use the camelCase spelling.
    initHidden = init_hidden

In [None]:
class AttentionDecoder(nn.Module):
    """GRU decoder step with additive attention over the encoder outputs.

    Args:
        hidden_size: feature size of each encoder output vector.
        output_size: size of the decoder GRU hidden state.
        vocab_size: size of the input token vector and of the output scores.
    """

    def __init__(self, hidden_size, output_size, vocab_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Scores one (decoder state, encoder output) pair with a single scalar.
        self.attn = nn.Linear(hidden_size + output_size, 1)
        # The GRU input is the attention context concatenated with the
        # vocab-sized token vector (no embedding layer is used).
        self.gru = nn.GRU(hidden_size + vocab_size, output_size)
        self.final = nn.Linear(output_size, vocab_size)

    def init_hidden(self):
        """Return an all-zero initial GRU state of shape (1, 1, output_size).

        A single tensor is returned because nn.GRU does not take an
        ``(h, c)`` pair; it lives on the same device as the parameters.
        """
        param_device = next(self.parameters()).device
        return torch.zeros(1, 1, self.output_size, device=param_device)

    def forward(self, decoder_hidden, encoder_outputs, input):
        """Run one decoding step.

        Args:
            decoder_hidden: GRU state of shape (1, 1, output_size). An
                ``(h, c)`` tuple is also accepted; only ``h`` is used.
            encoder_outputs: encoder features with ``hidden_size`` as the last
                dimension, e.g. (T, hidden_size) or (T, 1, hidden_size).
            input: current token vector of shape (1, 1, vocab_size).

        Returns:
            ``(output, hidden, normalized_weights)`` where output is
            (1, vocab_size), hidden is the new (1, 1, output_size) state and
            normalized_weights is the (1, T) attention distribution.
        """
        # Tolerate LSTM-style state tuples from older callers.
        if isinstance(decoder_hidden, tuple):
            decoder_hidden = decoder_hidden[0]

        memory = encoder_outputs.reshape(-1, self.hidden_size)       # (T, hidden_size)
        # Pair the current decoder state with every encoder position at once
        # instead of looping over positions in Python.
        query = decoder_hidden[0].expand(memory.size(0), -1)         # (T, output_size)
        scores = self.attn(torch.cat((query, memory), dim = 1))      # (T, 1)
        normalized_weights = F.softmax(scores.t(), dim = 1)          # (1, T)

        # Weighted sum of the encoder outputs -> context of shape (1, 1, hidden_size).
        attn_applied = torch.bmm(normalized_weights.unsqueeze(1),
                                 memory.unsqueeze(0))

        gru_input = torch.cat((attn_applied[0], input[0]), dim = 1)

        output, hidden = self.gru(gru_input.unsqueeze(0), decoder_hidden)

        output = self.final(output[0])

        return output, hidden, normalized_weights


## Training

In [None]:
def train_bio(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one optimisation step on a single (input, target) sequence pair.

    The input is encoded step by step, then the decoder is unrolled for
    ``target_length`` steps, feeding its own arg-max prediction back in as the
    next input (no teacher forcing). The summed loss is back-propagated and
    both optimizers take one step.

    Args:
        input_tensor: input sequence indexed along dim 0; each
            ``input_tensor[i]`` is passed to ``encoder``. Sequences longer
            than ``max_length`` overflow the encoder-output buffer and raise
            IndexError.
        target_tensor: target sequence indexed along dim 0; each
            ``target_tensor[i]`` is passed to ``criterion`` together with the
            (1, vocab_size) decoder output.
            # NOTE(review): presumably shape (T, 1) of class indices — confirm.
        encoder: module exposing ``init_hidden()``, ``hidden_size`` and a
            ``forward(inputs, hidden)`` that returns ``(output, hidden)``.
        decoder: module called as ``decoder(hidden, encoder_outputs, input)``
            whose ``final`` layer maps to the vocabulary.
        encoder_optimizer, decoder_optimizer: optimizers for the two modules.
        criterion: loss called as ``criterion(decoder_output, target_step)``.
        max_length: number of rows in the encoder-output buffer.
            # NOTE(review): the default MAX_LENGTH must be defined before
            # this cell runs; it is not visible in this part of the notebook.

    Returns:
        The summed loss divided by the number of target steps, as a float.

    Raises:
        ValueError: if ``target_tensor`` has no steps.
    """
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    if target_length == 0:
        raise ValueError("target_tensor must contain at least one step")

    encoder_hidden = encoder.init_hidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # A bidirectional encoder emits forward and backward features side by
    # side, so each buffer row must be twice as wide in that case.
    num_directions = 2 if getattr(encoder, "bidirectional", False) else 1
    encoder_outputs = torch.zeros(max_length, num_directions * encoder.hidden_size, device=device)

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    # The decoder concatenates its input with the attention context, so the
    # token is supplied as a one-hot vector of shape (1, 1, vocab_size).
    # Index 0 is used as the first input (presumably the start-of-sequence
    # token — confirm against the vocabulary).
    vocab_size = decoder.final.out_features
    decoder_input = F.one_hot(torch.tensor([[0]], device=device), vocab_size).float()

    # Collapse the per-direction encoder states into a single-layer state by
    # concatenating them along the feature axis; this is a no-op for a
    # unidirectional encoder.
    # NOTE(review): the decoder state size must equal
    # num_directions * encoder.hidden_size for this hand-off to work.
    decoder_hidden = encoder_hidden.transpose(0, 1).reshape(1, 1, -1)

    loss = 0

    for di in range(target_length):
        decoder_output, decoder_hidden, _ = decoder(
            decoder_hidden, encoder_outputs, decoder_input)
        _, topi = decoder_output.topk(1)
        # Feed the detached arg-max prediction back in as the next input.
        decoder_input = F.one_hot(topi.detach(), vocab_size).float()
        loss += criterion(decoder_output, target_tensor[di])

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [None]:
def train_all(inputs, targets, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Train on every (inputs[i], targets[i]) pair once, in order.

    Each pair is passed to ``train_bio``; the value it returns is printed and
    also collected so the caller can log or plot it afterwards.

    Args:
        inputs: indexable collection of input sequences.
        targets: indexable collection of target sequences, indexed with the
            same positions as ``inputs``.
        encoder, decoder, encoder_optimizer, decoder_optimizer, criterion,
        max_length: forwarded unchanged to ``train_bio``.

    Returns:
        List with the value returned by ``train_bio`` for each pair, in order.
    """
    losses = []
    for i in range(len(inputs)):
        step_loss = train_bio(inputs[i], targets[i], encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=max_length)
        print(step_loss)
        losses.append(step_loss)
    return losses