In [54]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
import pandas as pd
import pickle

torch.manual_seed(1)

<torch._C.Generator at 0x7faa54092c50>

In [72]:
class BiLSTM(nn.Module): 
    # This NLP part Will consist of two bidirectional lstm layers and it's output is 
    # determined by the LSTM's last hidden states or output vectors.

    # This will take as an input a sequence of words and output the last hidden layer
    # the last hidden states of 2-layer bidirectional LSTM will be the input of the last multimodel network 

    def __init__(self, embedding_dim, hidden_dim = 256, layer_dim =2, output_dim = 4):
        super(BiLSTM, self).__init__()

        #Hidden dimensions
        self.hidden_dim = hidden_dim # maybe set this to 256

        # Number of hidden layers
        self.layer_dim = layer_dim

        # Building the LSTM 
        # batch_first = True causes the input/output to be of shape 3D (batch_dim, seq_dim, feature_dim) 
        # output will be the same dim as the hidden dim
        self.lstm1 = nn.LSTM(embedding_dim, hidden_dim, layer_dim, batch_first=True, bidirectional=True)

    
    
    def forward(self, x):
        # Initialize hidden state with zeros
        # self.layer_dim * 2. because we have one going forwards and another going backwards
        
        h0 = torch.zeros(self.layer_dim * 2, x.size(0), self.hidden_dim)
        
        
        # Initialize cell state
        c0 =  torch.zeros(self.layer_dim * 2, x.size(0), self.hidden_dim)

        # We suppose we are conducting a 28 time steps In case of using 
        # We need to detach as we are doing truncated backpropagation through time (BPTT)
        # If we don't, we'll backprop all the way to the start even after going through another batch
        out, (hn, cn) = self.lstm1(x, (h0.detach(), c0.detach()))
        # Index hidden state of last time step
        # out.size() --> 256, 100, 256 if we have (input dim = 100 and hidden dim = 100)
        # out[:, -1, :] => 256, 256 --> because we just want the last time step hidden states
        out = out[:, -1, :] # without an activation function

        # now our: out.size() --> 256, 10 (if output dimension is equal to 10)
        return out

In [73]:
model = BiLSTM(1041)

loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.1)

In [74]:
# Here i have to get the ELMO vector in a form, in a format of numpy array.
with open('document_vector.pickle', 'rb') as file:
    vectors = pickle.load(file)
    file.close()

vectors = np.delete(vectors, 1, 1)
vectors = np.delete(vectors, 0, 1)

vectors = torch.from_numpy(np.array(vectors, dtype=np.float64)).view(521, 1, -1).float()
vectors.size()

torch.Size([521, 1, 1041])

In [75]:
# 1: title, 2: journal, 3: author, 4: asbtract
journal = [2] * 3
title = [1] * 10
author = [3] * 5
abstract = [4] * 503

labels =  torch.from_numpy(np.concatenate((journal, title, author, abstract)))

In [76]:
vectors

tensor([[[ 0.3333,  1.0000,  0.0000,  ..., -0.1705,  0.1981, -0.3040]],

        [[ 0.0000,  0.0000,  0.0000,  ..., -0.0636, -0.3796, -0.3931]],

        [[ 0.0769,  1.0000,  0.0000,  ..., -0.3025,  0.5232, -0.0270]],

        ...,

        [[ 0.2500,  1.0000,  0.0000,  ..., -0.4503,  0.4588,  0.1445]],

        [[ 0.0000,  0.0000,  0.0000,  ...,  0.2589,  0.4821, -0.0066]],

        [[ 0.0000,  0.0000,  0.0000,  ..., -0.3396,  0.0858,  0.0686]]])

In [77]:
# Training the model

for epoch in range(300):        
        # Clear for the gradients.
        model.zero_grad()
        
        # here prepare the inputs and targets
        # target = array of labels [0, 1, ] where the label i stands for the class of the word i
        training_input = vectors
        target = labels
        
        # run a forward pass
        label_scores = model(training_input)
        
        # Calculate loss, backpropagate, and update weights/parameters by calling opt.step()
        loss = loss_function(label_scores, target)
        loss.backward()
        optimizer.step()
        
        print("Epoch: {0}/300. Progress: {1}%".format(epoch, int((epoch * 100)/300)))
        
        

torch.Size([521, 1, 1041])
Epoch 0/300. Progress: 0%
torch.Size([521, 1, 1041])
Epoch 1/300. Progress: 0%
torch.Size([521, 1, 1041])
Epoch 2/300. Progress: 0%
torch.Size([521, 1, 1041])
Epoch 3/300. Progress: 1%
torch.Size([521, 1, 1041])
Epoch 4/300. Progress: 1%
torch.Size([521, 1, 1041])
Epoch 5/300. Progress: 1%
torch.Size([521, 1, 1041])
Epoch 6/300. Progress: 2%
torch.Size([521, 1, 1041])
Epoch 7/300. Progress: 2%
torch.Size([521, 1, 1041])
Epoch 8/300. Progress: 2%
torch.Size([521, 1, 1041])
Epoch 9/300. Progress: 3%
torch.Size([521, 1, 1041])
Epoch 10/300. Progress: 3%
torch.Size([521, 1, 1041])
Epoch 11/300. Progress: 3%
torch.Size([521, 1, 1041])
Epoch 12/300. Progress: 4%
torch.Size([521, 1, 1041])
Epoch 13/300. Progress: 4%
torch.Size([521, 1, 1041])
Epoch 14/300. Progress: 4%
torch.Size([521, 1, 1041])
Epoch 15/300. Progress: 5%
torch.Size([521, 1, 1041])
Epoch 16/300. Progress: 5%
torch.Size([521, 1, 1041])
Epoch 17/300. Progress: 5%
torch.Size([521, 1, 1041])
Epoch 18/30

KeyboardInterrupt: 