In [101]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
import pandas as pd
import pickle

torch.manual_seed(1)

<torch._C.Generator at 0x7f193b50f1d0>

In [119]:
class BiLSTM(nn.Module): 
    # This NLP part Will consist of two bidirectional lstm layers and it's output is 
    # determined by the LSTM's last hidden states or output vectors.

    # This will take as an input a sequence of words and output the last hidden layer
    # the last hidden states of 2-layer bidirectional LSTM will be the input of the last multimodel network 

    def __init__(self, embedding_dim, hidden_dim = 256, layer_dim =2, output_dim = 4):
        super(BiLSTM, self).__init__()
        
        self.embedding_dim = embedding_dim
        
        #Hidden dimensions
        self.hidden_dim = hidden_dim # maybe set this to 256

        # Number of hidden layers
        self.layer_dim = layer_dim

        # Building the LSTM 
        # batch_first = True causes the input/output to be of shape 3D (batch_dim, seq_dim, feature_dim) 
        # output will be the same dim as the hidden dim
        self.lstm1 = nn.LSTM(embedding_dim, hidden_dim, layer_dim, batch_first=True, bidirectional=True)
        
        self.final_out = nn.Linear(hidden_dim*2, 4)
    
    
    def forward(self, x):
        # Initialize hidden state with zeros
        # self.layer_dim * 2. because we have one going forwards and another going backwards
        h0 = torch.zeros(self.layer_dim * 2, x.size(0), self.hidden_dim)
        
        
        # Initialize cell state
        c0 =  torch.zeros(self.layer_dim * 2, x.size(0), self.hidden_dim)

        # We suppose we are conducting a 28 time steps In case of using 
        # We need to detach as we are doing truncated backpropagation through time (BPTT)
        # If we don't, we'll backprop all the way to the start even after going through another batch
        out, (hn, cn) = self.lstm1(x, (h0.detach(), c0.detach()))
        
        
        out = self.final_out(out.view(out.size(0), -1))
        
        return torch.sigmoid(out)
        
        # Index hidden state of last time step
        # out.size() --> 256, 100, 256 if we have (input dim = 100 and hidden dim = 100)
        # out[:, -1, :] => 256, 256 --> because we just want the last time step hidden states
        #out = out[:, -1, :] # without an activation function

        # now our: out.size() --> 256, 10 (if output dimension is equal to 10)
        #return out

In [120]:
model = BiLSTM(1041)

loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.1)

In [121]:
# Here i have to get the ELMO vector in a form, in a format of numpy array.
with open('document_vector.pickle', 'rb') as file:
    vectors = pickle.load(file)
    file.close()

vectors = np.delete(vectors, 1, 1)
vectors = np.delete(vectors, 0, 1)

vectors = torch.from_numpy(np.array(vectors, dtype=np.float64)).view(521, 1, -1).float()
vectors.size()

torch.Size([521, 1, 1041])

In [122]:
# 1: title, 2: journal, 3: author, 4: asbtract
journal = [1] * 3
title = [0] * 10
author = [2] * 5
abstract = [3] * 503

labels =  torch.from_numpy(np.concatenate((journal, title, author, abstract)))

In [123]:
vectors

tensor([[[ 0.3333,  1.0000,  0.0000,  ..., -0.1705,  0.1981, -0.3040]],

        [[ 0.0000,  0.0000,  0.0000,  ..., -0.0636, -0.3796, -0.3931]],

        [[ 0.0769,  1.0000,  0.0000,  ..., -0.3025,  0.5232, -0.0270]],

        ...,

        [[ 0.2500,  1.0000,  0.0000,  ..., -0.4503,  0.4588,  0.1445]],

        [[ 0.0000,  0.0000,  0.0000,  ...,  0.2589,  0.4821, -0.0066]],

        [[ 0.0000,  0.0000,  0.0000,  ..., -0.3396,  0.0858,  0.0686]]])

In [124]:
# Training the model
batch_loss = 0
for epoch in range(300):        
        # Clear for the gradients.
        model.zero_grad()
        
        # here prepare the inputs and targets
        # target = array of labels [0, 1, ] where the label i stands for the class of the word i
        training_input = vectors
        target = labels
        
        # run a forward pass
        label_scores = model(training_input)
        print(label_scores)
        # Calculate loss, backpropagate, and update weights/parameters by calling opt.step()
        loss = loss_function(label_scores, target)
        batch_loss += loss.item()*521
        loss.backward()
        optimizer.step()
        
        
        
        print("Epoch: {0}/300. Loss: {2:.2f} Progress: {1}%".format(epoch, int((epoch * 100)/300), loss.item()) , end="\r")
        

print("Bi-LSTM model training is done!", end='\r')

tensor([[0.5099, 0.5014, 0.4987, 0.4826],
        [0.5102, 0.5063, 0.4856, 0.5001],
        [0.5074, 0.5080, 0.4844, 0.5022],
        ...,
        [0.5067, 0.5110, 0.4854, 0.5016],
        [0.5068, 0.5101, 0.4861, 0.5015],
        [0.5085, 0.5071, 0.4955, 0.4968]], grad_fn=<SigmoidBackward>)
tensor([[0.5101, 0.5016, 0.4988, 0.4939],
        [0.5111, 0.5064, 0.4855, 0.5144],
        [0.5084, 0.5083, 0.4844, 0.5168],
        ...,
        [0.5079, 0.5114, 0.4853, 0.5153],
        [0.5075, 0.5112, 0.4861, 0.5148],
        [0.5092, 0.5068, 0.4958, 0.5096]], grad_fn=<SigmoidBackward>)
tensor([[0.5104, 0.5017, 0.4989, 0.5051],
        [0.5120, 0.5064, 0.4855, 0.5286],
        [0.5094, 0.5085, 0.4845, 0.5315],
        ...,
        [0.5089, 0.5122, 0.4849, 0.5291],
        [0.5084, 0.5122, 0.4859, 0.5284],
        [0.5099, 0.5061, 0.4957, 0.5225]], grad_fn=<SigmoidBackward>)
tensor([[0.5106, 0.5019, 0.4989, 0.5164],
        [0.5129, 0.5066, 0.4855, 0.5429],
        [0.5104, 0.5088, 0.4845, 0.54

KeyboardInterrupt: 