In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

import numpy as np
import pandas as pd
import pickle

import os

# Seed PyTorch's global random number generator so that anything drawn from it
# later (e.g. the initial weights of modules created below) is repeatable.
torch.manual_seed(1)

<torch._C.Generator at 0x7fac1c067ed0>

In [3]:
class BiLSTM(nn.Module):
    """Stacked bidirectional LSTM that scores a sequence of feature vectors.

    The input sequence is run through ``layer_dim`` bidirectional LSTM layers.
    The final hidden states of the top layer (forward direction after the last
    time step, backward direction after the first time step) are concatenated,
    projected to ``output_dim`` values by a linear layer and passed through a
    sigmoid.

    Args:
        embedding_dim: Size of each input feature vector.
        hidden_dim: Hidden-state size of each LSTM direction.
        layer_dim: Number of stacked LSTM layers.
        output_dim: Number of values produced per sequence.
    """

    def __init__(self, embedding_dim, hidden_dim = 256, layer_dim =2, output_dim = 10):
        super(BiLSTM, self).__init__()

        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim

        # batch_first=True: inputs/outputs are (batch, seq_len, features).
        # bidirectional=True: every layer runs once forwards and once
        # backwards, so per-step outputs have 2 * hidden_dim features.
        self.lstm1 = nn.LSTM(embedding_dim, hidden_dim, layer_dim, batch_first=True, bidirectional=True)

        # Maps the concatenated forward/backward summary to the output scores.
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Score each sequence in the batch.

        Args:
            x: Tensor of shape (batch, seq_len, embedding_dim), or a
                ``PackedSequence`` wrapping such a batch.

        Returns:
            Tensor of shape (batch, output_dim) holding sigmoid-activated
            scores. Because the sigmoid is already applied here, do not feed
            this output to a loss that expects raw logits (for example
            ``nn.CrossEntropyLoss``) without accounting for that.
        """
        # No explicit (h0, c0): nn.LSTM then starts from zero states that it
        # creates on the same device and with the same dtype as the input, so
        # the module keeps working after .to(device) / .double().
        _, (hn, _) = self.lstm1(x)

        # hn is (layer_dim * 2, batch, hidden_dim), ordered layer by layer with
        # the forward direction before the backward one. hn[-2] / hn[-1] are
        # therefore the top layer's final forward / backward states, each of
        # which has processed the whole sequence. (Indexing the per-step output
        # at the last time step would pair the forward state with a backward
        # state that has only seen the final element.)
        last_hidden = torch.cat((hn[-2], hn[-1]), dim=1)

        out = self.fc(last_hidden)
        out = self.sigmoid(out)

        return out

In [4]:
# Every time step is described by a 20-dimensional feature vector, e.g.
# [title score nlp, title score image vision, abstract score nlp, ...etc].
# Example target: [1.0 (title), 0, 0, ...etc].
EMBEDDING_DIM = 20
LEARNING_RATE = 0.1

model = BiLSTM(embedding_dim=EMBEDDING_DIM)

# NOTE(review): BiLSTM.forward already applies a sigmoid, while
# nn.CrossEntropyLoss expects unnormalized logits -- confirm that this pairing
# is intended before training.
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=LEARNING_RATE)

In [22]:
# Pickled per-document outputs of the vision and NLP stages.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
VISION_PICKLE = './vision_output/ssoar-1996-sfb_186_report_nr_5.pickle'
NLP_PICKLE = './nlp_output/ssoar_datasetssoar-1996-sfb_186_report_nr_5.pickle'

# Vision predictions are loaded first and printed for inspection.
with open(VISION_PICKLE, 'rb') as f:
    img = pickle.load(f)
print(img)

# NLP predictions for the same document (printed in a later cell).
with open(NLP_PICKLE, 'rb') as f:
    nlp = pickle.load(f)


[('www.ssoar.info', tensor([5.6229e-04, 2.5129e-04, 4.5295e-04, 3.7067e-04, 5.8728e-04, 2.6566e-04,
        1.2111e-04, 9.9434e-01, 2.0693e-03, 9.8032e-04])), ('Sfb 186 report; Nr. 5/ Februar 1996', tensor([1.7700e-04, 1.5972e-04, 2.9210e-04, 7.9165e-04, 3.0515e-04, 1.2186e-04,
        1.3006e-04, 2.8704e-03, 9.9340e-01, 1.7533e-03])), ('', tensor([1.0744e-03, 2.6179e-03, 2.1131e-03, 1.2077e-03, 9.5848e-01, 3.1879e-03,
        6.6597e-04, 3.4938e-03, 1.2879e-03, 2.5874e-02]))]


In [23]:
# Print only the first element of every vision entry. Judging by the printed
# `img` output, each entry is a (text, score tensor) pair, so this lists the
# recognised text snippets.
for txt in img:
    print(txt[0])

www.ssoar.info
Sfb 186 report; Nr. 5/ Februar 1996



In [24]:
# Dump the loaded NLP pickle to inspect its layout. In the captured output it
# appears as alternating token strings and 10-element arrays -- TODO confirm
# the exact structure before relying on it.
print(nlp)

['Sonderforschungsbereich' array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.])
 '186' array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]) 'D'
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) 'er'
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) 'S'
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) 'o'
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) 'n'
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) 'd'
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) 'e'
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) 'rfo'
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) 'rsc'
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) 'h'
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) 'u'
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) 'n'
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) 'g'
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) 'sb'
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) 'e'
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) 'reic'
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) 'h'
 array([0