In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

import numpy as np
import pandas as pd
import pickle

import os

torch.manual_seed(1)

<torch._C.Generator at 0x7ffbf805ee70>

In [3]:
class BiLSTM(nn.Module): 
    # This NLP part Will consist of two bidirectional lstm layers and it's output is 
    # determined by the LSTM's last hidden states or output vectors.

    # This will take as an input a sequence of words and output the last hidden layer
    # the last hidden states of 2-layer bidirectional LSTM will be the input of the last multimodel network 

    def __init__(self, embedding_dim, hidden_dim = 256, layer_dim =2, output_dim = 10):
        super(BiLSTM, self).__init__()
        
        self.embedding_dim = embedding_dim
        
        #Hidden dimensions
        self.hidden_dim = hidden_dim # maybe set this to 256

        # Number of hidden layers
        self.layer_dim = layer_dim

        # Building the LSTM 
        # batch_first = True causes the input/output to be of shape 3D (batch_dim, seq_dim, feature_dim) 
        # output will be the same dim as the hidden dim
        self.lstm1 = nn.LSTM(embedding_dim, hidden_dim, layer_dim, batch_first=True, bidirectional=True)
        
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        # Initialize hidden state with zeros
        # self.layer_dim * 2. because we have one going forwards and another going backwards
        h0 = torch.zeros(self.layer_dim * 2, x.size(0), self.hidden_dim)
        
        
        # Initialize cell state
        c0 =  torch.zeros(self.layer_dim * 2, x.size(0), self.hidden_dim)

        # We suppose we are conducting a 28 time steps In case of using 
        # We need to detach as we are doing truncated backpropagation through time (BPTT)
        # If we don't, we'll backprop all the way to the start even after going through another batch
        out, (hn, cn) = self.lstm1(x, (h0.detach(), c0.detach()))
                
        # out = self.fc(out.view(out.size(0), -1))
          
        # Without the activation function, out will contain the last hidden layer.
        # This could be obtianed from hn[-1] as well.
        out = out[:, -1, :]
        
        out = self.fc(out)
        
        out = self.sigmoid(out)
        
        return out
        
        # Index hidden state of last time step
        # out.size() --> 256, 100, 256 if we have (input dim = 100 and hidden dim = 100)
        # out[:, -1, :] => 256, 256 --> because we just want the last time step hidden states
        #out = out[:, -1, :] # without an activation function

        # now our: out.size() --> 256, 10 (if output dimension is equal to 10)
        #return out

In [65]:
# 20 embedding size
# word feature vector = [title score nlp, title score image vision, abstract score nlp, ...etc]
# target = [1.0 (title) , 0, 0 ...etc] as an example
model = BiLSTM(20)


#loss_function = nn.NLLLoss()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.1)

In [59]:
def process_vision_output():
    for file in os.scandir('./vision_output'):
        if (os.path.isfile('./nlp_output/{0}'.format(file.name))):
            vision_out = []
            with open(file, "rb") as openfile:
                while True:
                    try:
                        vision_out.append(pickle.load(openfile))
                    except EOFError:
                        break

            processed_out = []

            for idk in vision_out:
                for segment in idk:
                    vector = segment[1]
                    for word in segment[0].split(' '):
                        processed_out.append([word, vector])

            processed_out = processed_out
            with open("./processed_vision_output/{0}".format(file.name), 'wb') as f:
                pickle.dump(processed_out, f)

In [2]:
i=1
for nlp_file in os.scandir('./nlp_output'):
    with open(nlp_file, "rb") as f:
        nlp_out = pickle.load(f)
        
    with open("./processed_vision_output/{0}".format(nlp_file.name), "rb") as f:
        vision_out = pickle.load(f)
    nlp_out = np.reshape(nlp_out, (int(nlp_out.shape[0]/2), -1))
    
    
    combined_out = []
    clone_nlp_out = np.copy(nlp_out)
    for word in vision_out:
        if word[0] != '':
            match_indexes = np.where(clone_nlp_out == word[0])
            if len(match_indexes[0]) == 0:
                continue
            match_words = clone_nlp_out[match_indexes[0][0]]
            if match_words.shape[0] > 0:
                word_string = match_words[0]
                word_vector = match_words[1]
                
                combined_out.append([word_string, word_vector, word[1]])
                
                clone_nlp_out = np.delete(clone_nlp_out, match_indexes[0][0], axis=0)
    
    with open("./input/{0}".format(nlp_file.name), 'wb') as f:
        pickle.dump(combined_out, f)
        i+=1
        print(i, end='\r')
print("finished {0} files".format(i))

  from ipykernel import kernelapp as app


finished 166 files


In [54]:
objects = []
with (open("./vision_output/ssoar_datasetssoar-journpsycho-2003-2-diese_welt_bedarf_unserer_aufmerksamkeit.pickle", "rb")) as openfile:
    while True:
        try:
            objects.append(pickle.load(openfile))
        except EOFError:
            break
            
print(type(objects[0][1][1]))

<class 'torch.Tensor'>
