In [1]:
import random
import numpy as np

from torch import nn
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.data import Data
import torch_geometric.transforms as T
from torch_geometric.nn import SGConv
import pickle

import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, SAGEConv

import os
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


### GraphSAGE architecture

In [2]:
class GraphSAGE(torch.nn.Module):
    """Three-layer GraphSAGE encoder that produces one embedding per node.

    Every layer is a ``SAGEConv`` followed by ELU and dropout.

    Args:
        in_dim: dimension of the input node-feature vectors.
        hidden_dim: width of the two hidden layers.
        out_dim: dimension of the returned node embeddings (this should be
            equal to the dimension of the trace representation).
        dropout: dropout probability applied after each layer while training.
    """

    def __init__(self, in_dim, hidden_dim, out_dim, dropout=0.2):
        super().__init__()
        self.dropout = dropout
        self.conv1 = SAGEConv(in_dim, hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, hidden_dim)
        self.conv3 = SAGEConv(hidden_dim, out_dim)

    def forward(self, data):
        """Embed every node of ``data``.

        Args:
            data: object exposing ``x`` (node features) and ``edge_index``
                (graph connectivity), both forwarded to each ``SAGEConv``.

        Returns:
            The output of the third layer after ELU and dropout.
        """
        x, edge_index = data.x, data.edge_index
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.elu(conv(x, edge_index))
            # F.dropout defaults to training=True, so it must be tied to the
            # module's mode explicitly; otherwise model.eval() would still
            # randomly zero activations at inference time.
            x = F.dropout(x, p=self.dropout, training=self.training)
        return x

### MLP architecture

In [3]:
class MLP(torch.nn.Module):
    """
    Feed-forward encoder for the one-hot encoded node labels.

    Each row of the input is pushed through two ReLU-activated hidden layers
    (200 and 150 units) and a final linear projection.

    input dim: vocab size (length of every one-hot label vector)
    output dim: width of the produced node features (a hyperparameter)
    """
    def __init__(self, input_dim, output_dim):
        super().__init__()

        first_width, second_width = 200, 150
        self.input_fc = nn.Linear(input_dim, first_width)
        self.hidden_fc = nn.Linear(first_width, second_width)
        self.output_fc = nn.Linear(second_width, output_dim)

    def forward(self, x):
        # x: [num_nodes, input_dim] -> [num_nodes, 200] -> [num_nodes, 150]
        activations = x
        for layer in (self.input_fc, self.hidden_fc):
            activations = F.relu(layer(activations))

        # The last projection stays linear: [num_nodes, output_dim]
        return self.output_fc(activations)

### LSTM architecture

In [4]:
class lstm(nn.Module):
    """Single-layer LSTM over one-hot traces with a linear head on every step.

    Args:
        batch_size: nominal batch size; stored on the instance only.
        output_size: width of the per-step output vectors.
        hidden_size: width of the LSTM hidden state.
        vocab_size: size of each input feature vector (the one-hot length).
    """

    def __init__(self, batch_size, output_size, hidden_size, vocab_size):
        super(lstm, self).__init__()
        self.batch_size = batch_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.lstm = nn.LSTM(vocab_size, hidden_size)
        self.label = nn.Linear(hidden_size, output_size)

    def forward(self, input_sentence, batch_size=None):
        """Run the LSTM over a batch of traces.

        Args:
            input_sentence: tensor laid out as (batch, seq_len, vocab_size).
            batch_size: kept for backward compatibility and no longer needed;
                the batch dimension is taken from ``input_sentence`` itself.

        Returns:
            Tensor of shape (seq_len, batch, output_size). Note that it is
            sequence-first, not batch-first.
        """
        # nn.LSTM was built with the default batch_first=False, so it expects
        # (seq_len, batch, features).
        sequence_first = input_sentence.permute(1, 0, 2)
        # No explicit (h_0, c_0): nn.LSTM then starts from zeros that match the
        # real batch size, device and dtype of the input. Building them by hand
        # crashed for batch_size=None, for a short final batch, and for inputs
        # that live on the GPU.
        output, _ = self.lstm(sequence_first)
        return self.label(output)

### Training

In [15]:
def train(model_mlp, optimizer_mlp, 
        model_graphsage, optimizer_graphsage,
        model_lstm, optimizer_lstm,
        data_nodeLabels, data_adjacencyMatrix):
    """Run one epoch of joint training for the MLP, GraphSAGE and LSTM models.

    For every batch of traces from the module-level ``train_iter``: the MLP
    embeds the node labels, GraphSAGE refines them over the graph, the LSTM
    encodes the traces, and ``calculate_loss`` compares the two.

    Args:
        model_mlp, optimizer_mlp: node-label encoder and its optimizer.
        model_graphsage, optimizer_graphsage: graph encoder and its optimizer.
        model_lstm, optimizer_lstm: trace encoder and its optimizer.
        data_nodeLabels: input to ``model_mlp``.
        data_adjacencyMatrix: edge list of shape (num_edges, 2), as returned
            by ``structure_adjacency_matrix``.

    Returns:
        The string "success".
    """
    model_mlp.train()
    model_graphsage.train()
    model_lstm.train()

    # torch_geometric expects edge_index as (2, num_edges). Use the argument
    # the caller passed rather than reaching for a global.
    edge_index = data_adjacencyMatrix.t().contiguous()

    for batch in train_iter:
        # Clear all three optimizers. Skipping the LSTM's would let its
        # gradients accumulate from one batch to the next.
        optimizer_mlp.zero_grad()
        optimizer_graphsage.zero_grad()
        optimizer_lstm.zero_grad()

        out_mlp = model_mlp(data_nodeLabels)
        print("MLP SUCCESS")

        # create a graphsage custom data
        data = Data(x=out_mlp, edge_index=edge_index)
        out_graphsage = model_graphsage(data)
        print("GRAPHSAGE SUCCESS")

        # Use the size of this batch, since the last one may be smaller.
        output_lstm = model_lstm(batch, batch.size(0))
        # The LSTM returns (seq_len, batch, dim). Swap the first two axes to
        # get (batch, seq_len, dim); a reshape would keep the memory order and
        # mix time steps of different traces.
        output_lstm = output_lstm.transpose(0, 1)

        loss_calc = calculate_loss(output_lstm, out_graphsage, batch)

        loss_calc.backward()
        optimizer_graphsage.step()
        optimizer_mlp.step()
        optimizer_lstm.step()
    return ("success")

In [11]:
def calculate_loss(output_lstm, out_graphsage, batch, criterion=None):
    """Score every trace step against every node embedding and return the loss.

    Args:
        output_lstm: trace encodings, expected as (batch, seq_len, dim).
        out_graphsage: node embeddings, expected as (num_nodes, dim).
        batch: targets with the same layout as the scores, i.e.
            (batch, seq_len, num_nodes); in this notebook, the one-hot traces.
        criterion: optional loss callable taking ``(logits, target)``.
            Defaults to the module-level ``loss_fn``.

    Returns:
        The scalar loss produced by ``criterion``.
    """
    if criterion is None:
        criterion = loss_fn

    # (batch, seq_len, dim) @ (dim, num_nodes) -> (batch, seq_len, num_nodes)
    logits = torch.matmul(output_lstm, out_graphsage.transpose(0, 1))

    # CrossEntropyLoss applies log-softmax itself and reads the class axis
    # from dim 1. Pass raw logits (no extra Softmax) and flatten batch/time so
    # the node axis is the class axis rather than the sequence axis.
    num_nodes = logits.size(-1)
    flat_logits = logits.reshape(-1, num_nodes)
    flat_target = batch.reshape(-1, num_nodes)

    return criterion(flat_logits, flat_target)

### Dataset Preprocessing

#### Converting the dense adjacency matrix to an edge list

In [7]:
def structure_adjacency_matrix(adjacency=None):
    """
    Convert a dense adjacency matrix into a 2-column edge list.

    Args:
        adjacency: optional 2-D adjacency matrix (tensor or nested sequence).
            When omitted, the module-level ``data_adjacencyMatrix`` is used.

    Returns:
        int64 tensor of shape (num_edges, 2). Each row is ``[row, column]`` of
        an entry equal to 1, listed in row-major order. A matrix without such
        entries yields an empty (0, 2) tensor.
    """
    if adjacency is None:
        adjacency = data_adjacencyMatrix
    dense = torch.as_tensor(adjacency)
    # torch.nonzero lists matching indices in row-major order, the same order
    # a nested Python loop over rows and columns would give, without visiting
    # every element from Python.
    return torch.nonzero(dense == 1)


#### Create demo trace data

In [8]:
# Four toy traces, each a walk over five node ids.
traces = [
    [0, 1, 4, 15, 38],
    [0, 1, 6, 18, 40],
    [0, 1, 4, 15, 38],
    [0, 1, 6, 18, 40],
]

# One-hot encode every node id over the 412 possible nodes:
# all_traces_ohe[i][j] is a length-412 list with a single 1 at traces[i][j].
all_traces_ohe = [
    [[1 if position == node_id else 0 for position in range(412)]
     for node_id in trace]
    for trace in traces
]

# Resulting tensor layout: (num_traces, trace_length, 412)
train_data = torch.FloatTensor(all_traces_ohe)

#### Load node labels and adjacency matrix

In [9]:
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
# Context managers close the file handles as soon as each object is loaded.
with open("/home/dhruvs/depaul_data/nodeLabels.pkl", "rb") as labels_file:
    data_nodeLabels = pickle.load(labels_file)
with open("/home/dhruvs/depaul_data/adjacencyMatrix.pkl", "rb") as adjacency_file:
    data_adjacencyMatrix_old = pickle.load(adjacency_file)
# Cast the adjacency matrix to int64; the original object is kept under the *_old name.
data_adjacencyMatrix = data_adjacencyMatrix_old.type(torch.int64)

### Defining hyperparameters and initiating the model

In [16]:
# Hyperparameters, model/optimizer construction and the epoch loop.
# NOTE(review): no random seed is set in the visible cells, so the initial
# weights (and therefore the results) presumably differ between runs.
lr = 1e-4 
epochs = 2


in_dim_mlp = data_nodeLabels.shape[1] # current vocab size (second dimension of the node-label matrix)
output_dim_mlp = 100 # this is the output dimension of mlp (a hyperparameter)

input_dim_graphsage = output_dim_mlp # input dimension of graphsage = output dimension of mlp
hidden_dim_graphsage = 75 # this is a hyperparameter
output_dim_graphsage = 50 # this is a hyperparameter

# One Adam optimizer per model, all sharing the same learning rate.
model_mlp = MLP(input_dim=in_dim_mlp, 
                 output_dim= output_dim_mlp)
optimizer_mlp = torch.optim.Adam(model_mlp.parameters(), lr=lr)


model_graphsage = GraphSAGE(in_dim=input_dim_graphsage, 
                 hidden_dim=hidden_dim_graphsage, 
                 out_dim= output_dim_graphsage)
optimizer_graphsage = torch.optim.Adam(model_graphsage.parameters(), lr=lr)

# The LSTM consumes vectors of length vocab_size and must emit vectors as wide
# as the GraphSAGE embeddings, because calculate_loss multiplies the two.
vocab_size = in_dim_mlp
batch_size = 2 
output_size_lstm = output_dim_graphsage
hidden_size_lstm = 100
# train() reads train_iter as a module-level global rather than as an argument.
train_iter = DataLoader((train_data), batch_size = batch_size)


model_lstm = lstm(batch_size, output_size_lstm, hidden_size_lstm, vocab_size)
optimizer_lstm = torch.optim.Adam(model_lstm.parameters(), lr=lr)

# Edge list built from the module-level data_adjacencyMatrix.
adj_2d = structure_adjacency_matrix()

# calculate_loss() picks this up as a module-level global.
loss_fn = torch.nn.CrossEntropyLoss()

# Each call to train() makes one full pass over train_iter; the edge list is
# passed as its data_adjacencyMatrix argument.
for epoch in range(1, 1 + epochs):
    success = train(model_mlp, optimizer_mlp, 
        model_graphsage, optimizer_graphsage,
        model_lstm, optimizer_lstm,
        data_nodeLabels, adj_2d)
    print(success)

MLP SUCCESS
GRAPHSAGE SUCCESS
MLP SUCCESS
GRAPHSAGE SUCCESS
success
MLP SUCCESS
GRAPHSAGE SUCCESS
MLP SUCCESS
GRAPHSAGE SUCCESS
success


### Some notes

* Currently, I am not dealing with variable-length traces.
* I am treating the one-hot encoded traces as ground truth (which is fine in this case), but we need a better way to pass the ground truth to the training module.
* TODO: write the evaluate function.