In [1]:
import pickle

import numpy as np
import torch
from torch_geometric.data import Data

In [2]:
# Parameters
# Hidden width handed to CombinedModel: the GraphSAGE hidden layer uses it as-is,
# and the BiLSTM is built with half of it per direction.
HIDDEN_DIM = 128

In [8]:
# Load the traces and the node feature matrix.
# NOTE(review): pickle can execute arbitrary code while loading; these files are
# presumably produced by the sibling generateData step — only load trusted files.
# `with` guarantees the file handles are closed (the bare open() leaked them).
with open("../generateData/traces.pkl", "rb") as fh:
    traces = pickle.load(fh)
with open("../generateData/nodeLabels.pkl", "rb") as fh:
    featureMatrix = pickle.load(fh)

In [6]:
# Number of nodes, taken as the length of the first row of featureMatrix
# (assumes every row has the same length — TODO confirm).
nodeNum = len(featureMatrix[0])

# Turn a single node index into a one-hot encoded vector.
def one_hot_encode(x, num_classes=None):
    """Return a one-hot NumPy vector with a 1 at position ``x``.

    Args:
        x: Integer index of the active entry.
        num_classes: Length of the returned vector. When omitted, the
            notebook-level ``nodeNum`` is used, which is the original behaviour.

    Returns:
        A 1-D ``numpy.ndarray`` of zeros (NumPy's default float dtype) with a
        single 1 at index ``x``.

    Raises:
        IndexError: if ``x`` is outside the vector (raised by NumPy indexing).
            Negative ``x`` follows NumPy's wrap-around indexing.
    """
    if num_classes is None:
        # Resolved at call time so the helper keeps tracking the notebook global.
        num_classes = nodeNum
    vec = np.zeros(num_classes)
    vec[x] = 1
    return vec

# One-hot encode every node index of every trace. The nested comprehension gives
# one vector of length nodeNum per trace step (assumes all traces have the same
# length so the result is a regular 3-D array — TODO confirm).
traces_onehot = np.array([[one_hot_encode(x) for x in row] for row in traces])

# to tensor (kept under its own name so traces_x is only ever a tensor)
traces_x = torch.tensor(traces_onehot, dtype=torch.long)

# Targets are the raw node indices. as_tensor leaves an existing tensor untouched
# and converts a list/array, so the later `.size()` call works either way.
traces_y = torch.as_tensor(traces)


In [11]:
# Display the shapes of the one-hot inputs and of the node-index targets.
traces_x.size(), traces_y.size()

(torch.Size([100, 3, 111]), torch.Size([100, 3]))

In [12]:
# Reference layout of a PyG Data object (example kept from the original notebook):
# Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
# NOTE(review): pickle can execute arbitrary code while loading; only load trusted files.
with open("../generateData/nodeLabels.pkl", "rb") as fh:
    featureMatrix = pickle.load(fh)
with open("../generateData/adjacencyList.pkl", "rb") as fh:
    adjacencyList = pickle.load(fh)

num_nodes = len(featureMatrix)

# give the labels to the nodes: node i is its own class i
y = torch.arange(num_nodes, dtype=torch.long)

# Randomly hold out TEST_FRACTION (20 percent) of the nodes for testing.
# A dedicated, seeded generator makes the split reproducible without touching
# the global RNG that is used for model initialisation.
TEST_FRACTION = 0.2
SPLIT_SEED = 0
num_test = int(num_nodes * TEST_FRACTION)
split_rng = torch.Generator().manual_seed(SPLIT_SEED)
test_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask[torch.randperm(num_nodes, generator=split_rng)[:num_test]] = True

data = Data(x=featureMatrix, edge_index=adjacencyList, y=y, test_mask=test_mask)

# num features & num classes
num_features = len(featureMatrix[0])
# Labels run over 0..num_nodes-1, so the class count is the node count
# (not the feature width, which only coincides when the features are square).
num_classes = num_nodes

In [11]:
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from torch_geometric.datasets import Planetoid

class BiLSTM(torch.nn.Module):
    """Bidirectional LSTM applied to one unbatched sequence.

    The input is given a leading batch axis of size one, passed through a
    ``batch_first`` bidirectional ``nn.LSTM``, and that axis is removed again.

    Args:
        in_channels: Feature size of each sequence element.
        hidden_channels: Hidden size per direction.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, in_channels, hidden_channels, num_layers=1):
        super().__init__()
        lstm_options = dict(
            input_size=in_channels,
            hidden_size=hidden_channels,
            num_layers=num_layers,
            bidirectional=True,
            batch_first=True,
        )
        self.lstm = nn.LSTM(**lstm_options)

    def forward(self, x):
        """Run the LSTM over ``x`` and return the per-step outputs.

        ``x`` is expected to be ``(sequence_length, in_channels)`` (assumption
        based on the added batch axis — confirm against callers). The final
        hidden/cell states are discarded.
        """
        batched = x.unsqueeze(0)
        per_step, _final_state = self.lstm(batched)
        return per_step.squeeze(0)
    
class GraphSAGE(torch.nn.Module):
    """Two-layer GraphSAGE network that also exposes its hidden activations.

    Args:
        in_channels: Input feature size per node.
        hidden_channels: Output size of the first SAGEConv layer.
        out_channels: Output size of the second SAGEConv layer.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GraphSAGE, self).__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Return ``(output, hidden_embeddings)``.

        ``hidden_embeddings`` is the ReLU-activated output of the first layer,
        taken before dropout; ``output`` is the second layer's result.
        """
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Keep the hidden embeddings attached to the autograd graph. The previous
        # `.detach()` blocked gradients, so a loss built only from these embeddings
        # could never update conv1. F.dropout below is not in-place, so this
        # tensor is unaffected by it. Callers that want a frozen copy can call
        # `.detach()` themselves.
        hidden_embeddings = x
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return x, hidden_embeddings

class CombinedModel(torch.nn.Module):
    """Combine GraphSAGE hidden embeddings with BiLSTM outputs.

    Args:
        in_channels: Input feature size per node (shared by both branches).
        hidden_channels: GraphSAGE hidden size. Must be even: the BiLSTM is built
            with ``hidden_channels // 2`` units per direction so that its
            concatenated output has the same width as the GraphSAGE embeddings.
        out_channels: Output size of the GraphSAGE head (its output is not part
            of the value returned by ``forward``).

    Raises:
        ValueError: if ``hidden_channels`` is odd.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super(CombinedModel, self).__init__()
        if hidden_channels % 2 != 0:
            # With an odd width the BiLSTM emits 2 * (hidden_channels // 2)
            # features, which no longer matches the embeddings in the matmul
            # below; fail here with a clear message instead.
            raise ValueError(
                f"hidden_channels must be even, got {hidden_channels}"
            )
        self.graphsage = GraphSAGE(in_channels, hidden_channels, out_channels)
        self.bilstm = BiLSTM(in_channels, hidden_channels // 2)

    def forward(self, x, edge_index):
        """Return ``embeddings @ out_bilstm.T``.

        Both branches receive the same ``x``; the result has one row and one
        column per row of ``x`` (assuming each branch keeps the row count).
        """
        # Only the hidden embeddings are used; the GraphSAGE head output is dropped.
        _, embeddings = self.graphsage(x, edge_index)
        out_bilstm = self.bilstm(x)
        combined = torch.matmul(embeddings, out_bilstm.transpose(0, 1))
        return combined

# Use the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Build the model and move both it and the graph data to the chosen device.
model = CombinedModel(num_features, HIDDEN_DIM, num_classes)
model = model.to(device)
data = data.to(device)

# Adam with L2 weight decay over all model parameters.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

def train():
    """Run one optimisation step and return the training loss as a float.

    Uses the notebook-level ``model``, ``optimizer`` and ``data``. The score
    matrix is ``combined @ combined.T`` and each node's target is ``data.y``.
    Only nodes outside ``data.test_mask`` contribute to the loss, so the
    held-out nodes are not trained on.
    """
    model.train()
    optimizer.zero_grad()
    combined = model(data.x, data.edge_index)
    out = torch.matmul(combined, combined.transpose(0, 1))
    # Previously the loss covered every node, including those in test_mask,
    # which leaked the evaluation set into training.
    train_mask = ~data.test_mask
    loss = F.cross_entropy(out[train_mask], data.y[train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

def test():
    """Return the accuracy on the nodes selected by ``data.test_mask``.

    Uses the notebook-level ``model`` and ``data``. A node counts as correct when
    the arg-max of its row in ``combined @ combined.T`` equals its label.
    Returns ``nan`` when the test mask selects no nodes.
    """
    model.eval()
    # Evaluation needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        combined = model(data.x, data.edge_index)
        out = torch.matmul(combined, combined.transpose(0, 1))
        mask = data.test_mask
        num_test = int(mask.sum().item())
        if num_test == 0:
            # Accuracy is undefined on an empty split; avoid ZeroDivisionError.
            return float("nan")
        pred = out[mask, :].argmax(dim=1)
        correct = pred.eq(data.y[mask]).sum().item()
    return correct / num_test

# Train for epochs 1..200 and report the test accuracy on every 10th epoch.
for epoch in range(1, 201):
    loss = train()
    if epoch % 10:
        continue  # not a reporting epoch
    acc = test()
    print(f'Epoch: {epoch}, Loss: {loss:.4f}, Test Acc: {acc:.4f}')


torch.Size([111, 111])
torch.Size([111, 111])
torch.Size([111, 111])
torch.Size([111, 111])
torch.Size([111, 111])
torch.Size([111, 111])
torch.Size([111, 111])
torch.Size([111, 111])
torch.Size([111, 111])
torch.Size([111, 111])


TypeError: unsupported format string passed to NoneType.__format__

In [None]:
# Inspect the node feature matrix stored on the Data object.
data.x

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.]])