In [1]:
from importlib import reload
import torch
import torch.nn.functional as F
from sklearn.metrics import balanced_accuracy_score, precision_score, recall_score

import data
reload(data)
from data import AmlsimDataset

import modules
reload(modules)
from modules import GCN, GCN_GNNExplainer, GCN_GraphSVX
from modules import GraphSAGE
from torch_geometric.data import DataLoader
import torch.optim as optim
from torch_geometric.utils import to_networkx

import matplotlib.pyplot as plt

%matplotlib inline
import networkx as nx
from torch_geometric.data import Data

import numpy as np

In [2]:
# Set device to GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

Device: cuda


In [3]:
# Load data
traindata = AmlsimDataset(node_file='data/simulation3/swedbank/train/nodes.csv', edge_file='data/simulation3/swedbank/train/edges.csv', node_features=True, node_labels=True).get_data()
testdata = AmlsimDataset(node_file='data/simulation3/swedbank/test/nodes.csv', edge_file='data/simulation3/swedbank/test/edges.csv', node_features=True, node_labels=True).get_data()
traindata = traindata.to(device)
testdata = testdata.to(device)

# Normalize data
mean = traindata.x.mean(dim=0, keepdim=True)
std = traindata.x.std(dim=0, keepdim=True)
traindata.x = (traindata.x - mean) / std
testdata.x = (testdata.x - mean) / std

In [11]:
test_data = Data(x=testdata.x, edge_index=testdata.edge_index)
train_data = Data(x=traindata.x, edge_index=traindata.edge_index)






In [12]:
from torch import nn
from torch_geometric.nn import GCNConv, SAGEConv, GINEConv, GATConv, BatchNorm, Linear
import torch
from tqdm.notebook import tqdm
import torch.optim as optim


In [13]:
class GAT(nn.Module):
    
    def __init__(self, input_dim, hidden_dim, output_dim,
                 heads_1=8, heads_2=1, att_dropout=0.6, input_dropout=0.6):
        
        super(GAT, self).__init__()

        self.att_dropout = att_dropout
        self.input_dropout = input_dropout

        self.conv1 = GATConv(in_channels=input_dim,
                             out_channels=hidden_dim // heads_1,
                             heads=heads_1,
                             concat=True,
                             dropout=att_dropout)
        self.conv2 = GATConv(in_channels=hidden_dim,
                             out_channels=output_dim,
                             heads=heads_2,
                             concat=False,
                             dropout=att_dropout)

    def forward(self, x, edge_index):
        x = F.dropout(x, p=self.input_dropout, training=self.training)
        x = self.conv1(x, edge_index)
        x = F.elu(x)
        x = F.dropout(x, p=self.input_dropout, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)
    
hparams = {
    'input_dim': 10,
    'hidden_dim': 16,
    'output_dim': 2
}

model = GAT(**hparams)
model

GAT(
  (conv1): GATConv(10, 2, heads=8)
  (conv2): GATConv(16, 2, heads=1)
)

In [14]:
def accuracy(output, labels):
    _, pred = output.max(dim=1)
    correct = pred.eq(labels).double()
    correct = correct.sum()
    
    return correct / len(labels)

lr = 0.005
epochs = 400

model.train()
optimizer = optim.Adam(model.parameters(), lr=lr)
model.to(device)
for epoch in tqdm(range(epochs)):
    optimizer.zero_grad()
    
    output = model(test_data.x, test_data.edge_index)
    output_train = model(train_data.x,train_data.edge_index)
    #convert train_data.y to tensor
    
    loss = F.nll_loss(output_train, train_data.y)
    
    loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        acc = accuracy(output, test_data.y)
        print('Epoch: {:3d}, acc = {:.3f}'.format(epoch, acc))

  0%|          | 0/400 [00:00<?, ?it/s]

TypeError: nll_loss_nd(): argument 'target' (position 2) must be Tensor, not NoneType

In [None]:
node_idx=1
model.eval()

In [None]:
# instantiate a GraphLIME object
explainer = GraphLIME(model, hop=2, rho=0.1, cached=True)

# explain node features by calling the method `explain_node()`
coefs = explainer.explain_node(node_idx, data.x, data.edge_index)

coefs