In [51]:
from importlib import reload
import torch
import torch.nn.functional as F
from sklearn.metrics import balanced_accuracy_score, precision_score, recall_score

import data
reload(data)
from data import AmlsimDataset

import modules
reload(modules)
from modules import GCN

In [52]:
# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Device: {device}")

Device: cuda


In [65]:
# Load the train and test graphs (node features and node labels enabled),
# then move both onto the selected device.
traindata = AmlsimDataset(
    node_file='data/simulation2/swedbank/train/nodes.csv',
    edge_file='data/simulation2/swedbank/train/edges.csv',
    node_features=True,
    node_labels=True,
).get_data()
testdata = AmlsimDataset(
    node_file='data/simulation2/swedbank/test/nodes.csv',
    edge_file='data/simulation2/swedbank/test/edges.csv',
    node_features=True,
    node_labels=True,
).get_data()
traindata, testdata = traindata.to(device), testdata.to(device)

# # Convert label tensors to one-hot encoded form
# traindata.y = F.one_hot(traindata.y, num_classes=2)
# testdata.y = F.one_hot(testdata.y, num_classes=2)

In [66]:
# Normalize data
# Standardize the feature tensor along dim 0 (presumably one row per node and
# one column per feature -- confirm against AmlsimDataset). The statistics are
# computed on the training graph only and reused for the test graph, so no
# test-set information enters the preprocessing.
# NOTE: x is overwritten in place; re-running this cell without reloading the
# data standardizes already-standardized features a second time.
mean = traindata.x.mean(dim=0, keepdim=True)
std = traindata.x.std(dim=0, keepdim=True)
# A column that is constant in the training set has std == 0 (and std is NaN
# when there is only one row). Dividing by it would fill that column with
# NaN/inf and make every later loss NaN, so such columns are scaled by 1
# instead; columns with a positive std are unaffected.
std = torch.where(std > 0, std, torch.ones_like(std))
traindata.x = (traindata.x - mean) / std
testdata.x = (testdata.x - mean) / std

In [67]:
# Instantiate model
# Seed PyTorch's random number generators so the weight initialization is the
# same on every Restart & Run All. (GPU kernels may still introduce small
# run-to-run differences during training.)
torch.manual_seed(0)
# Take the input width from the loaded features instead of hard-coding it, so
# the first layer always matches the data (10 for the dataset used here,
# according to the recorded model repr).
input_dim = traindata.x.size(-1)
hidden_dim = 16
output_dim = 2   # two classes: labels 0 and 1
n_layers = 3
dropout = 0.3
model = GCN(input_dim, hidden_dim, output_dim, n_layers, dropout)
# Last expression of the cell: moves the model to `device` and displays its repr.
model.to(device)

GCN(
  (convs): ModuleList(
    (0): GCNConv(10, 16)
    (1): GCNConv(16, 16)
    (2): GCNConv(16, 2)
  )
  (bns): ModuleList(
    (0-1): 2 x BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (softmax): Softmax(dim=1)
)

In [68]:
# Sanity check: show the output dimension stored on the model
# (presumably the output_dim passed to GCN(...) -- confirm in modules.py).
print(model.output_dim)

2


In [69]:
# Optimizer: Adam over every model parameter, with the step size kept in `lr`.
lr = 1e-3
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [70]:
# Loss: cross-entropy averaged over the inputs. CrossEntropyLoss applies
# log-softmax itself and takes integer class indices as targets.
# NOTE(review): the recorded GCN repr lists a Softmax module; if forward()
# already applies it, this loss receives probabilities rather than logits --
# confirm in modules.py.
criterion = torch.nn.CrossEntropyLoss(reduction='mean')

In [72]:
# Full-batch training: one optimizer step per epoch on the whole training
# graph, with test-set loss / precision / recall reported periodically.
n_epochs = 200    # total number of optimizer steps
eval_every = 10   # report test metrics every this many epochs

# The test labels never change, so copy them to host memory once instead of
# on every evaluation.
test_labels = testdata.y.cpu().numpy()

for epoch in range(n_epochs):
    # --- training step ---
    model.train()
    optimizer.zero_grad()
    out = model(traindata)
    loss = criterion(out, traindata.y)
    loss.backward()
    optimizer.step()

    # --- periodic evaluation on the test graph ---
    if (epoch + 1) % eval_every == 0:
        model.eval()
        with torch.no_grad():
            # `out` and `loss` are deliberately reused here, so after the loop
            # they hold the values from the last test evaluation.
            out = model(testdata)
            loss = criterion(out, testdata.y)
            # Predicted class = index of the largest score in each row;
            # computed once and shared by both metrics.
            test_pred = out.cpu().numpy().argmax(axis=1)
            # zero_division=0 reports 0 (without a warning) when the
            # denominator of a metric is zero, e.g. no positive predictions.
            precision = precision_score(test_labels, test_pred, zero_division=0)
            recall = recall_score(test_labels, test_pred, zero_division=0)
            print(f'epoch: {epoch + 1}, loss: {loss:.4f}, precision: {precision:.4f}, recall: {recall:.4f}')

epoch: 10, loss: 0.5409, precision: 0.7500, recall: 0.1627
epoch: 20, loss: 0.5411, precision: 0.7714, recall: 0.1627
epoch: 30, loss: 0.5402, precision: 0.7568, recall: 0.1687
epoch: 40, loss: 0.5380, precision: 0.7632, recall: 0.1747
epoch: 50, loss: 0.5378, precision: 0.7576, recall: 0.1506
epoch: 60, loss: 0.5366, precision: 0.7442, recall: 0.1928
epoch: 70, loss: 0.5365, precision: 0.7442, recall: 0.1928
epoch: 80, loss: 0.5371, precision: 0.7317, recall: 0.1807
epoch: 90, loss: 0.5358, precision: 0.7500, recall: 0.1807
epoch: 100, loss: 0.5338, precision: 0.7143, recall: 0.2108
epoch: 110, loss: 0.5315, precision: 0.6792, recall: 0.2169
epoch: 120, loss: 0.5346, precision: 0.6875, recall: 0.1988
epoch: 130, loss: 0.5367, precision: 0.7111, recall: 0.1928
epoch: 140, loss: 0.5355, precision: 0.7174, recall: 0.1988
epoch: 150, loss: 0.5341, precision: 0.7174, recall: 0.1988
epoch: 160, loss: 0.5333, precision: 0.6875, recall: 0.1988
epoch: 170, loss: 0.5325, precision: 0.6800, reca

In [75]:
from sklearn.metrics import confusion_matrix
import numpy as np

# Final evaluation on the test graph: confusion matrix plus the class balance
# of the true labels.
model.eval()
with torch.no_grad():
    out = model(testdata)

# Predicted class = index of the largest score in each row.
y_pred = out.cpu().numpy().argmax(axis=1)
y_true = testdata.y.cpu().numpy()

# Rows are true classes, columns are predicted classes. The label set is
# fixed so the matrix is always 2x2, even if one class is absent.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
print(cm)

# Class balance of the test labels, in the same format as the recorded output
# of this cell; each count equals the corresponding row sum of the matrix.
print(f"Number of labels equal to 0: {np.count_nonzero(y_true == 0)}")
print(f"Number of labels equal to 1: {np.count_nonzero(y_true == 1)}")





[[385  15]
 [132  34]]
Number of labels equal to 0: 400
Number of labels equal to 1: 166
