# Testes com a GCN implementada no PyTorch Geometric

Foi utilizada uma implementação em [PyTorch Geometric][1] da rede neural presente no artigo ["Anti-Money Laundering in Bitcoin: Experimenting with Graph Convolutional Networks for Financial Forensics"][2].

[1]: https://pytorch-geometric.readthedocs.io/en/latest/
[2]: https://arxiv.org/pdf/1908.02591.pdf

## Importando as Bibliotecas

In [1]:
import torch
import torch.nn.functional as F
import torch_geometric.nn as nn
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

## Carregar os Dados

In [2]:
train_data = []
test_data = []

for i in range(1,35):
    train_data.append(torch.load('elliptic_pt/train/' + str(i) + '.pt'))

for i in range(35,50):
    test_data.append(torch.load('elliptic_pt/test/' + str(i) + '.pt'))

In [3]:
train_data

[Data(x=[2147, 166], edge_index=[2, 1924], y=[2147], adjacency_matrix=[2147, 2147]),
 Data(x=[1117, 166], edge_index=[2, 858], y=[1117], adjacency_matrix=[1117, 1117]),
 Data(x=[1279, 166], edge_index=[2, 727], y=[1279], adjacency_matrix=[1279, 1279]),
 Data(x=[1440, 166], edge_index=[2, 1169], y=[1440], adjacency_matrix=[1440, 1440]),
 Data(x=[1882, 166], edge_index=[2, 1491], y=[1882], adjacency_matrix=[1882, 1882]),
 Data(x=[485, 166], edge_index=[2, 209], y=[485], adjacency_matrix=[485, 485]),
 Data(x=[1203, 166], edge_index=[2, 858], y=[1203], adjacency_matrix=[1203, 1203]),
 Data(x=[1165, 166], edge_index=[2, 1044], y=[1165], adjacency_matrix=[1165, 1165]),
 Data(x=[778, 166], edge_index=[2, 484], y=[778], adjacency_matrix=[778, 778]),
 Data(x=[972, 166], edge_index=[2, 538], y=[972], adjacency_matrix=[972, 972]),
 Data(x=[696, 166], edge_index=[2, 477], y=[696], adjacency_matrix=[696, 696]),
 Data(x=[506, 166], edge_index=[2, 446], y=[506], adjacency_matrix=[506, 506]),
 Data(x=

## Definir o Modelo

In [4]:
hidden_size = 100
n_classes = 2


class GCN(torch.nn.Module):
    def __init__(self):
        super(GCN, self).__init__()
        self.conv1 = nn.GCNConv(166,100, bias=False)
        self.act1 = torch.nn.ReLU()
        self.conv2 = nn.GCNConv(100,2, bias=False)
        self.act2 = torch.nn.Softmax(dim=1)
    
    def forward(self, x, edge_index, batch_index):
        hidden1 = self.conv1(x, edge_index)
        hidden2 = self.act1(hidden1)
        hidden3 = self.conv2(hidden2, edge_index)
        output = self.act2(hidden3)
        
        return hidden3, output

In [5]:
model = GCN()

In [6]:
model

GCN(
  (conv1): GCNConv(166, 100)
  (act1): ReLU()
  (conv2): GCNConv(100, 2)
  (act2): Softmax(dim=1)
)

In [7]:
## Use a GPU para treinar
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [8]:
loss = torch.nn.CrossEntropyLoss(weight=torch.Tensor([0.7, 0.3]))

In [9]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

## Treino

In [10]:
def train():
    model.train()
    # Enumerar sobre os dados.
    for ts, data in enumerate(train_data):
        for epoch in range(1000):
            # Usar GPU
            data.to(device)
            # Resetar Gradientes
            optimizer.zero_grad()
            # Passando as informações do batch e de conexão dos grafos
            hidden, logits = model(data.x.float(), data.edge_index, None)
            #label_pred = pred.max(1)[1]
            # Calculando a perda e os gradientes.
            l = loss(logits, data.y)
            l.backward()
            # Atualizar usando os gradientes.
            optimizer.step()
        if (epoch + 1) % 100 == 0:
            print('ts',ts+1,'epoch =', epoch + 1, 'loss =', l.item())

In [11]:
train()

ts 1 epoch = 1000 loss = 0.3220149278640747
ts 2 epoch = 1000 loss = 0.32146376371383667
ts 3 epoch = 1000 loss = 0.3240981996059418
ts 4 epoch = 1000 loss = 0.3306117355823517
ts 5 epoch = 1000 loss = 0.31942635774612427
ts 6 epoch = 1000 loss = 0.3275001645088196
ts 7 epoch = 1000 loss = 0.337736040353775
ts 8 epoch = 1000 loss = 0.34862545132637024
ts 9 epoch = 1000 loss = 0.3419356048107147
ts 10 epoch = 1000 loss = 0.3273288309574127
ts 11 epoch = 1000 loss = 0.33356577157974243
ts 12 epoch = 1000 loss = 0.33032894134521484
ts 13 epoch = 1000 loss = 0.3492515981197357
ts 14 epoch = 1000 loss = 0.3399660885334015
ts 15 epoch = 1000 loss = 0.33048033714294434
ts 16 epoch = 1000 loss = 0.3498995006084442
ts 17 epoch = 1000 loss = 0.34968432784080505
ts 18 epoch = 1000 loss = 0.3445361256599426
ts 19 epoch = 1000 loss = 0.3621918261051178
ts 20 epoch = 1000 loss = 0.40927445888519287
ts 21 epoch = 1000 loss = 0.3834321200847626
ts 22 epoch = 1000 loss = 0.347726047039032
ts 23 epoch =

## Teste

In [12]:
label_pred_list = []
y_true_list = []

def test():
    model.eval()
    with torch.no_grad():
        global label_pred_list
        global y_true_list
        for data in test_data:
            data.to(device)
            _, logits = model(data.x.float(), data.edge_index, None)
            label_pred = logits.max(1)[1].tolist()
            label_pred_list += label_pred
            y_true_list += data.y.tolist()
    model.train()

In [13]:
test()

In [14]:
label_pred_list

[1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,


In [15]:
y_true_list

[1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,


## Resultados

In [16]:
mean_precision = []
mean_recall = []
mean_f1 = []

prec = precision_score(y_true_list, label_pred_list, average='binary', labels=[0])
rec = recall_score(y_true_list, label_pred_list, average='binary', labels=[0])
f1 = f1_score(y_true_list, label_pred_list, average='binary', labels=[0])

In [17]:
print(classification_report(y_true_list,label_pred_list))

              precision    recall  f1-score   support

           0       0.87      0.53      0.66      1083
           1       0.97      0.99      0.98     15587

    accuracy                           0.96     16670
   macro avg       0.92      0.76      0.82     16670
weighted avg       0.96      0.96      0.96     16670



In [18]:
prec

0.9685059051427857

In [19]:
rec

0.9943542695836274

In [20]:
f1

0.9812598923710035