In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
os.chdir('/content/drive/MyDrive/Graph_Neural_Network')

In [3]:
!pip install torch_geometric==2.5.0
import torch_geometric
from torch_geometric.data import Data, DataLoader
import numpy as np
import torch
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
import networkx as nx
import seaborn as sns
import os

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, SAGEConv, GATConv, TAGConv

class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

class GraphSAGE(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

class GAT(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GATConv(in_channels, hidden_channels, heads=2)
        self.conv2 = GATConv(hidden_channels * 2, out_channels, heads=1)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.elu(x)
        x = self.conv2(x, edge_index)
        return x

Collecting torch_geometric==2.5.0
  Downloading torch_geometric-2.5.0-py3-none-any.whl.metadata (64 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/64.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.2/64.2 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.5.0-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.5.0


In [4]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef

def train_model(model, data, train_idx, val_idx, optimizer, criterion, num_epochs=200):
    model.train()
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        loss = criterion(out[train_idx], data.y[train_idx].unsqueeze(1))
        loss.backward()
        optimizer.step()
    return model

def evaluate_model(model, data, test_idx):
    model.eval()
    with torch.no_grad():
        logits = model(data.x, data.edge_index)
        probs = torch.sigmoid(logits[test_idx]).squeeze().cpu().numpy()
        preds = (probs > 0.5).astype(int)
        labels = data.y[test_idx].cpu().numpy()

        return {
            'Accuracy': accuracy_score(labels, preds),
            'Precision': precision_score(labels, preds),
            'Recall': recall_score(labels, preds),
            'F1-score': f1_score(labels, preds),
            'AUC': roc_auc_score(labels, probs),
            'MCC': matthews_corrcoef(labels, preds)  # 新增MCC指标
        }


In [5]:
# === 数据读取和预训练 ===
df_classes = pd.read_csv('./data/elliptic_bitcoin_dataset/elliptic_txs_classes.csv')
df_edges = pd.read_csv('./data/elliptic_bitcoin_dataset/elliptic_txs_edgelist.csv')
df_features = pd.read_csv('./data/elliptic_bitcoin_dataset/elliptic_txs_features.csv', header=None)

df_classes = df_classes[df_classes['class'] != 'unknown']
df_classes['class'] = df_classes['class'].astype(str).map({'1': 1, '2': 0})
df_merge = df_features.merge(df_classes, how='inner', right_on='txId', left_on=0).drop(['txId'], axis=1)

nodes = df_merge[0].values
map_id = {j: i for i, j in enumerate(nodes)}

edges = df_edges[df_edges.txId1.isin(map_id) & df_edges.txId2.isin(map_id)].copy()
edges.txId1 = edges.txId1.map(map_id)
edges.txId2 = edges.txId2.map(map_id)
edge_index = torch.tensor(edges.values.T, dtype=torch.long)

x = torch.tensor(df_merge.drop([0, 'class', 1], axis=1).values, dtype=torch.float32)
y = torch.tensor(df_merge['class'].values, dtype=torch.float32)

data = Data(x=x, edge_index=edge_index, y=y)

# === 划分训练集、验证集、测试集（70/15/15） ===
all_idx = np.arange(len(y))
train_idx, temp_idx = train_test_split(all_idx, test_size=0.3, stratify=y)
val_idx, test_idx = train_test_split(temp_idx, test_size=0.5, stratify=y[temp_idx])

train_idx = torch.tensor(train_idx, dtype=torch.long)
val_idx = torch.tensor(val_idx, dtype=torch.long)
test_idx = torch.tensor(test_idx, dtype=torch.long)

# === GNNs模型对比 ===
models = {
    'GCN': GCN(x.shape[1], 100, 1),
    'GraphSAGE': GraphSAGE(x.shape[1], 100, 1),
    'GAT': GAT(x.shape[1], 64, 1),
}

results = {}
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for name, model in models.items():
    print(f"\nTraining model: {name}")
    model = model.to(device)
    data = data.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=1e-5)
    criterion = torch.nn.BCEWithLogitsLoss()

    trained_model = train_model(model, data, train_idx, val_idx, optimizer, criterion)
    metrics = evaluate_model(trained_model, data, test_idx)
    results[name] = metrics

# === 打印对比结果 ===
df_result = pd.DataFrame(results).T
print(df_result.round(4))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_classes['class'] = df_classes['class'].astype(str).map({'1': 1, '2': 0})



Training model: GCN

Training model: GraphSAGE

Training model: GAT
           Accuracy  Precision  Recall  F1-score     AUC     MCC
GCN          0.9674     0.8941  0.7551    0.8188  0.9656  0.8044
GraphSAGE    0.9761     0.9214  0.8255    0.8708  0.9816  0.8593
GAT          0.9669     0.8977  0.7463    0.8151  0.9588  0.8012
