In [1]:
import networkx as nx
import pandas as pd
from tqdm import tqdm

In [2]:
# Read the two raw CSV inputs: the address-to-address edge list and the
# per-wallet feature table (which also carries the "class" label column).
df_addr_addr, df_wallets_features = map(
    pd.read_csv,
    ("data/AddrAddr_edgelist.csv", "data/wallets_features_classes_combined.csv"),
)

In [3]:
# Build the binary-labelled wallet table in one chained expression.
# The chain works on an explicit copy, so the label rewrite never assigns into
# a filtered frame (the pattern that triggers pandas' SettingWithCopyWarning),
# and the name is only rebound if every step succeeds - a failed re-run of this
# cell leaves the previous frame untouched instead of half-transformed.
df_wallets_features = (
    df_wallets_features
    # drop all rows with class = 3
    .loc[lambda wallets: wallets["class"] != 3]
    .copy()
    # change class 1 to 0 and class 2 to 1 - this is needed for binary classification
    # (vectorised form of `0 if x == 1 else 1`)
    .assign(**{"class": lambda wallets: (wallets["class"] != 1).astype("int64")})
    # order by Time step from smallest to largest
    .sort_values("Time step", ascending=True)
    # drop Time step
    .drop(columns="Time step")
)

In [4]:
# # Check whether any address appears more than once with conflicting class labels across time steps, which would cause problems.
# super_dict = {}
# reoccurences = 0
# problems = []
# for i, row in tqdm(df_wallets_features.iterrows(), total=len(df_wallets_features), desc='Checking wallet classes'):
#     addr = row['address']
#     class_ = row['class']
#     if addr in super_dict:
#         reoccurences += 1
#         if super_dict[addr] != class_:
#             print('Address with different classes:', addr)
#             problems.append(addr)
#     else: 
#         super_dict[addr] = class_
# print('Number of unique addresses in wallet features:', len(super_dict))
# print('Number of reoccurences:', reoccurences)
# if problems:
#     print('Problems:', problems)
# else: 
#     print('No problems found...')
# assert len(super_dict) + reoccurences == len(df_wallets_features)

In [5]:
# Keep one row per address: the last occurrence in the current row order wins,
# then the index is renumbered from 0.
df_wallets_features = df_wallets_features.loc[
    ~df_wallets_features.duplicated(subset="address", keep="last")
].reset_index(drop=True)

In [6]:
# Keep an edge only when BOTH of its endpoints appear in the wallet feature table.
df_addr_addr = df_addr_addr.loc[
    lambda edges: edges["input_address"].isin(df_wallets_features["address"])
    & edges["output_address"].isin(df_wallets_features["address"])
]

In [7]:
# Min-max scale every feature column to [0, 1]. "address" and "class" are
# excluded by name rather than by position, so the result does not depend on
# the column order of the frame.
_features = df_wallets_features.drop(columns=["address", "class"])
_low = _features.min()
# A constant column has zero range; dividing by 1 instead maps it to 0.0
# rather than filling it with NaN (0 / 0).
_span = (_features.max() - _low).replace(0, 1)
# Assign whole columns (not an iloc slice) so integer columns are replaced by
# their float result instead of having floats written into them in place.
df_wallets_features[list(_features.columns)] = (_features - _low) / _span

In [8]:
# Build the graph from the filtered edge list (one edge per row).
G = nx.from_pandas_edgelist(
    df_addr_addr, source="input_address", target="output_address"
)

# Wallets that have no remaining edge are still needed as (isolated) nodes.
# add_nodes_from leaves nodes that already exist unchanged and appends the
# missing ones in table order.
G.add_nodes_from(df_wallets_features["address"])

print('Graph loaded.')
print('Number of nodes:', G.number_of_nodes())
print('Number of edges:', G.number_of_edges())

Graph loaded.
Number of nodes: 265354
Number of edges: 1090054


In [9]:
import torch
from torch_geometric.utils import from_networkx  # type: ignore

# Copy every column of each wallet row (including "address" and "class") onto
# the matching graph node as node attributes.
wallet_rows = tqdm(
    df_wallets_features.iterrows(),
    total=len(df_wallets_features),
    desc="Adding node features",
)
for _, wallet_row in wallet_rows:
    node_attrs = wallet_row.to_dict()
    G.nodes[node_attrs["address"]].update(node_attrs)

# Convert the attributed NetworkX graph into a PyTorch Geometric data object.
data = from_networkx(G)

Adding node features: 100%|██████████| 265354/265354 [00:07<00:00, 35328.61it/s]


In [10]:
def train(model, data, optimizer, loss_fn):
    """Run one full-batch optimisation step and return the training loss.

    Args:
        model: Module called as ``model(data)``; switched to training mode here.
        data: Object exposing ``y`` and ``train_mask``; passed unchanged to the model.
        optimizer: Optimiser whose gradients are reset before the step.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar tensor.

    Returns:
        The loss over the entries selected by ``data.train_mask`` as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    logits = model(data)
    selected = data.train_mask
    # Only the training entries contribute to the loss.
    step_loss = loss_fn(logits[selected], data.y[selected])

    step_loss.backward()
    optimizer.step()
    return step_loss.item()

In [11]:
from sklearn.metrics import recall_score, precision_score, f1_score


# def evaluate(mask, model, data, class_weights):
#     model.eval()
#     with torch.no_grad():
#         out = model(data)
#         pred = out[mask].max(dim=1)[1]
#         true_labels = data.y[mask].cpu()
#         pred_labels = pred.cpu()

#         # Calculate F1 scores for both classes
#         recall_0 = recall_score(true_labels, pred_labels, pos_label=0)
#         precision_0 = precision_score(true_labels, pred_labels, pos_label=0)
#         f1_0 = f1_score(true_labels, pred_labels, pos_label=0)

#         recall_1 = recall_score(true_labels, pred_labels, pos_label=1)
#         precision_1 = precision_score(true_labels, pred_labels, pos_label=1)
#         f1_1 = f1_score(true_labels, pred_labels, pos_label=1)

#         # Calculate weighted average F1 score
#         weighted_f1 = class_weights[0] * f1_0 + class_weights[1] * f1_1
#         r = 0.75
#         weighted_f1 = r * f1_0 + (1 - r) * f1_1

#     return (recall_0, precision_0, f1_0), (recall_1, precision_1, f1_1), weighted_f1


def evaluate(mask, model, data, _):
    """Score the model's predictions on the entries selected by ``mask``.

    Class 0 is used as the positive label for all three metrics.

    Args:
        mask: Selector applied to both the model output and ``data.y``.
        model: Module called as ``model(data)``; switched to eval mode here.
        data: Object exposing ``y``; passed unchanged to the model.
        _: Ignored. Kept so existing four-argument call sites keep working.

    Returns:
        Tuple ``(recall, precision, f1)`` as returned by the metric functions.
    """
    model.eval()
    with torch.no_grad():
        logits = model(data)
        # Predicted class = index of the largest output along dim 1.
        predicted = torch.max(logits[mask], dim=1).indices.cpu()
        expected = data.y[mask].cpu()
        scores = tuple(
            metric(expected, predicted, pos_label=0)
            for metric in (recall_score, precision_score, f1_score)
        )
    return scores

In [12]:
from Model_GAT import GAT
from Model_GCN import GCN
from Model_TAGNN import TAGNN


# Update setup_model function
def setup_model(
    model_class,
    input_channels,
    hidden_channels_1,
    hidden_channels_2,
    output_channels,
    dropout,
    learning_rate,
    weight_decay,
    gat_heads=1,
    data=None,
    device=None,
):
    """Create a model together with its Adam optimiser and loss function.

    Args:
        model_class: One of ``GAT``, ``TAGNN`` or ``GCN``.
        input_channels: Forwarded to the model constructor (1st positional).
        hidden_channels_1: Forwarded to the model constructor (2nd positional).
        hidden_channels_2: Forwarded to the model constructor (3rd positional).
        output_channels: Forwarded to the model constructor (4th positional).
        dropout: Forwarded to the model constructor (5th positional).
        learning_rate: Adam learning rate.
        weight_decay: Adam weight decay.
        gat_heads: Passed as ``heads=`` to ``GAT`` only; ignored otherwise.
        data: Optional object exposing ``y`` and ``train_mask``. When given,
            the loss weights class 0 by ``count(class 1) / count(class 0)``
            over the training entries and class 1 by 1.0.
        device: Device for the class-weight tensor. Defaults to ``"cuda"``
            when CUDA is available (the previous hard-coded behaviour) and to
            ``"cpu"`` otherwise.

    Returns:
        Tuple ``(model, optimizer, criterion)``.

    Raises:
        ValueError: If ``model_class`` is not a supported class, or if ``data``
            is given but contains no class-0 training entries (the weight
            ratio would be a division by zero).
    """
    layer_args = (
        input_channels,
        hidden_channels_1,
        hidden_channels_2,
        output_channels,
        dropout,
    )
    if model_class == GAT:
        model = GAT(*layer_args, heads=gat_heads)
    elif model_class == TAGNN:
        model = TAGNN(*layer_args)
    elif model_class == GCN:
        model = GCN(*layer_args)
    else:
        # Previously fell through and failed later with an UnboundLocalError.
        raise ValueError(f"Unsupported model_class: {model_class!r}")

    optimizer = torch.optim.Adam(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay
    )

    if data is None:
        criterion = torch.nn.CrossEntropyLoss()
    else:
        # Count the training labels; minlength=2 keeps both slots present even
        # when one class is absent from the training entries.
        train_counts = data.y[data.train_mask].bincount(minlength=2)
        n_class0 = train_counts[0].item()
        n_class1 = train_counts[1].item()
        if n_class0 == 0:
            raise ValueError(
                "Cannot derive class weights: no class-0 samples in the training mask"
            )

        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"

        # Adjust the loss function to give more importance to the minority class
        class_weights = torch.tensor(
            [n_class1 / n_class0, 1.0], dtype=torch.float, device=device
        )
        criterion = torch.nn.CrossEntropyLoss(weight=class_weights)

    return model, optimizer, criterion

In [13]:
import copy
from tqdm import tqdm


def train_model(model, data, optimizer, loss_fn, epochs, verbose=False):
    """Train for ``epochs`` epochs and restore the weights with the best validation F1.

    After every epoch the model is scored on ``data.val_mask``; the state dict
    of the best-scoring epoch is snapshotted and loaded back at the end.

    Args:
        model: Module to train; updated in place.
        data: Object exposing ``y``, ``train_mask`` and ``val_mask``.
        optimizer: Optimiser forwarded to ``train``.
        loss_fn: Loss callable forwarded to ``train``.
        epochs: Number of training epochs.
        verbose: When True, print loss / train F1 / val F1 every 20 epochs
            instead of showing a progress bar.

    Returns:
        None.
    """
    # Start below any reachable score so the first epoch is always snapshotted.
    # With the former start value of 0, a run whose validation F1 never rose
    # above 0 ended in load_state_dict(None).
    best_f1 = float("-inf")
    best_model_iter = 0
    best_model = None

    class_counts = data.y[data.train_mask].bincount().cpu().numpy()
    # Class frequencies of the training entries; forwarded to evaluate().
    class_weights = class_counts / class_counts.sum()

    # One loop for both modes: plain range when printing, progress bar otherwise.
    epoch_iter = range(epochs) if verbose else tqdm(range(epochs))
    for epoch in epoch_iter:
        loss = train(model, data, optimizer, loss_fn)
        _, _, val_f1 = evaluate(data.val_mask, model, data, class_weights)

        if val_f1 > best_f1:
            best_f1 = val_f1
            best_model_iter = epoch
            best_model = copy.deepcopy(model.state_dict())

        if verbose and (epoch + 1) % 20 == 0:
            # Train F1 is only needed for the log line, so it is computed only
            # when that line is actually printed.
            _, _, train_f1 = evaluate(data.train_mask, model, data, class_weights)
            print(
                f"Epoch: {epoch+1}, Loss: {loss:.4f}, Train F1: {train_f1:.4f}, Val F1: {val_f1:.4f}"
            )

    if best_model is None:
        # Only reachable when no epoch ran; there is nothing to restore.
        print("No epochs were run; keeping the current model weights")
        return

    print(f"Loading the best model at iteration {best_model_iter}")
    model.load_state_dict(best_model)

In [14]:
from sklearn.metrics import confusion_matrix, classification_report


def test_model(model, data):
    model.eval()
    with torch.no_grad():
        out = model(data)
        pred = out[data.test_mask].max(dim=1)[1]
        correct = pred.eq(data.y[data.test_mask]).sum().item()
        acc = correct / data.test_mask.sum().item()
        print(f"Test Accuracy: {acc:.4f}")
        print(confusion_matrix(data.y[data.test_mask].cpu(), pred.cpu()))
        print(classification_report(data.y[data.test_mask].cpu(), pred.cpu()))

In [15]:
import random
import numpy as np

def set_seed(seed):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to every generator.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    # Deterministic kernels on, autotuner off.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

In [16]:
# ---- fixed hyperparameters ----
EPOCH = 300
INPUT_CHANNELS = 55
OUTPUT_CHANNELS = 2
WEIGHT_DECAY = 0.0005

# ---- tunable hyperparameters ----
LEARNING_RATE = 0.01
DROPOUT = 0.5
GAT_HEADS = 2
# Passed to SMOTE as `sampling_strategy`.
SAMPLING_STRATEGY = 0.6

# First hidden width, per architecture.
HIDDEN_CHANNELS_1_GCN = 32
HIDDEN_CHANNELS_1_GAT = 16
HIDDEN_CHANNELS_1_TAGNN = 32

# Second hidden width, per architecture.
HIDDEN_CHANNELS_2_GCN = 16
HIDDEN_CHANNELS_2_GAT = 24
HIDDEN_CHANNELS_2_TAGNN = 16

In [21]:
# from imblearn.over_sampling import SMOTE
# from sklearn.model_selection import train_test_split
# import torch
# import numpy as np


# def resample(df_wallets_features, sampling_strategy, verbose=False):
#     # Prepare the feature matrix and labels
#     feature_columns = [
#         col for col in df_wallets_features.columns if col not in ["address", "class"]
#     ]
#     data.x = torch.tensor(
#         df_wallets_features[feature_columns].values, dtype=torch.float
#     )
#     data.y = torch.tensor(df_wallets_features["class"].values, dtype=torch.long)

#     # Split the data into training, validation, and test sets
#     train_ratio = 0.7
#     val_ratio = 0.1
#     test_ratio = 0.2
#     assert train_ratio + val_ratio + test_ratio == 1.0
#     assert 0 < sampling_strategy <= 1

#     # Split indices into training and temp sets
#     train_idx, temp_idx, train_labels, temp_labels = train_test_split(
#         np.arange(len(data.y)),
#         data.y,
#         stratify=data.y,
#         test_size=(1 - train_ratio),
#         random_state=42,
#     )

#     # Split temp set into validation and test sets
#     val_idx, test_idx, val_labels, test_labels = train_test_split(
#         temp_idx,
#         temp_labels,
#         stratify=temp_labels,
#         test_size=(test_ratio / (val_ratio + test_ratio)),
#         random_state=42,
#     )

#     # Create boolean masks
#     train_mask = torch.zeros(len(data.y), dtype=torch.bool)
#     val_mask = torch.zeros(len(data.y), dtype=torch.bool)
#     test_mask = torch.zeros(len(data.y), dtype=torch.bool)

#     train_mask[train_idx] = True
#     val_mask[val_idx] = True
#     test_mask[test_idx] = True

#     # Apply SMOTE to handle class imbalance only on the training set
#     smote = SMOTE(sampling_strategy=SAMPLING_STRATEGY, random_state=42)
#     X_resampled, y_resampled = smote.fit_resample(
#         data.x[train_mask].numpy(), data.y[train_mask].numpy()
#     )

#     # Create new tensors for resampled training features and labels
#     new_train_features = torch.tensor(X_resampled, dtype=torch.float)
#     new_train_labels = torch.tensor(y_resampled, dtype=torch.long)

#     # Combine resampled training data with the original validation and test data
#     combined_features = torch.cat(
#         (new_train_features, data.x[val_mask], data.x[test_mask]), dim=0
#     )
#     combined_labels = torch.cat(
#         (new_train_labels, data.y[val_mask], data.y[test_mask]), dim=0
#     )

#     # Update indices for the combined dataset
#     new_train_idx = torch.arange(len(new_train_labels))
#     new_val_idx = torch.arange(
#         len(new_train_labels), len(new_train_labels) + len(data.y[val_mask])
#     )
#     new_test_idx = torch.arange(
#         len(new_train_labels) + len(data.y[val_mask]), len(combined_labels)
#     )

#     # Create new boolean masks for the combined dataset
#     new_train_mask = torch.zeros(len(combined_labels), dtype=torch.bool)
#     new_val_mask = torch.zeros_like(new_train_mask)
#     new_test_mask = torch.zeros_like(new_train_mask)

#     new_train_mask[new_train_idx] = True
#     new_val_mask[new_val_idx] = True
#     new_test_mask[new_test_idx] = True

#     # Update the data object
#     data.x = combined_features
#     data.y = combined_labels
#     data.train_mask = new_train_mask
#     data.val_mask = new_val_mask
#     data.test_mask = new_test_mask

#     # Verify the class distribution in the new training set
#     if verbose:
#         print(
#             "Class distribution in the new training set:",
#             data.y[data.train_mask].bincount().cpu().numpy(),
#         )
#         print(
#             "Class distribution in validation set:",
#             data.y[data.val_mask].bincount().cpu().numpy(),
#         )
#         print(
#             "Class distribution in test set:",
#             data.y[data.test_mask].bincount().cpu().numpy(),
#         )

#     return data

# set_seed(3407)
# data = resample(df_wallets_features, SAMPLING_STRATEGY, verbose=True)

Class distribution in the new training set: [105456 175761]
Class distribution in validation set: [ 1427 25108]
Class distribution in test set: [ 2853 50219]


In [18]:
# import warnings
# from sklearn.exceptions import UndefinedMetricWarning

# warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

# # gcn_model, gcn_optimizer, gcn_loss_fn = setup_model(
# #     GCN,
# #     INPUT_CHANNELS,
# #     HIDDEN_CHANNELS_1_GCN,
# #     HIDDEN_CHANNELS_2_GCN,
# #     OUTPUT_CHANNELS,
# #     DROPOUT,
# #     LEARNING_RATE,
# #     WEIGHT_DECAY,
# #     data=data,
# # )
# set_seed(3407)
# gat_model, gat_optimizer, gat_loss_fn = setup_model(
#     GAT,
#     INPUT_CHANNELS,
#     HIDDEN_CHANNELS_1_GAT,
#     HIDDEN_CHANNELS_2_GAT,
#     OUTPUT_CHANNELS,
#     DROPOUT,
#     LEARNING_RATE,
#     WEIGHT_DECAY,
#     GAT_HEADS,
#     data=data,
# )
# # tagnn_model, tagnn_optimizer, tagnn_loss_fn = setup_model(
# #     TAGNN,
# #     INPUT_CHANNELS,
# #     HIDDEN_CHANNELS_1_TAGNN,
# #     HIDDEN_CHANNELS_2_TAGNN,
# #     OUTPUT_CHANNELS,
# #     DROPOUT,
# #     LEARNING_RATE,
# #     WEIGHT_DECAY,
# #     data=data,
# # )

# data.to("cuda")
# # gcn_model.to("cuda")
# gat_model.to("cuda")
# # tagnn_model.to("cuda")

# # train_model(gcn_model, data, gcn_optimizer, gcn_loss_fn, EPOCH)
# # test_model(gcn_model, data)
# set_seed(3407)
# train_model(gat_model, data, gat_optimizer, gat_loss_fn, EPOCH)

# test_model(gat_model, data)
# # train_model(tagnn_model, data, tagnn_optimizer, tagnn_loss_fn, EPOCH)
# # test_model(tagnn_model, data)

# Non-Graph-Based Models

In [24]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
import numpy as np

# Fractions of the full table assigned to the train / validation / test splits.
train_ratio = 0.7
val_ratio = 0.1
test_ratio = 0.2

# Prepare the feature matrix and labels
# (every column except 'address' and 'class' is treated as a feature)
feature_columns = [col for col in df_wallets_features.columns if col not in ['address', 'class']]
X = df_wallets_features[feature_columns].values
y = df_wallets_features['class'].values

# Split data into training and temp sets
# (stratified on the label, fixed random_state for a repeatable split)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=(1 - train_ratio), stratify=y, random_state=42)

# Split temp set into validation and test sets
# (test_size is the test share of the temp set: test_ratio / (val_ratio + test_ratio))
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=(test_ratio / (val_ratio + test_ratio)), stratify=y_temp, random_state=42)

# Apply SMOTE to handle class imbalance only on the training set
# (validation and test data are not resampled)
smote = SMOTE(sampling_strategy=SAMPLING_STRATEGY, random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Update training data with resampled data
# NOTE: X_train / y_train are rebound here, so the pre-SMOTE training split is
# no longer reachable under these names after this cell has run.
X_train = X_train_resampled
y_train = y_train_resampled

# Verify the class distribution in the new training set
print("Class distribution in the new training set:", np.bincount(y_train))
print("Class distribution in validation set:", np.bincount(y_val))
print("Class distribution in test set:", np.bincount(y_test))

Class distribution in the new training set: [105456 175761]
Class distribution in validation set: [ 1427 25108]
Class distribution in test set: [ 2853 50219]


In [25]:
from Model_Linear_Classification import LinearClassifier
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    classification_report,
    f1_score,
)
import copy
import torch
from tqdm import tqdm

# Hyperparameters
input_dim = 55  # Number of input features
hidden_dim1 = 32
hidden_dim2 = 16
output_dim = 2  # Binary classification
dropout = 0.5
learning_rate = 0.01
epochs = 200

set_seed(3407)

# Setup
model = LinearClassifier(input_dim, hidden_dim1, hidden_dim2, output_dim, dropout)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Training loop (full-batch: one optimiser step per epoch)
# best_val_f1 starts below any reachable score so the first epoch is always
# snapshotted. With the former start value of 0, a run whose validation F1
# never rose above 0 left best_model as None and crashed in load_state_dict.
best_val_f1 = float("-inf")
best_model_iter = 0
best_model = None

for epoch in tqdm(range(epochs)):
    model.train()
    optimizer.zero_grad()
    output = model(X_train_tensor)
    loss = criterion(output, y_train_tensor)
    loss.backward()
    optimizer.step()

    # Evaluate on validation set
    model.eval()
    with torch.no_grad():
        val_output = model(X_val_tensor)
        val_pred = val_output.argmax(dim=1)
        val_f1 = f1_score(
            y_val_tensor, val_pred, pos_label=0
        )  # F1 score for minority class
        if val_f1 > best_val_f1:
            best_model_iter = epoch
            best_val_f1 = val_f1
            # deepcopy: state_dict() tensors are otherwise overwritten by later steps
            best_model = copy.deepcopy(model.state_dict())

# Load the best model
if best_model is not None:
    print(f"Loading the best model at iteration {best_model_iter}")
    model.load_state_dict(best_model)
else:
    # Only reachable when epochs == 0; there is nothing to restore.
    print("No epochs were run; keeping the current model weights")

# Test the model
model.eval()
with torch.no_grad():
    test_output = model(X_test_tensor)
    test_pred = test_output.argmax(dim=1)
    test_f1 = f1_score(
        y_test_tensor, test_pred, pos_label=0
    )  # F1 score for minority class
    test_acc = accuracy_score(y_test_tensor, test_pred)
    print(f"Test Accuracy: {test_acc:.4f}")
    print(f"Test F1 Score (minority class): {test_f1:.4f}")
    print(confusion_matrix(y_test_tensor, test_pred))
    print(classification_report(y_test_tensor, test_pred))

100%|██████████| 200/200 [00:07<00:00, 27.16it/s]


Loading the best model at iteration 12
Test Accuracy: 0.9143
Test F1 Score (minority class): 0.4344
[[ 1747  1106]
 [ 3444 46775]]
              precision    recall  f1-score   support

           0       0.34      0.61      0.43      2853
           1       0.98      0.93      0.95     50219

    accuracy                           0.91     53072
   macro avg       0.66      0.77      0.69     53072
weighted avg       0.94      0.91      0.93     53072



In [None]:
# from Model_SVM import SVMClassifierWrapper

# # Choose the classifier
# model_svm = SVMClassifierWrapper(C=1.0, kernel="rbf", gamma="scale", random_state=42)

# # Convert data to numpy arrays (as these models do not use PyTorch tensors)
# X_train_np = X_train
# y_train_np = y_train
# X_val_np = X_val
# y_val_np = y_val
# X_test_np = X_test
# y_test_np = y_test

In [None]:
# # Training
# model_svm.fit(X_train_np, y_train_np)

# # # Evaluate on validation set
# # val_pred = model_svm.predict(X_val_np)
# # val_f1 = f1_score(y_val_np, val_pred, pos_label=0)  # F1 score for minority class

# # Test the model
# test_pred = model_svm.predict(X_test_np)
# test_f1 = f1_score(y_test_np, test_pred, pos_label=0)  # F1 score for minority class
# test_acc = accuracy_score(y_test_np, test_pred)
# print(f"Test Accuracy: {test_acc:.4f}")
# print(f"Test F1 Score (minority class): {test_f1:.4f}")
# print(confusion_matrix(y_test_np, test_pred))
# print(classification_report(y_test_np, test_pred))