In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
os.chdir('/content/drive/MyDrive/Graph_Neural_Network')

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# 读取并预处理
df_classes = pd.read_csv('./data/elliptic_bitcoin_dataset/elliptic_txs_classes.csv')
df_features = pd.read_csv('./data/elliptic_bitcoin_dataset/elliptic_txs_features.csv', header=None)

df_classes = df_classes[df_classes['class'] != 'unknown']
df_classes['class'] = df_classes['class'].astype(str).map({'1': 1, '2': 0})

df_merge = df_features.merge(df_classes, how='inner', right_on='txId', left_on=0)
X = df_merge.drop(columns=[0, 'class', 'txId', 1]).values.astype(np.float32)
y = df_merge['class'].values.astype(int)

# 划分训练集、验证集、测试集（70/15/15）
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, stratify=y, random_state=0)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=0)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_classes['class'] = df_classes['class'].astype(str).map({'1': 1, '2': 0})


In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef

models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'SVM': SVC(probability=True),
    'KNN': KNeighborsClassifier(n_neighbors=5),
}

results = {}

for name, model in models.items():
    print(f"\nTraining {name}...")
    model.fit(X_train, y_train)
    y_probs = model.predict_proba(X_test)[:, 1]
    y_preds = (y_probs > 0.5).astype(int)

    results[name] = {
        'Accuracy': accuracy_score(y_test, y_preds),
        'Precision': precision_score(y_test, y_preds),
        'Recall': recall_score(y_test, y_preds),
        'F1-score': f1_score(y_test, y_preds),
        'MCC': matthews_corrcoef(y_test, y_preds)
    }

# 转为 DataFrame 输出结果
import pandas as pd
df_results = pd.DataFrame(results).T.round(4)
print(df_results)



Training Logistic Regression...

Training SVM...

Training KNN...
                     Accuracy  Precision  Recall  F1-score     MCC
Logistic Regression    0.9641     0.8394  0.7815    0.8094  0.7902
SVM                    0.9684     0.8835  0.7786    0.8277  0.8124
KNN                    0.9737     0.8819  0.8431    0.8621  0.8478
