# 库导入

In [2]:
# -*- coding: utf-8 -*-
import networkx as nx
import numpy as np
from node2vec import Node2Vec
import tempfile
from pecanpy import pecanpy as node2vec
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC
import xgboost as xgb
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, average_precision_score, matthews_corrcoef
)
import random
import os
from tqdm import tqdm
from pecanpy.graph import AdjlstGraph
import pandas as pd

# 待调用的函数

In [3]:
# 0. 评价指标
def evaluate_metrics(y_true, y_scores):
    """Compute binary-classification metrics from predicted scores.

    Scores are binarised with a data-dependent threshold: the median of the
    strictly positive scores, or 0 when there are none. A sample is predicted
    as class 1 only when its score is strictly greater than that threshold.
    The ranking metrics (ROC-AUC, average precision) use the raw scores.

    Args:
        y_true: Ground-truth binary labels.
        y_scores: Predicted scores, one per label (higher means more likely 1).

    Returns:
        dict with the keys 'accuracy', 'precision', 'recall', 'f1',
        'roc_auc', 'average_precision' and 'mcc'.
    """
    positive_scores = [s for s in y_scores if s > 0]
    # Guard on the filtered list itself: checking `any(y_scores)` let inputs
    # that are non-zero but never positive reach np.median([]), which yields
    # NaN and a RuntimeWarning.
    threshold = np.median(positive_scores) if positive_scores else 0
    y_pred = [int(score > threshold) for score in y_scores]
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        # zero_division=0 keeps the metric defined when no positives are predicted
        'precision': precision_score(y_true, y_pred, zero_division=0),
        'recall': recall_score(y_true, y_pred, zero_division=0),
        'f1': f1_score(y_true, y_pred, zero_division=0),
        'roc_auc': roc_auc_score(y_true, y_scores),
        'average_precision': average_precision_score(y_true, y_scores),
        'mcc': matthews_corrcoef(y_true, y_pred)
    }

# 1. Node2Vec节点嵌入向量生成函数
def generate_node2vec_embeddings(G, dimensions=64, walk_length=30, num_walks=200, p=1, q=1):
    """Compute a node2vec embedding for every node of a graph using PecanPy.

    The graph is copied into a PecanPy ``AdjlstGraph`` (node IDs stringified,
    every edge undirected with weight 1), converted to a ``SparseOTF`` graph
    and embedded.

    Args:
        G: Input networkx graph.
        dimensions: Dimensionality of the embedding vectors.
        walk_length: Number of steps in each random walk.
        num_walks: Number of random walks started from each node.
        p: node2vec return parameter (p < 1 favours stepping back to the
            previous node, p > 1 favours moving away from it).
        q: node2vec in-out parameter (q > 1 keeps walks local, q < 1 lets
            them explore more distant nodes).

    Returns:
        dict mapping each node of ``G`` (the original node object, not its
        string form) to its embedding vector (numpy.ndarray).
    """
    # PecanPy is used instead of the `node2vec` package because its SparseOTF
    # mode is considerably faster.
    adj_graph = AdjlstGraph()
    # Remember which original node each stringified ID came from so the
    # result can be keyed by the caller's own node objects.
    original_ids = {}
    for node in G.nodes():
        node_str = str(node)
        original_ids[node_str] = node
        adj_graph.add_node(node_str)
    for u, v in G.edges():
        adj_graph.add_edge(str(u), str(v), weight=1, directed=False)

    # Build the SparseOTF graph.
    model = node2vec.SparseOTF(p=p, q=q, workers=os.cpu_count(), verbose=True)
    # NOTE(review): if `from_adjlst_graph` is a classmethod that constructs a
    # fresh instance, the p/q/workers given above would not carry over to
    # `graph` -- confirm against the installed PecanPy version.
    graph = model.from_adjlst_graph(adj_graph)
    graph.preprocess_transition_probs()
    # Generate the embedding matrix; rows follow the order of graph.nodes.
    embed_matrix = graph.embed(dim=dimensions, walk_length=walk_length, num_walks=num_walks)

    # Map rows back to the original node objects. The previous int() cast
    # produced keys that did not match G's nodes whenever node IDs were
    # numeric strings or any other non-int type.
    embeddings = {}
    for idx, node_id in enumerate(graph.nodes):
        embeddings[original_ids.get(node_id, node_id)] = embed_matrix[idx]
    return embeddings

# 3.嵌入向量组合函数，将两个节点嵌入向量合成为边特征向量
def combine_embeddings(vec_u, vec_v, method='hadamard'):
    """Combine the embeddings of nodes u and v into one edge feature vector.

    Supported methods:
        - 'hadamard': element-wise product
        - 'average':  element-wise mean
        - 'L1':       absolute element-wise difference
        - 'L2':       squared element-wise difference

    Args:
        vec_u: Embedding of the first endpoint (numpy.ndarray).
        vec_v: Embedding of the second endpoint (numpy.ndarray).
        method: Name of the combination operator, see above.

    Returns:
        numpy.ndarray edge feature vector, or None when the arithmetic itself
        fails (e.g. incompatible shapes); the error is printed in that case.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # An unknown method used to fall through the if-chain and return None
    # silently, which only surfaced later as an unrelated concatenate error.
    if method not in ('hadamard', 'average', 'L1', 'L2'):
        raise ValueError(
            f"Unsupported embedding combination method: {method!r} "
            "(expected 'hadamard', 'average', 'L1' or 'L2')"
        )
    try:
        if method == 'hadamard':
            return vec_u * vec_v
        if method == 'average':
            return (vec_u + vec_v) / 2
        if method == 'L1':
            return np.abs(vec_u - vec_v)
        return (vec_u - vec_v) ** 2
    except Exception as e:
        # Best-effort behaviour kept for backward compatibility with callers
        # that check for None.
        print(f"Error combining embeddings: {e}")
        return None

# 4. 预先构建特征
def precompute_structural_features(G, edge_list, structural_features):
    """Compute the selected structural features once for every node pair.

    For each (u, v) in ``edge_list`` the enabled features are appended in
    this fixed order, regardless of their order in ``structural_features``:
    'CN' (number of common neighbours), 'JC' (Jaccard coefficient),
    'AA' (Adamic-Adar), 'PA' (product of degrees), 'HI' (absolute degree
    difference) and 'RA' (resource allocation).

    Returns:
        dict mapping (u, v) to a float numpy array of the enabled features.
    """
    # Neighbour sets and degrees are taken from G once, up front.
    adjacency = {node: set(G.neighbors(node)) for node in G.nodes()}
    degree_of = dict(G.degree())
    feature_dict = {}

    for pair in tqdm(edge_list, desc="Precomputing structural features"):
        u, v = pair
        common = adjacency[u] & adjacency[v]
        either = adjacency[u] | adjacency[v]
        values = []

        if 'CN' in structural_features:
            values.append(len(common))
        if 'JC' in structural_features:
            if either:
                values.append(len(common) / len(either))
            else:
                values.append(0.0)
        if 'AA' in structural_features:
            # Degree-1 neighbours are skipped because log(1) == 0.
            values.append(sum(
                1 / np.log(degree_of[w]) for w in common if degree_of[w] > 1
            ))
        if 'PA' in structural_features:
            values.append(degree_of[u] * degree_of[v])
        if 'HI' in structural_features:
            values.append(abs(degree_of[u] - degree_of[v]))
        if 'RA' in structural_features:
            # Resource allocation index.
            if common:
                values.append(sum(1 / degree_of[w] for w in common))
            else:
                values.append(0.0)

        feature_dict[(u, v)] = np.array(values, dtype=float)
    return feature_dict

# 5. 五折交叉验证构建数据集、模型训练与评估
def _sample_negative_edges(nodes, existing_edges, count, excluded=frozenset()):
    """Rejection-sample `count` distinct node pairs that are neither edges nor in `excluded`.

    Returns the sampled pairs in draw order together with the set of their
    sorted-tuple forms.
    """
    n = len(nodes)
    real_edges = sum(1 for a, b in existing_edges if a != b)
    available = n * (n - 1) // 2 - real_edges - len(excluded)
    # Without this check the rejection loop below never terminates on graphs
    # that are too dense to supply enough non-edges.
    if count > available:
        raise ValueError(
            f"Cannot sample {count} negative edges: only {available} "
            "unconnected node pairs are available"
        )
    chosen = set()
    sampled = []
    while len(sampled) < count:
        u = random.choice(nodes)
        v = random.choice(nodes)
        if u == v:
            continue
        pair = tuple(sorted((u, v)))
        if pair in existing_edges or pair in chosen or pair in excluded:
            continue
        chosen.add(pair)
        sampled.append((u, v))
    return sampled, chosen


def _build_dataset(pos_edges, neg_edges, struct_feat_dict, embeddings, embed_method, desc):
    """Assemble the feature matrix and label vector for positive (1) and negative (0) pairs."""
    labelled = [(e, 1) for e in pos_edges] + [(e, 0) for e in neg_edges]
    X, y = [], []
    for (u, v), label in tqdm(labelled, desc=desc):
        feat = struct_feat_dict[(u, v)].copy()
        if embed_method is not None:
            feat = np.concatenate([feat, combine_embeddings(embeddings[u], embeddings[v], method=embed_method)])
        X.append(feat)
        y.append(label)
    return np.array(X), np.array(y)


def link_prediction_cross_validation(G, structural_features, embed_method, dimensions, walk_length, num_walks, p, q):
    """Run a 5-fold cross-validated link-prediction experiment on a graph.

    In each fold the held-out positive edges are removed from the graph,
    equally many negative pairs are sampled for the train and test sets,
    features are computed on the training graph, and Logistic Regression,
    Random Forest and XGBoost classifiers are trained and evaluated. A soft
    voting score (equal-weight mean of the three models' probabilities) is
    evaluated as well.

    Args:
        G: The full original graph (networkx.Graph).
        structural_features: Tuple or list naming the structural features to
            use, e.g. ('CN', 'JC', 'AA', 'PA').
        embed_method: How node embeddings are combined into edge features
            ('hadamard', 'average', 'L1', 'L2'), or None to skip Node2Vec
            entirely.
        dimensions: Embedding dimensionality (used only with embeddings).
        walk_length: Random-walk length (used only with embeddings).
        num_walks: Walks per node (used only with embeddings).
        p: node2vec return parameter (used only with embeddings).
        q: node2vec in-out parameter (used only with embeddings).

    Returns:
        dict {model_name: {metric_name: mean over the 5 folds}} for the
        models 'LogisticRegression', 'RandomForest', 'XGBoost' and 'Voting'.

    Raises:
        ValueError: If the graph has too few unconnected node pairs to draw
            the required negative samples.
    """
    # All true edges of the original graph are the positive samples.
    all_positive_edges = list(G.edges())
    # Fold-invariant lookups, built once instead of once per fold.
    existing_edges = set(tuple(sorted(edge)) for edge in G.edges())
    nodes = list(G.nodes())

    kf = KFold(n_splits=5, shuffle=True)
    results = []
    for fold, (train_idx, test_idx) in enumerate(kf.split(all_positive_edges), start=1):
        print(f"\n----- Fold {fold}/5 -----")
        train_pos_edges = [all_positive_edges[i] for i in train_idx]
        test_pos_edges = [all_positive_edges[i] for i in test_idx]

        # Training graph: every node, but only the training positive edges,
        # so the model never sees the held-out edges.
        G_train = nx.Graph()
        G_train.add_nodes_from(G.nodes())
        G_train.add_edges_from(train_pos_edges)

        # Negative samples are pairs absent from the ORIGINAL graph; test
        # negatives are drawn first and excluded from the training negatives.
        test_neg_edges, test_neg_set = _sample_negative_edges(
            nodes, existing_edges, len(test_pos_edges)
        )
        train_neg_edges, _ = _sample_negative_edges(
            nodes, existing_edges, len(train_pos_edges), excluded=test_neg_set
        )

        # Node embeddings are learned on the training graph only.
        embeddings = None
        if embed_method is not None:
            print("Generating Node2Vec embeddings...")
            embeddings = generate_node2vec_embeddings(G_train, dimensions=dimensions, walk_length=walk_length, num_walks=num_walks, p=p, q=q)

        # NOTE(review): training positives are still present in G_train when
        # their features are computed, whereas test positives are not, so
        # degree-based features (PA, HI) see a different distribution at
        # train and test time -- confirm this is intended.
        all_edges = train_pos_edges + train_neg_edges + test_pos_edges + test_neg_edges
        struct_feat_dict = precompute_structural_features(G_train, all_edges, structural_features)

        X_train, y_train = _build_dataset(
            train_pos_edges, train_neg_edges, struct_feat_dict,
            embeddings, embed_method, "Building training set"
        )
        X_test, y_test = _build_dataset(
            test_pos_edges, test_neg_edges, struct_feat_dict,
            embeddings, embed_method, "Building test set"
        )

        models = {
            'LogisticRegression': LogisticRegression(max_iter=1000,n_jobs=-1),
            'RandomForest': RandomForestClassifier(n_jobs=-1),
            'XGBoost': xgb.XGBClassifier(n_jobs=-1)
        }
        fold_results = {}
        model_scores = {}
        for name, model in models.items():
            model.fit(X_train, y_train)
            y_scores = model.predict_proba(X_test)[:, 1]
            model_scores[name] = y_scores
            metrics = evaluate_metrics(y_test, y_scores)
            print(f"✅ Fold {fold} {name} Metrics: {metrics}")
            fold_results[name] = metrics

        # Soft voting = equal-weight mean of the members' probabilities.
        # Averaging the already-fitted models' scores avoids the full retrain
        # that VotingClassifier.fit performs on clones of its estimators, and
        # makes the ensemble consist of exactly the models reported above.
        y_scores = np.mean(list(model_scores.values()), axis=0)
        metrics = evaluate_metrics(y_test, y_scores)
        print(f"✅ Fold {fold} Voting Metrics: {metrics}")
        fold_results['Voting'] = metrics

        results.append(fold_results)

    # Average every metric over the 5 folds.
    average_results = {}
    for model_name in results[0].keys():
        average_results[model_name] = {}
        for metric in results[0][model_name].keys():
            average_results[model_name][metric] = np.mean(
                [fold_result[model_name][metric] for fold_result in results]
            )

    print("\n5折交叉验证结果:")
    for model, scores in average_results.items():
        print(f"\n{model}:")
        for k, v in scores.items():
            print(f"  {k}: {v:.4f}")

    return average_results

# 主函数

In [12]:
######## ！！！！！！ Parameter settings ！！！！！！ #########
dataset_name = "DBLP"  # replace with another dataset name as needed
# dataset_name = "email-Eu-core"
edges_file_path = f"norm_dataset/{dataset_name}_edges.txt"
G = nx.read_edgelist(edges_file_path, nodetype=int)
structural_features=['HI']
#structural_features=('CN','JC','AA','RA') # choose the features as needed
# To run without Node2Vec embeddings set embed_method to None; dimensions,
# walk_length, num_walks, p and q then have no effect.
# embed_method='hadamard'
embed_method=None
dimensions=64
walk_length=40
num_walks=400
p=3
q=0.3
runs =10 # number of repetitions of the whole experiment

# Short column prefixes used in the result sheet (e.g. Log_accuracy, RF_f1).
model_name_map = {
    "LogisticRegression": "Log",
    "RandomForest": "RF",
    "XGBoost": "XGB",
    "Voting": "Voting"
}

# The output location depends only on the parameters above, so it is resolved
# once here. This also keeps file_path defined for the final message.
output_dir = f"results/{dataset_name}"
os.makedirs(output_dir, exist_ok=True)

# The file name encodes the parameter configuration.
if embed_method:
    param_str = f"emd_{embed_method}_dim_{dimensions}_walk_{walk_length}_num_{num_walks}_p_{p}_q_{q}_{'_'.join(structural_features)}"
else:
    param_str = f"emb_no_{'_'.join(structural_features)}"
file_name = f"{dataset_name}_{param_str}.xlsx"
file_path = os.path.join(output_dir, file_name)


# Main run
for run in range(runs):
    print(f"\n=== 运行 {run + 1}/{runs} ===")
    # Run one full cross-validated link-prediction experiment.
    results = link_prediction_cross_validation(G, structural_features=structural_features, embed_method=embed_method, dimensions=dimensions, walk_length=walk_length, num_walks=num_walks, p=p, q=q)

    # Flatten the nested results into a single row whose columns are named
    # <model>_<metric>.
    row_data = {}
    for model, metrics in results.items():
        short_name = model_name_map.get(model, model)
        for metric, value in metrics.items():
            col_name = f"{short_name}_{metric}"
            row_data[col_name] = round(value, 4)

    # Append to the existing sheet if there is one, otherwise start a new one.
    # The file is rewritten after every run so finished runs survive a crash.
    if os.path.exists(file_path):
        df = pd.read_excel(file_path)
        df = pd.concat([df, pd.DataFrame([row_data])], ignore_index=True)
    else:
        df = pd.DataFrame([row_data])

    # Write to Excel
    df.to_excel(file_path, index=False)

# Only report a save when at least one run actually wrote the file.
if runs > 0:
    print(f"✅ 实验结果已保存至: {file_path}")


=== 运行 1/10 ===

----- Fold 1/5 -----


Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 366065.70it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 905379.76it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 641330.52it/s]


✅ Fold 1 LogisticRegression Metrics: {'accuracy': 0.6350955173066011, 'precision': 0.6358277075211562, 'recall': 0.6324002269718176, 'f1': 0.6341093357356218, 'roc_auc': 0.6934084772922108, 'average_precision': 0.70480956002927, 'mcc': 0.2701949603529293}
✅ Fold 1 RandomForest Metrics: {'accuracy': 0.6349063741252128, 'precision': 0.6356891467706649, 'recall': 0.632021940609041, 'f1': 0.6338502394840423, 'roc_auc': 0.6929991291614674, 'average_precision': 0.7034171291187584, 'mcc': 0.2698172380232601}
✅ Fold 1 XGBoost Metrics: {'accuracy': 0.6350955173066011, 'precision': 0.6358277075211562, 'recall': 0.6324002269718176, 'f1': 0.6341093357356218, 'roc_auc': 0.6932991842301451, 'average_precision': 0.704188253434959, 'mcc': 0.2701949603529293}
✅ Fold 1 Voting Metrics: {'accuracy': 0.6350955173066011, 'precision': 0.6358277075211562, 'recall': 0.6324002269718176, 'f1': 0.6341093357356218, 'roc_auc': 0.693374124211082, 'average_precision': 0.7043898686621649, 'mcc': 0.2701949603529293}

-

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 457093.70it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1237579.53it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 801571.86it/s]


✅ Fold 2 LogisticRegression Metrics: {'accuracy': 0.6372706638925667, 'precision': 0.6396075791093585, 'recall': 0.6289010781161339, 'f1': 0.6342091459634733, 'roc_auc': 0.6922575641084303, 'average_precision': 0.7067555995745544, 'mcc': 0.2745797990914551}
✅ Fold 2 RandomForest Metrics: {'accuracy': 0.6372706638925667, 'precision': 0.6396075791093585, 'recall': 0.6289010781161339, 'f1': 0.6342091459634733, 'roc_auc': 0.6921577648749557, 'average_precision': 0.7057822727092993, 'mcc': 0.2745797990914551}
✅ Fold 2 XGBoost Metrics: {'accuracy': 0.6372706638925667, 'precision': 0.6396075791093585, 'recall': 0.6289010781161339, 'f1': 0.6342091459634733, 'roc_auc': 0.6922517238163247, 'average_precision': 0.7063139764710035, 'mcc': 0.2745797990914551}
✅ Fold 2 Voting Metrics: {'accuracy': 0.6372706638925667, 'precision': 0.6396075791093585, 'recall': 0.6289010781161339, 'f1': 0.6342091459634733, 'roc_auc': 0.6922372169958116, 'average_precision': 0.7066650848141002, 'mcc': 0.274579799091455

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 538607.84it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1472295.11it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 792671.01it/s]


✅ Fold 3 LogisticRegression Metrics: {'accuracy': 0.636810744348813, 'precision': 0.6374869309001046, 'recall': 0.6343516504303415, 'f1': 0.6359154261875415, 'roc_auc': 0.6911505985566704, 'average_precision': 0.7064183398181181, 'mcc': 0.27362479801414713}
✅ Fold 3 RandomForest Metrics: {'accuracy': 0.6364324222075097, 'precision': 0.6372110720060877, 'recall': 0.6335950061477348, 'f1': 0.6353978943374751, 'roc_auc': 0.6906285494257635, 'average_precision': 0.7049749722414317, 'mcc': 0.2728692381526078}
✅ Fold 3 XGBoost Metrics: {'accuracy': 0.6365742930104984, 'precision': 0.6373145682769114, 'recall': 0.6338787477537123, 'f1': 0.6355920147944426, 'roc_auc': 0.6908273045212081, 'average_precision': 0.705476428677981, 'mcc': 0.27315255548323475}
✅ Fold 3 Voting Metrics: {'accuracy': 0.636810744348813, 'precision': 0.6374869309001046, 'recall': 0.6343516504303415, 'f1': 0.6359154261875415, 'roc_auc': 0.6910996361704271, 'average_precision': 0.7061162935907423, 'mcc': 0.2736247980141471

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 581071.23it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1522955.82it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 842102.41it/s]


✅ Fold 4 LogisticRegression Metrics: {'accuracy': 0.62735269081623, 'precision': 0.6279334916864608, 'recall': 0.6250827579684101, 'f1': 0.6265048819793345, 'roc_auc': 0.6818437012328898, 'average_precision': 0.6899344981233678, 'mcc': 0.254708006460454}
✅ Fold 4 RandomForest Metrics: {'accuracy': 0.62735269081623, 'precision': 0.6279334916864608, 'recall': 0.6250827579684101, 'f1': 0.6265048819793345, 'roc_auc': 0.6817180664718585, 'average_precision': 0.6894769504128649, 'mcc': 0.254708006460454}
✅ Fold 4 XGBoost Metrics: {'accuracy': 0.62735269081623, 'precision': 0.6279334916864608, 'recall': 0.6250827579684101, 'f1': 0.6265048819793345, 'roc_auc': 0.6817816509270838, 'average_precision': 0.6899901648180872, 'mcc': 0.254708006460454}
✅ Fold 4 Voting Metrics: {'accuracy': 0.62735269081623, 'precision': 0.6279334916864608, 'recall': 0.6250827579684101, 'f1': 0.6265048819793345, 'roc_auc': 0.6818253987855923, 'average_precision': 0.6902684011336858, 'mcc': 0.254708006460454}

----- Fo

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 569261.04it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1345784.37it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 921712.97it/s]


✅ Fold 5 LogisticRegression Metrics: {'accuracy': 0.6427693180743403, 'precision': 0.6427963295809289, 'recall': 0.6426747375390145, 'f1': 0.6427355278093076, 'roc_auc': 0.698703095383061, 'average_precision': 0.7100219321920473, 'mcc': 0.28553864125723977}
✅ Fold 5 RandomForest Metrics: {'accuracy': 0.6426274472713516, 'precision': 0.6426949280847842, 'recall': 0.642390995933037, 'f1': 0.6425429260678303, 'roc_auc': 0.6982969841153991, 'average_precision': 0.7082049513221214, 'mcc': 0.28525492643947453}
✅ Fold 5 XGBoost Metrics: {'accuracy': 0.6427693180743403, 'precision': 0.6427963295809289, 'recall': 0.6426747375390145, 'f1': 0.6427355278093076, 'roc_auc': 0.6985477526907125, 'average_precision': 0.7087244232178418, 'mcc': 0.28553864125723977}
✅ Fold 5 Voting Metrics: {'accuracy': 0.6427693180743403, 'precision': 0.6427963295809289, 'recall': 0.6426747375390145, 'f1': 0.6427355278093076, 'roc_auc': 0.6985640110963641, 'average_precision': 0.7093896106748092, 'mcc': 0.28553864125723

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 520759.72it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1619952.87it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 998954.22it/s]


✅ Fold 1 LogisticRegression Metrics: {'accuracy': 0.6389256667297144, 'precision': 0.6405741626794258, 'recall': 0.6330622281066768, 'f1': 0.6367960426179604, 'roc_auc': 0.6968747040724884, 'average_precision': 0.7060412558091524, 'mcc': 0.27787044043919723}
✅ Fold 1 RandomForest Metrics: {'accuracy': 0.6389256667297144, 'precision': 0.6405741626794258, 'recall': 0.6330622281066768, 'f1': 0.6367960426179604, 'roc_auc': 0.6966373002231045, 'average_precision': 0.7052211446964671, 'mcc': 0.27787044043919723}
✅ Fold 1 XGBoost Metrics: {'accuracy': 0.6389256667297144, 'precision': 0.6405741626794258, 'recall': 0.6330622281066768, 'f1': 0.6367960426179604, 'roc_auc': 0.6963380813983959, 'average_precision': 0.7048945043571282, 'mcc': 0.27787044043919723}
✅ Fold 1 Voting Metrics: {'accuracy': 0.6389256667297144, 'precision': 0.6405741626794258, 'recall': 0.6330622281066768, 'f1': 0.6367960426179604, 'roc_auc': 0.696803583088074, 'average_precision': 0.7063007331722265, 'mcc': 0.2778704404391

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 453072.54it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1203572.25it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 825603.05it/s]


✅ Fold 2 LogisticRegression Metrics: {'accuracy': 0.6401550974087384, 'precision': 0.658434894162925, 'recall': 0.5824664270853036, 'f1': 0.6181252509032517, 'roc_auc': 0.6902743466931194, 'average_precision': 0.7046303050617693, 'mcc': 0.28219476624211065}
✅ Fold 2 RandomForest Metrics: {'accuracy': 0.6400605258180443, 'precision': 0.6583618477331052, 'recall': 0.5822772839039153, 'f1': 0.6179865502358727, 'roc_auc': 0.6899592615924611, 'average_precision': 0.7040584157163581, 'mcc': 0.28201059390354716}
✅ Fold 2 XGBoost Metrics: {'accuracy': 0.6400605258180443, 'precision': 0.6583618477331052, 'recall': 0.5822772839039153, 'f1': 0.6179865502358727, 'roc_auc': 0.6901403508947668, 'average_precision': 0.704419130997852, 'mcc': 0.28201059390354716}
✅ Fold 2 Voting Metrics: {'accuracy': 0.6401550974087384, 'precision': 0.658434894162925, 'recall': 0.5824664270853036, 'f1': 0.6181252509032517, 'roc_auc': 0.6902496886757612, 'average_precision': 0.7051131788396389, 'mcc': 0.282194766242110

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 407145.27it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1209039.44it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 868557.53it/s]


✅ Fold 3 LogisticRegression Metrics: {'accuracy': 0.6378984205050601, 'precision': 0.6390160183066361, 'recall': 0.6338787477537123, 'f1': 0.6364370162860263, 'roc_auc': 0.6939453267970398, 'average_precision': 0.705192503185835, 'mcc': 0.2758057539654742}
✅ Fold 3 RandomForest Metrics: {'accuracy': 0.6378984205050601, 'precision': 0.6390160183066361, 'recall': 0.6338787477537123, 'f1': 0.6364370162860263, 'roc_auc': 0.693646427079163, 'average_precision': 0.7040254270846188, 'mcc': 0.2758057539654742}
✅ Fold 3 XGBoost Metrics: {'accuracy': 0.6378984205050601, 'precision': 0.6390160183066361, 'recall': 0.6338787477537123, 'f1': 0.6364370162860263, 'roc_auc': 0.6937044474472822, 'average_precision': 0.7043424137184369, 'mcc': 0.2758057539654742}
✅ Fold 3 Voting Metrics: {'accuracy': 0.6378984205050601, 'precision': 0.6390160183066361, 'recall': 0.6338787477537123, 'f1': 0.6364370162860263, 'roc_auc': 0.6939506135743383, 'average_precision': 0.7052124469829402, 'mcc': 0.2758057539654742}

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 473903.74it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1373581.22it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 1087707.44it/s]


✅ Fold 4 LogisticRegression Metrics: {'accuracy': 0.6408304171001608, 'precision': 0.6580008488964346, 'recall': 0.5864938995554715, 'f1': 0.6201930289543431, 'roc_auc': 0.6940712880680058, 'average_precision': 0.7027021661908062, 'mcc': 0.28333889520594696}
✅ Fold 4 RandomForest Metrics: {'accuracy': 0.6406885462971721, 'precision': 0.6578919435304108, 'recall': 0.586210157949494, 'f1': 0.6199859957987396, 'roc_auc': 0.6935723674695953, 'average_precision': 0.7005090146522842, 'mcc': 0.28306230736580457}
✅ Fold 4 XGBoost Metrics: {'accuracy': 0.6406885462971721, 'precision': 0.6578919435304108, 'recall': 0.586210157949494, 'f1': 0.6199859957987396, 'roc_auc': 0.6936436718720431, 'average_precision': 0.7010549555776528, 'mcc': 0.28306230736580457}
✅ Fold 4 Voting Metrics: {'accuracy': 0.6407831268324978, 'precision': 0.6579645548126923, 'recall': 0.5863993190201456, 'f1': 0.620124024804961, 'roc_auc': 0.6939465881093901, 'average_precision': 0.7018064854390768, 'mcc': 0.283246698584845

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 461956.97it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1457363.21it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 1087547.39it/s]


✅ Fold 5 LogisticRegression Metrics: {'accuracy': 0.6371417762224534, 'precision': 0.6382268827454719, 'recall': 0.6332166840064315, 'f1': 0.6357119118833974, 'roc_auc': 0.6887774527875804, 'average_precision': 0.7002683701432888, 'mcc': 0.27429200425175865}
✅ Fold 5 RandomForest Metrics: {'accuracy': 0.6366215832781613, 'precision': 0.6378734370525914, 'recall': 0.6320817175825215, 'f1': 0.6349643705463183, 'roc_auc': 0.6882034125404987, 'average_precision': 0.7003567868483487, 'mcc': 0.27325443054407506}
✅ Fold 5 XGBoost Metrics: {'accuracy': 0.6371417762224534, 'precision': 0.6382268827454719, 'recall': 0.6332166840064315, 'f1': 0.6357119118833974, 'roc_auc': 0.6887750822359996, 'average_precision': 0.7008262017857438, 'mcc': 0.27429200425175865}
✅ Fold 5 Voting Metrics: {'accuracy': 0.6371417762224534, 'precision': 0.6382268827454719, 'recall': 0.6332166840064315, 'f1': 0.6357119118833974, 'roc_auc': 0.6888456888911899, 'average_precision': 0.7010511081242461, 'mcc': 0.274292004251

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 534898.42it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1459589.08it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 762227.20it/s]


✅ Fold 1 LogisticRegression Metrics: {'accuracy': 0.6306979383393229, 'precision': 0.6309952606635071, 'recall': 0.629563079250993, 'f1': 0.6302783563718992, 'roc_auc': 0.6870940840470444, 'average_precision': 0.6987982030560788, 'mcc': 0.26139654998743894}
✅ Fold 1 RandomForest Metrics: {'accuracy': 0.6303669377718933, 'precision': 0.630750260836574, 'recall': 0.6289010781161339, 'f1': 0.6298243121655538, 'roc_auc': 0.6857082623301087, 'average_precision': 0.6965310667171623, 'mcc': 0.26073499605198763}
✅ Fold 1 XGBoost Metrics: {'accuracy': 0.6306979383393229, 'precision': 0.6309952606635071, 'recall': 0.629563079250993, 'f1': 0.6302783563718992, 'roc_auc': 0.68620472740611, 'average_precision': 0.6975394972113776, 'mcc': 0.26139654998743894}
✅ Fold 1 Voting Metrics: {'accuracy': 0.6306979383393229, 'precision': 0.6309952606635071, 'recall': 0.629563079250993, 'f1': 0.6302783563718992, 'roc_auc': 0.686860186161681, 'average_precision': 0.6987071000134657, 'mcc': 0.26139654998743894}


Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 580917.48it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1373038.42it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 1152921.14it/s]


✅ Fold 2 LogisticRegression Metrics: {'accuracy': 0.6367032343484017, 'precision': 0.6397833865196789, 'recall': 0.6256856440325326, 'f1': 0.6326559885249821, 'roc_auc': 0.6908784770621768, 'average_precision': 0.7026228880367617, 'mcc': 0.2734728690223395}
✅ Fold 2 RandomForest Metrics: {'accuracy': 0.6365613769623605, 'precision': 0.6396788547107758, 'recall': 0.6254019292604501, 'f1': 0.6324598316755929, 'roc_auc': 0.6906394678032477, 'average_precision': 0.7024319551499981, 'mcc': 0.2731908050906403}
✅ Fold 2 XGBoost Metrics: {'accuracy': 0.6367032343484017, 'precision': 0.6397833865196789, 'recall': 0.6256856440325326, 'f1': 0.6326559885249821, 'roc_auc': 0.6908947726398431, 'average_precision': 0.7027369568545185, 'mcc': 0.2734728690223395}
✅ Fold 2 Voting Metrics: {'accuracy': 0.6367032343484017, 'precision': 0.6397833865196789, 'recall': 0.6256856440325326, 'f1': 0.6326559885249821, 'roc_auc': 0.6908349208254941, 'average_precision': 0.7029077613398211, 'mcc': 0.273472869022339

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 573617.78it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1550334.23it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 1062404.95it/s]


✅ Fold 3 LogisticRegression Metrics: {'accuracy': 0.6457958952047669, 'precision': 0.6633464024584084, 'recall': 0.5920741511396954, 'f1': 0.6256871564217891, 'roc_auc': 0.6983797566202105, 'average_precision': 0.7101403578097166, 'mcc': 0.29328958714023723}
✅ Fold 3 RandomForest Metrics: {'accuracy': 0.6415397711151045, 'precision': 0.6540401441070509, 'recall': 0.6009647214603234, 'f1': 0.6263801261829653, 'roc_auc': 0.6980966098884974, 'average_precision': 0.7102764150229576, 'mcc': 0.28401625854378165}
✅ Fold 3 XGBoost Metrics: {'accuracy': 0.6415397711151045, 'precision': 0.6540401441070509, 'recall': 0.6009647214603234, 'f1': 0.6263801261829653, 'roc_auc': 0.6981163704486539, 'average_precision': 0.7101930754561678, 'mcc': 0.28401625854378165}
✅ Fold 3 Voting Metrics: {'accuracy': 0.6415397711151045, 'precision': 0.6540401441070509, 'recall': 0.6009647214603234, 'f1': 0.6263801261829653, 'roc_auc': 0.6980524237015834, 'average_precision': 0.7100007306194418, 'mcc': 0.284016258543

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 568780.64it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1605745.16it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 1126657.76it/s]


✅ Fold 4 LogisticRegression Metrics: {'accuracy': 0.6315615246382295, 'precision': 0.6322997907551836, 'recall': 0.6287713988461174, 'f1': 0.6305306586996727, 'roc_auc': 0.6844080789507105, 'average_precision': 0.6967094888554952, 'mcc': 0.2631271460937846}
✅ Fold 4 RandomForest Metrics: {'accuracy': 0.6315615246382295, 'precision': 0.6322997907551836, 'recall': 0.6287713988461174, 'f1': 0.6305306586996727, 'roc_auc': 0.6839445556074106, 'average_precision': 0.6949376918775094, 'mcc': 0.2631271460937846}
✅ Fold 4 XGBoost Metrics: {'accuracy': 0.6315615246382295, 'precision': 0.6322997907551836, 'recall': 0.6287713988461174, 'f1': 0.6305306586996727, 'roc_auc': 0.6839926912227126, 'average_precision': 0.6955191541156366, 'mcc': 0.2631271460937846}
✅ Fold 4 Voting Metrics: {'accuracy': 0.6315615246382295, 'precision': 0.6322997907551836, 'recall': 0.6287713988461174, 'f1': 0.6305306586996727, 'roc_auc': 0.6842599060587085, 'average_precision': 0.6966769380497304, 'mcc': 0.263127146093784

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 594766.55it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1489384.57it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 947612.64it/s]


✅ Fold 5 LogisticRegression Metrics: {'accuracy': 0.6409722879031495, 'precision': 0.642047079005051, 'recall': 0.6371890664901163, 'f1': 0.6396088483812779, 'roc_auc': 0.6954609143500123, 'average_precision': 0.7055825496229059, 'mcc': 0.2819526469653583}
✅ Fold 5 RandomForest Metrics: {'accuracy': 0.6409722879031495, 'precision': 0.642047079005051, 'recall': 0.6371890664901163, 'f1': 0.6396088483812779, 'roc_auc': 0.6954973448077929, 'average_precision': 0.7051983383359288, 'mcc': 0.2819526469653583}
✅ Fold 5 XGBoost Metrics: {'accuracy': 0.6409722879031495, 'precision': 0.642047079005051, 'recall': 0.6371890664901163, 'f1': 0.6396088483812779, 'roc_auc': 0.6955116486265753, 'average_precision': 0.7054781646305647, 'mcc': 0.2819526469653583}
✅ Fold 5 Voting Metrics: {'accuracy': 0.6409722879031495, 'precision': 0.642047079005051, 'recall': 0.6371890664901163, 'f1': 0.6396088483812779, 'roc_auc': 0.6955356269794497, 'average_precision': 0.7058442940486871, 'mcc': 0.2819526469653583}



Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 579113.57it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1208352.00it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 807416.31it/s]


✅ Fold 1 LogisticRegression Metrics: {'accuracy': 0.6365613769623605, 'precision': 0.637210186240973, 'recall': 0.6341970871950067, 'f1': 0.6357000663570007, 'roc_auc': 0.6917135091483659, 'average_precision': 0.7063722698909324, 'mcc': 0.2731258074151757}
✅ Fold 1 RandomForest Metrics: {'accuracy': 0.6365613769623605, 'precision': 0.637210186240973, 'recall': 0.6341970871950067, 'f1': 0.6357000663570007, 'roc_auc': 0.6914731762091435, 'average_precision': 0.705882203944058, 'mcc': 0.2731258074151757}
✅ Fold 1 XGBoost Metrics: {'accuracy': 0.6365613769623605, 'precision': 0.637210186240973, 'recall': 0.6341970871950067, 'f1': 0.6357000663570007, 'roc_auc': 0.6916229443736951, 'average_precision': 0.7067549136746588, 'mcc': 0.2731258074151757}
✅ Fold 1 Voting Metrics: {'accuracy': 0.6365613769623605, 'precision': 0.637210186240973, 'recall': 0.6341970871950067, 'f1': 0.6357000663570007, 'roc_auc': 0.6917156377693783, 'average_precision': 0.7070363573487127, 'mcc': 0.2731258074151757}

-

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 470671.22it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1362890.19it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 837609.22it/s]


✅ Fold 2 LogisticRegression Metrics: {'accuracy': 0.6376489502553433, 'precision': 0.6549558181624614, 'recall': 0.5818044259504445, 'f1': 0.6162167576501227, 'roc_auc': 0.6883838718643132, 'average_precision': 0.7005186912153427, 'mcc': 0.2770312282626983}
✅ Fold 2 RandomForest Metrics: {'accuracy': 0.6376489502553433, 'precision': 0.6549558181624614, 'recall': 0.5818044259504445, 'f1': 0.6162167576501227, 'roc_auc': 0.6881951132657128, 'average_precision': 0.7009112457505222, 'mcc': 0.2770312282626983}
✅ Fold 2 XGBoost Metrics: {'accuracy': 0.6376489502553433, 'precision': 0.6549558181624614, 'recall': 0.5818044259504445, 'f1': 0.6162167576501227, 'roc_auc': 0.6882648524352265, 'average_precision': 0.7010873019376209, 'mcc': 0.2770312282626983}
✅ Fold 2 Voting Metrics: {'accuracy': 0.6376489502553433, 'precision': 0.6549558181624614, 'recall': 0.5818044259504445, 'f1': 0.6162167576501227, 'roc_auc': 0.688413896153131, 'average_precision': 0.7018041997727966, 'mcc': 0.2770312282626983

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 578768.18it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1449663.67it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 789861.45it/s]


✅ Fold 3 LogisticRegression Metrics: {'accuracy': 0.6359122292632177, 'precision': 0.6543501611170784, 'recall': 0.5761846212049561, 'f1': 0.6127847910275109, 'roc_auc': 0.6882464581790108, 'average_precision': 0.7005908840173597, 'mcc': 0.2737848704685477}
✅ Fold 3 RandomForest Metrics: {'accuracy': 0.6352974557835998, 'precision': 0.6352974557835998, 'recall': 0.6352974557835998, 'f1': 0.6352974557835998, 'roc_auc': 0.6870729456918531, 'average_precision': 0.6983414603786641, 'mcc': 0.27059491156719945}
✅ Fold 3 XGBoost Metrics: {'accuracy': 0.6358176487278918, 'precision': 0.6542758917060593, 'recall': 0.5759954601343044, 'f1': 0.6126452391730798, 'roc_auc': 0.6875459244050423, 'average_precision': 0.6988003040998584, 'mcc': 0.27360062204820884}
✅ Fold 3 Voting Metrics: {'accuracy': 0.6359122292632177, 'precision': 0.6543501611170784, 'recall': 0.5761846212049561, 'f1': 0.6127847910275109, 'roc_auc': 0.6881990471473994, 'average_precision': 0.7003219746750011, 'mcc': 0.2737848704685

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 580540.30it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1396071.30it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 835085.42it/s]


✅ Fold 4 LogisticRegression Metrics: {'accuracy': 0.6321762981178474, 'precision': 0.6322763842877426, 'recall': 0.6317979759765441, 'f1': 0.6320370896016653, 'roc_auc': 0.6819438413825829, 'average_precision': 0.6957900030723031, 'mcc': 0.26435267190805506}
✅ Fold 4 RandomForest Metrics: {'accuracy': 0.6321762981178474, 'precision': 0.6322763842877426, 'recall': 0.6317979759765441, 'f1': 0.6320370896016653, 'roc_auc': 0.6817381177600391, 'average_precision': 0.6945068255172976, 'mcc': 0.26435267190805506}
✅ Fold 4 XGBoost Metrics: {'accuracy': 0.6321762981178474, 'precision': 0.6322763842877426, 'recall': 0.6317979759765441, 'f1': 0.6320370896016653, 'roc_auc': 0.6817673873629508, 'average_precision': 0.6952211233749654, 'mcc': 0.26435267190805506}
✅ Fold 4 Voting Metrics: {'accuracy': 0.6321762981178474, 'precision': 0.6322763842877426, 'recall': 0.6317979759765441, 'f1': 0.6320370896016653, 'roc_auc': 0.681838025327313, 'average_precision': 0.6956697561296001, 'mcc': 0.2643526719080

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 575666.90it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1356253.53it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 1124187.24it/s]


✅ Fold 5 LogisticRegression Metrics: {'accuracy': 0.6337841672183865, 'precision': 0.634804155151053, 'recall': 0.6300009458053533, 'f1': 0.6323934301718409, 'roc_auc': 0.6898839368106129, 'average_precision': 0.7028099938558633, 'mcc': 0.26757599405065374}
✅ Fold 5 RandomForest Metrics: {'accuracy': 0.6336422964153977, 'precision': 0.634699714013346, 'recall': 0.6297172041993757, 'f1': 0.6321986421687319, 'roc_auc': 0.6895496443103645, 'average_precision': 0.7019496960561152, 'mcc': 0.2672928289708517}
✅ Fold 5 XGBoost Metrics: {'accuracy': 0.6336422964153977, 'precision': 0.634699714013346, 'recall': 0.6297172041993757, 'f1': 0.6321986421687319, 'roc_auc': 0.6895700578903903, 'average_precision': 0.7024773393302906, 'mcc': 0.2672928289708517}
✅ Fold 5 Voting Metrics: {'accuracy': 0.6337841672183865, 'precision': 0.634804155151053, 'recall': 0.6300009458053533, 'f1': 0.6323934301718409, 'roc_auc': 0.6897750882384153, 'average_precision': 0.7028890204007294, 'mcc': 0.26757599405065374}

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 301590.60it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1224817.45it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 796690.60it/s]


✅ Fold 1 LogisticRegression Metrics: {'accuracy': 0.6330622281066768, 'precision': 0.6331629755820557, 'recall': 0.6326839417439001, 'f1': 0.6329233680227058, 'roc_auc': 0.6909358872230114, 'average_precision': 0.7041219541145335, 'mcc': 0.26612453237851014}
✅ Fold 1 RandomForest Metrics: {'accuracy': 0.6329203707206356, 'precision': 0.6330587901164442, 'recall': 0.6324002269718176, 'f1': 0.6327293371812461, 'roc_auc': 0.6906529460883977, 'average_precision': 0.703774012670272, 'mcc': 0.26584088528755745}
✅ Fold 1 XGBoost Metrics: {'accuracy': 0.6330622281066768, 'precision': 0.6331629755820557, 'recall': 0.6326839417439001, 'f1': 0.6329233680227058, 'roc_auc': 0.6907771529132289, 'average_precision': 0.7040265306785122, 'mcc': 0.26612453237851014}
✅ Fold 1 Voting Metrics: {'accuracy': 0.6330622281066768, 'precision': 0.6331629755820557, 'recall': 0.6326839417439001, 'f1': 0.6329233680227058, 'roc_auc': 0.6908679323387582, 'average_precision': 0.7045894089930556, 'mcc': 0.2661245323785

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 599882.51it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1448640.89it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 997965.18it/s]


✅ Fold 2 LogisticRegression Metrics: {'accuracy': 0.6381690940041611, 'precision': 0.638904734740445, 'recall': 0.6355210894647247, 'f1': 0.6372084202541248, 'roc_auc': 0.6933554987772235, 'average_precision': 0.7046952682982818, 'mcc': 0.27634206341682327}
✅ Fold 2 RandomForest Metrics: {'accuracy': 0.6380272366181199, 'precision': 0.6388281175687245, 'recall': 0.6351428031019482, 'f1': 0.6369801299378764, 'roc_auc': 0.6930116772928978, 'average_precision': 0.7050058078935648, 'mcc': 0.2760590668734302}
✅ Fold 2 XGBoost Metrics: {'accuracy': 0.638121808208814, 'precision': 0.6388968140751308, 'recall': 0.6353319462833364, 'f1': 0.6371093935226896, 'roc_auc': 0.6931497022667382, 'average_precision': 0.7052839955011203, 'mcc': 0.27624791670825277}
✅ Fold 2 Voting Metrics: {'accuracy': 0.638121808208814, 'precision': 0.6388968140751308, 'recall': 0.6353319462833364, 'f1': 0.6371093935226896, 'roc_auc': 0.6932949761789422, 'average_precision': 0.7057206666903539, 'mcc': 0.2762479167082527

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 556527.39it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1566205.29it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 1128090.77it/s]


✅ Fold 3 LogisticRegression Metrics: {'accuracy': 0.6376146788990825, 'precision': 0.6550511508951407, 'recall': 0.5813865506478767, 'f1': 0.6160244525730321, 'roc_auc': 0.6913244583877803, 'average_precision': 0.7056511215738306, 'mcc': 0.27698637201384263}
✅ Fold 3 RandomForest Metrics: {'accuracy': 0.6376146788990825, 'precision': 0.6550511508951407, 'recall': 0.5813865506478767, 'f1': 0.6160244525730321, 'roc_auc': 0.6910733856662263, 'average_precision': 0.7041133779150922, 'mcc': 0.27698637201384263}
✅ Fold 3 XGBoost Metrics: {'accuracy': 0.6376146788990825, 'precision': 0.6550511508951407, 'recall': 0.5813865506478767, 'f1': 0.6160244525730321, 'roc_auc': 0.6911062602966361, 'average_precision': 0.7046547509513618, 'mcc': 0.27698637201384263}
✅ Fold 3 Voting Metrics: {'accuracy': 0.6376146788990825, 'precision': 0.6550511508951407, 'recall': 0.5813865506478767, 'f1': 0.6160244525730321, 'roc_auc': 0.6913141979249015, 'average_precision': 0.7053732673901569, 'mcc': 0.276986372013

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 559515.70it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1378084.24it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 803907.95it/s]


✅ Fold 4 LogisticRegression Metrics: {'accuracy': 0.6294807528610612, 'precision': 0.6305301296720061, 'recall': 0.6254610801097135, 'f1': 0.6279853758131143, 'roc_auc': 0.6866723045837817, 'average_precision': 0.6981223261663672, 'mcc': 0.2589698746081853}
✅ Fold 4 RandomForest Metrics: {'accuracy': 0.6293861723257353, 'precision': 0.6304596604997139, 'recall': 0.6252719190390618, 'f1': 0.6278550738401634, 'roc_auc': 0.6864369580119585, 'average_precision': 0.6974924862135138, 'mcc': 0.25878110561678064}
✅ Fold 4 XGBoost Metrics: {'accuracy': 0.6293861723257353, 'precision': 0.6304596604997139, 'recall': 0.6252719190390618, 'f1': 0.6278550738401634, 'roc_auc': 0.6864763449501065, 'average_precision': 0.6979793192485878, 'mcc': 0.25878110561678064}
✅ Fold 4 Voting Metrics: {'accuracy': 0.6294807528610612, 'precision': 0.6305301296720061, 'recall': 0.6254610801097135, 'f1': 0.6279853758131143, 'roc_auc': 0.6866060275397801, 'average_precision': 0.6986067261332425, 'mcc': 0.2589698746081

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 588123.68it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1236069.48it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 751086.09it/s]


✅ Fold 5 LogisticRegression Metrics: {'accuracy': 0.6355811973895772, 'precision': 0.6369280733594421, 'recall': 0.6306630095526341, 'f1': 0.6337800589297595, 'roc_auc': 0.6878050435837537, 'average_precision': 0.7008048490477228, 'mcc': 0.27117551379315513}
✅ Fold 5 RandomForest Metrics: {'accuracy': 0.6355811973895772, 'precision': 0.6369280733594421, 'recall': 0.6306630095526341, 'f1': 0.6337800589297595, 'roc_auc': 0.6872088274975467, 'average_precision': 0.6988149893384643, 'mcc': 0.27117551379315513}
✅ Fold 5 XGBoost Metrics: {'accuracy': 0.6355811973895772, 'precision': 0.6369280733594421, 'recall': 0.6306630095526341, 'f1': 0.6337800589297595, 'roc_auc': 0.6872577279511889, 'average_precision': 0.6991832379193701, 'mcc': 0.27117551379315513}
✅ Fold 5 Voting Metrics: {'accuracy': 0.6355811973895772, 'precision': 0.6369280733594421, 'recall': 0.6306630095526341, 'f1': 0.6337800589297595, 'roc_auc': 0.6873878265055737, 'average_precision': 0.7000473668713663, 'mcc': 0.271175513793

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 577648.69it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1405194.92it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 1016445.59it/s]


✅ Fold 1 LogisticRegression Metrics: {'accuracy': 0.6311707962927936, 'precision': 0.6317939946788294, 'recall': 0.6288065065254398, 'f1': 0.6302967105886814, 'roc_auc': 0.686323885463876, 'average_precision': 0.7026774480781507, 'mcc': 0.26234452554352405}
✅ Fold 1 RandomForest Metrics: {'accuracy': 0.6310289389067524, 'precision': 0.631689002946488, 'recall': 0.6285227917533573, 'f1': 0.630101919886229, 'roc_auc': 0.6861548031939618, 'average_precision': 0.7021917660486177, 'mcc': 0.26206116972790966}
✅ Fold 1 XGBoost Metrics: {'accuracy': 0.6311707962927936, 'precision': 0.6317939946788294, 'recall': 0.6288065065254398, 'f1': 0.6302967105886814, 'roc_auc': 0.6863288805682267, 'average_precision': 0.7024747392763444, 'mcc': 0.26234452554352405}
✅ Fold 1 Voting Metrics: {'accuracy': 0.6311707962927936, 'precision': 0.6317939946788294, 'recall': 0.6288065065254398, 'f1': 0.6302967105886814, 'roc_auc': 0.6863708761142928, 'average_precision': 0.702418020099944, 'mcc': 0.2623445255435240

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 572748.80it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1422326.36it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 908413.64it/s]


✅ Fold 2 LogisticRegression Metrics: {'accuracy': 0.6354265178740306, 'precision': 0.6357088703563305, 'recall': 0.634386230376395, 'f1': 0.6350468616870207, 'roc_auc': 0.6875435674164146, 'average_precision': 0.699123679416792, 'mcc': 0.2708536219832338}
✅ Fold 2 RandomForest Metrics: {'accuracy': 0.6351900888972952, 'precision': 0.6355361714231536, 'recall': 0.6339133724229241, 'f1': 0.6347237346716538, 'roc_auc': 0.6867700819919772, 'average_precision': 0.6973038479558171, 'mcc': 0.2703810592409604}
✅ Fold 2 XGBoost Metrics: {'accuracy': 0.6354265178740306, 'precision': 0.6357088703563305, 'recall': 0.634386230376395, 'f1': 0.6350468616870207, 'roc_auc': 0.6873225262230904, 'average_precision': 0.6982242913611942, 'mcc': 0.2708536219832338}
✅ Fold 2 Voting Metrics: {'accuracy': 0.6354265178740306, 'precision': 0.6357088703563305, 'recall': 0.634386230376395, 'f1': 0.6350468616870207, 'roc_auc': 0.6874791811026821, 'average_precision': 0.6990363137482951, 'mcc': 0.2708536219832338}



Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 322938.79it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1378914.42it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 862820.32it/s]


✅ Fold 3 LogisticRegression Metrics: {'accuracy': 0.6369999054194647, 'precision': 0.6376508600209065, 'recall': 0.6346353920363189, 'f1': 0.6361395525218051, 'roc_auc': 0.6926480536264211, 'average_precision': 0.7057435007307945, 'mcc': 0.27400287471430335}
✅ Fold 3 RandomForest Metrics: {'accuracy': 0.6368580346164759, 'precision': 0.6375475285171103, 'recall': 0.6343516504303415, 'f1': 0.6359455743611625, 'roc_auc': 0.6921698060268886, 'average_precision': 0.7035624994543301, 'mcc': 0.27371950824548413}
✅ Fold 3 XGBoost Metrics: {'accuracy': 0.6368580346164759, 'precision': 0.6375475285171103, 'recall': 0.6343516504303415, 'f1': 0.6359455743611625, 'roc_auc': 0.69223325182721, 'average_precision': 0.7043530698274708, 'mcc': 0.27371950824548413}
✅ Fold 3 Voting Metrics: {'accuracy': 0.6368580346164759, 'precision': 0.6375475285171103, 'recall': 0.6343516504303415, 'f1': 0.6359455743611625, 'roc_auc': 0.6925011509922472, 'average_precision': 0.7051214408114512, 'mcc': 0.27371950824548

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 559705.66it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1463132.92it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 851701.16it/s]


✅ Fold 4 LogisticRegression Metrics: {'accuracy': 0.6341624893596898, 'precision': 0.6510810522952392, 'recall': 0.5781708124467985, 'f1': 0.6124636809938884, 'roc_auc': 0.6887068998052561, 'average_precision': 0.7029402042510214, 'mcc': 0.2700234035962098}
✅ Fold 4 RandomForest Metrics: {'accuracy': 0.6340679088243639, 'precision': 0.6510067114093959, 'recall': 0.5779816513761468, 'f1': 0.6123246492985972, 'roc_auc': 0.6883878011988174, 'average_precision': 0.701306687818381, 'mcc': 0.26983883869489483}
✅ Fold 4 XGBoost Metrics: {'accuracy': 0.6340679088243639, 'precision': 0.6510067114093959, 'recall': 0.5779816513761468, 'f1': 0.6123246492985972, 'roc_auc': 0.6884227869619556, 'average_precision': 0.7014285510478734, 'mcc': 0.26983883869489483}
✅ Fold 4 Voting Metrics: {'accuracy': 0.6341624893596898, 'precision': 0.6510810522952392, 'recall': 0.5781708124467985, 'f1': 0.6124636809938884, 'roc_auc': 0.6886414189087664, 'average_precision': 0.701887087039922, 'mcc': 0.270023403596209

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 545355.04it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1490297.97it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 930356.57it/s]


✅ Fold 5 LogisticRegression Metrics: {'accuracy': 0.6311832024969262, 'precision': 0.6311956110480514, 'recall': 0.6311359122292632, 'f1': 0.631165760227004, 'roc_auc': 0.683576950148347, 'average_precision': 0.6968805268836955, 'mcc': 0.2623664061673487}
✅ Fold 5 RandomForest Metrics: {'accuracy': 0.6310886219616003, 'precision': 0.6311258278145695, 'recall': 0.6309467511586115, 'f1': 0.6310362767819137, 'roc_auc': 0.6832309256541441, 'average_precision': 0.6962946883420564, 'mcc': 0.2621772544770543}
✅ Fold 5 XGBoost Metrics: {'accuracy': 0.6310886219616003, 'precision': 0.6311258278145695, 'recall': 0.6309467511586115, 'f1': 0.6310362767819137, 'roc_auc': 0.6832864189248236, 'average_precision': 0.6970394689695142, 'mcc': 0.2621772544770543}
✅ Fold 5 Voting Metrics: {'accuracy': 0.6310886219616003, 'precision': 0.6311258278145695, 'recall': 0.6309467511586115, 'f1': 0.6310362767819137, 'roc_auc': 0.6833878338050836, 'average_precision': 0.6970994396976535, 'mcc': 0.2621772544770543}

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 510708.11it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1608801.75it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 1060840.78it/s]


✅ Fold 1 LogisticRegression Metrics: {'accuracy': 0.6370815207111784, 'precision': 0.6375367681943258, 'recall': 0.6354265178740306, 'f1': 0.6364798939042297, 'roc_auc': 0.69221388713064, 'average_precision': 0.7041525549881393, 'mcc': 0.27416454331869633}
✅ Fold 1 RandomForest Metrics: {'accuracy': 0.6368450917344429, 'precision': 0.6373647237516613, 'recall': 0.6349536599205599, 'f1': 0.6361569073337123, 'roc_auc': 0.6915684319994487, 'average_precision': 0.7026915339778421, 'mcc': 0.2736921417509972}
✅ Fold 1 XGBoost Metrics: {'accuracy': 0.6368450917344429, 'precision': 0.6373647237516613, 'recall': 0.6349536599205599, 'f1': 0.6361569073337123, 'roc_auc': 0.6915836051320016, 'average_precision': 0.7030151731662331, 'mcc': 0.2736921417509972}
✅ Fold 1 Voting Metrics: {'accuracy': 0.6368450917344429, 'precision': 0.6373647237516613, 'recall': 0.6349536599205599, 'f1': 0.6361569073337123, 'roc_auc': 0.691928696633906, 'average_precision': 0.7036187345854805, 'mcc': 0.2736921417509972}

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 625163.75it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1345558.60it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 790844.70it/s]


✅ Fold 2 LogisticRegression Metrics: {'accuracy': 0.635615661055419, 'precision': 0.6358726549175668, 'recall': 0.6346699451484774, 'f1': 0.635270730783794, 'roc_auc': 0.6918371346271224, 'average_precision': 0.7052982938817737, 'mcc': 0.2712318072791073}
✅ Fold 2 RandomForest Metrics: {'accuracy': 0.6353792320786835, 'precision': 0.6357000663570007, 'recall': 0.6341970871950067, 'f1': 0.6349476873550158, 'roc_auc': 0.6913490856535273, 'average_precision': 0.7043574800236053, 'mcc': 0.27075922091232035}
✅ Fold 2 XGBoost Metrics: {'accuracy': 0.6353792320786835, 'precision': 0.6357000663570007, 'recall': 0.6341970871950067, 'f1': 0.6349476873550158, 'roc_auc': 0.6914640893228049, 'average_precision': 0.7047435519031653, 'mcc': 0.27075922091232035}
✅ Fold 2 Voting Metrics: {'accuracy': 0.635615661055419, 'precision': 0.6358726549175668, 'recall': 0.6346699451484774, 'f1': 0.635270730783794, 'roc_auc': 0.691751837742268, 'average_precision': 0.7056881221902324, 'mcc': 0.2712318072791073}


Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 568349.11it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1259989.30it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 775944.22it/s]


✅ Fold 3 LogisticRegression Metrics: {'accuracy': 0.6354866168542513, 'precision': 0.6366237482117311, 'recall': 0.6313250732999148, 'f1': 0.6339633393484662, 'roc_auc': 0.6895859853133685, 'average_precision': 0.7018067502281758, 'mcc': 0.2709826198661225}
✅ Fold 3 RandomForest Metrics: {'accuracy': 0.6349191336422965, 'precision': 0.6362073904325408, 'recall': 0.630190106876005, 'f1': 0.6331844531027273, 'roc_auc': 0.6885055863031999, 'average_precision': 0.698659194368605, 'mcc': 0.26985033725534874}
✅ Fold 3 XGBoost Metrics: {'accuracy': 0.6354866168542513, 'precision': 0.6366237482117311, 'recall': 0.6313250732999148, 'f1': 0.6339633393484662, 'roc_auc': 0.6891051435253137, 'average_precision': 0.6996966973465119, 'mcc': 0.2709826198661225}
✅ Fold 3 Voting Metrics: {'accuracy': 0.6354866168542513, 'precision': 0.6366237482117311, 'recall': 0.6313250732999148, 'f1': 0.6339633393484662, 'roc_auc': 0.6894011270174725, 'average_precision': 0.7008111525548543, 'mcc': 0.2709826198661225

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 591555.84it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1323879.48it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 870347.41it/s]


✅ Fold 4 LogisticRegression Metrics: {'accuracy': 0.6375200983637568, 'precision': 0.638766940255774, 'recall': 0.6330275229357798, 'f1': 0.63588428103178, 'roc_auc': 0.6927801380768471, 'average_precision': 0.7058223623315579, 'mcc': 0.27505129980109255}
✅ Fold 4 RandomForest Metrics: {'accuracy': 0.6371417762224534, 'precision': 0.6384909264565425, 'recall': 0.6322708786531732, 'f1': 0.6353656797985078, 'roc_auc': 0.692213401812277, 'average_precision': 0.7050419668436174, 'mcc': 0.2742965684787192}
✅ Fold 4 XGBoost Metrics: {'accuracy': 0.6375200983637568, 'precision': 0.638766940255774, 'recall': 0.6330275229357798, 'f1': 0.63588428103178, 'roc_auc': 0.6926392915310506, 'average_precision': 0.705432755655822, 'mcc': 0.27505129980109255}
✅ Fold 4 Voting Metrics: {'accuracy': 0.6375200983637568, 'precision': 0.638766940255774, 'recall': 0.6330275229357798, 'f1': 0.63588428103178, 'roc_auc': 0.6926993693590321, 'average_precision': 0.705865551387006, 'mcc': 0.27505129980109255}

-----

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 286733.97it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1440458.41it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 933932.34it/s]


✅ Fold 5 LogisticRegression Metrics: {'accuracy': 0.6356284876572401, 'precision': 0.6363895758036903, 'recall': 0.6328383618651282, 'f1': 0.6346090008061839, 'roc_auc': 0.6889783503249253, 'average_precision': 0.6992999242661829, 'mcc': 0.27126119877677}
✅ Fold 5 RandomForest Metrics: {'accuracy': 0.6356284876572401, 'precision': 0.6363895758036903, 'recall': 0.6328383618651282, 'f1': 0.6346090008061839, 'roc_auc': 0.6888023077972656, 'average_precision': 0.6992982341629379, 'mcc': 0.27126119877677}
✅ Fold 5 XGBoost Metrics: {'accuracy': 0.6356284876572401, 'precision': 0.6363895758036903, 'recall': 0.6328383618651282, 'f1': 0.6346090008061839, 'roc_auc': 0.6888381523262593, 'average_precision': 0.6994754720821229, 'mcc': 0.27126119877677}
✅ Fold 5 Voting Metrics: {'accuracy': 0.6356284876572401, 'precision': 0.6363895758036903, 'recall': 0.6328383618651282, 'f1': 0.6346090008061839, 'roc_auc': 0.6889923052700787, 'average_precision': 0.6997673830868736, 'mcc': 0.27126119877677}

5折交叉

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 569844.02it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1414839.12it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 881931.48it/s]


✅ Fold 1 LogisticRegression Metrics: {'accuracy': 0.6299886514091168, 'precision': 0.6311670960969559, 'recall': 0.6254965008511443, 'f1': 0.6283190044174227, 'roc_auc': 0.686448789903997, 'average_precision': 0.7015173039361402, 'mcc': 0.25998779583408516}
✅ Fold 1 RandomForest Metrics: {'accuracy': 0.6299886514091168, 'precision': 0.6311670960969559, 'recall': 0.6254965008511443, 'f1': 0.6283190044174227, 'roc_auc': 0.686333500033575, 'average_precision': 0.700071717059322, 'mcc': 0.25998779583408516}
✅ Fold 1 XGBoost Metrics: {'accuracy': 0.6299886514091168, 'precision': 0.6311670960969559, 'recall': 0.6254965008511443, 'f1': 0.6283190044174227, 'roc_auc': 0.6865155686804221, 'average_precision': 0.7008414382853759, 'mcc': 0.25998779583408516}
✅ Fold 1 Voting Metrics: {'accuracy': 0.6299886514091168, 'precision': 0.6311670960969559, 'recall': 0.6254965008511443, 'f1': 0.6283190044174227, 'roc_auc': 0.686435508382134, 'average_precision': 0.7012777929941487, 'mcc': 0.2599877958340851

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 565165.11it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1374511.93it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 985874.95it/s]


✅ Fold 2 LogisticRegression Metrics: {'accuracy': 0.6427085303574807, 'precision': 0.6435775451950523, 'recall': 0.6396822394552676, 'f1': 0.6416239802693986, 'roc_auc': 0.6995520844762746, 'average_precision': 0.7105681935712831, 'mcc': 0.2854222888067295}
✅ Fold 2 RandomForest Metrics: {'accuracy': 0.6426139587667865, 'precision': 0.6435097068899885, 'recall': 0.6394930962738793, 'f1': 0.6414951143155299, 'roc_auc': 0.6991836094464838, 'average_precision': 0.709017698929416, 'mcc': 0.2852334738117976}
✅ Fold 2 XGBoost Metrics: {'accuracy': 0.6427085303574807, 'precision': 0.6435775451950523, 'recall': 0.6396822394552676, 'f1': 0.6416239802693986, 'roc_auc': 0.6994376174341431, 'average_precision': 0.70961104601407, 'mcc': 0.2854222888067295}
✅ Fold 2 Voting Metrics: {'accuracy': 0.6427085303574807, 'precision': 0.6435775451950523, 'recall': 0.6396822394552676, 'f1': 0.6416239802693986, 'roc_auc': 0.6995085282395921, 'average_precision': 0.710225580471642, 'mcc': 0.2854222888067295}



Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 556873.31it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1585289.49it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 1036420.87it/s]


✅ Fold 3 LogisticRegression Metrics: {'accuracy': 0.6355811973895772, 'precision': 0.6359927900578692, 'recall': 0.6340679088243639, 'f1': 0.6350288907833664, 'roc_auc': 0.6866879949516018, 'average_precision': 0.7019190732919429, 'mcc': 0.2711636367343853}
✅ Fold 3 RandomForest Metrics: {'accuracy': 0.6353447460512627, 'precision': 0.6358200455580866, 'recall': 0.6335950061477348, 'f1': 0.6347055758207399, 'roc_auc': 0.6862804613529915, 'average_precision': 0.7009868559558763, 'mcc': 0.2706911495980871}
✅ Fold 3 XGBoost Metrics: {'accuracy': 0.6353920363189256, 'precision': 0.6358546075733131, 'recall': 0.6336895866830606, 'f1': 0.6347702510658456, 'roc_auc': 0.6863662887384245, 'average_precision': 0.7013651364391452, 'mcc': 0.27078564229728114}
✅ Fold 3 Voting Metrics: {'accuracy': 0.6355811973895772, 'precision': 0.6359927900578692, 'recall': 0.6340679088243639, 'f1': 0.6350288907833664, 'roc_auc': 0.6866280334148299, 'average_precision': 0.7017300205178097, 'mcc': 0.27116363673438

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 595258.31it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1320632.00it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 742229.82it/s]


✅ Fold 4 LogisticRegression Metrics: {'accuracy': 0.634588101768656, 'precision': 0.635912129894938, 'recall': 0.6297172041993757, 'f1': 0.6327995057738915, 'roc_auc': 0.6886299105517536, 'average_precision': 0.7010418714881618, 'mcc': 0.26918897720359825}
✅ Fold 4 RandomForest Metrics: {'accuracy': 0.6343989406980044, 'precision': 0.635824890078379, 'recall': 0.6291497209874208, 'f1': 0.6324696933681959, 'roc_auc': 0.6880033692962706, 'average_precision': 0.7004972352474614, 'mcc': 0.26881269569960986}
✅ Fold 4 XGBoost Metrics: {'accuracy': 0.6345408115009931, 'precision': 0.6359033151810451, 'recall': 0.6295280431287241, 'f1': 0.6326996197718631, 'roc_auc': 0.6882697879847546, 'average_precision': 0.701337065927687, 'mcc': 0.26909514690504865}
✅ Fold 4 Voting Metrics: {'accuracy': 0.634588101768656, 'precision': 0.635912129894938, 'recall': 0.6297172041993757, 'f1': 0.6327995057738915, 'roc_auc': 0.6885932072569043, 'average_precision': 0.7018757283172443, 'mcc': 0.26918897720359825}

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 579380.64it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1585296.57it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 960710.06it/s]


✅ Fold 5 LogisticRegression Metrics: {'accuracy': 0.6384186134493521, 'precision': 0.6395803528850739, 'recall': 0.6342570698950156, 'f1': 0.6369075885649159, 'roc_auc': 0.6934242616686755, 'average_precision': 0.7071398836210593, 'mcc': 0.27684681617736145}
✅ Fold 5 RandomForest Metrics: {'accuracy': 0.6383240329140263, 'precision': 0.6395115901936469, 'recall': 0.6340679088243639, 'f1': 0.6367781155015197, 'roc_auc': 0.6932107688987819, 'average_precision': 0.7067580105513973, 'mcc': 0.27665808910658635}
✅ Fold 5 XGBoost Metrics: {'accuracy': 0.6383240329140263, 'precision': 0.6395115901936469, 'recall': 0.6340679088243639, 'f1': 0.6367781155015197, 'roc_auc': 0.6932669599167189, 'average_precision': 0.7074222942766145, 'mcc': 0.27665808910658635}
✅ Fold 5 Voting Metrics: {'accuracy': 0.6384186134493521, 'precision': 0.6395803528850739, 'recall': 0.6342570698950156, 'f1': 0.6369075885649159, 'roc_auc': 0.6933748379045901, 'average_precision': 0.7077237165843064, 'mcc': 0.276846816177

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 548870.88it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1407419.12it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 1086358.13it/s]


✅ Fold 1 LogisticRegression Metrics: {'accuracy': 0.6406752411575563, 'precision': 0.6581605528973951, 'recall': 0.5853981463968224, 'f1': 0.619650633164823, 'roc_auc': 0.6941862244305662, 'average_precision': 0.7053963824221663, 'mcc': 0.28308577041933103}
✅ Fold 1 RandomForest Metrics: {'accuracy': 0.640486097976168, 'precision': 0.6580151047760876, 'recall': 0.5850198600340458, 'f1': 0.6193742177722152, 'roc_auc': 0.6937226859018639, 'average_precision': 0.7033860645328831, 'mcc': 0.2827171414811985}
✅ Fold 1 XGBoost Metrics: {'accuracy': 0.6406752411575563, 'precision': 0.6581605528973951, 'recall': 0.5853981463968224, 'f1': 0.619650633164823, 'roc_auc': 0.6939617890705435, 'average_precision': 0.7044066990161236, 'mcc': 0.28308577041933103}
✅ Fold 1 Voting Metrics: {'accuracy': 0.6406752411575563, 'precision': 0.6581605528973951, 'recall': 0.5853981463968224, 'f1': 0.619650633164823, 'roc_auc': 0.6940895778815742, 'average_precision': 0.7052601896271827, 'mcc': 0.28308577041933103}

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 581866.41it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1503429.94it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 855709.55it/s]


✅ Fold 2 LogisticRegression Metrics: {'accuracy': 0.6303196519765463, 'precision': 0.6316392816201758, 'recall': 0.625307357669756, 'f1': 0.628457370972341, 'roc_auc': 0.6822077942660243, 'average_precision': 0.6952367089641283, 'mcc': 0.26065240107180143}
✅ Fold 2 RandomForest Metrics: {'accuracy': 0.6302723661811992, 'precision': 0.6316292403248925, 'recall': 0.6251182144883677, 'f1': 0.6283568610675412, 'roc_auc': 0.6820853135918462, 'average_precision': 0.6942971250206991, 'mcc': 0.2605585763530815}
✅ Fold 2 XGBoost Metrics: {'accuracy': 0.6302723661811992, 'precision': 0.6316292403248925, 'recall': 0.6251182144883677, 'f1': 0.6283568610675412, 'roc_auc': 0.6821471911736712, 'average_precision': 0.6950713386435794, 'mcc': 0.2605585763530815}
✅ Fold 2 Voting Metrics: {'accuracy': 0.6303196519765463, 'precision': 0.6316392816201758, 'recall': 0.625307357669756, 'f1': 0.628457370972341, 'roc_auc': 0.6822530766533598, 'average_precision': 0.6952778469931019, 'mcc': 0.26065240107180143}

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 563214.97it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1503064.22it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 870492.62it/s]


✅ Fold 3 LogisticRegression Metrics: {'accuracy': 0.6387023550553296, 'precision': 0.6406984553391538, 'recall': 0.6316088149058924, 'f1': 0.6361211659363688, 'roc_auc': 0.6932911529610574, 'average_precision': 0.7068028017533603, 'mcc': 0.27743263139901114}
✅ Fold 3 RandomForest Metrics: {'accuracy': 0.6387023550553296, 'precision': 0.6406984553391538, 'recall': 0.6316088149058924, 'f1': 0.6361211659363688, 'roc_auc': 0.6933657314083297, 'average_precision': 0.706883893058802, 'mcc': 0.27743263139901114}
✅ Fold 3 XGBoost Metrics: {'accuracy': 0.6387023550553296, 'precision': 0.6406984553391538, 'recall': 0.6316088149058924, 'f1': 0.6361211659363688, 'roc_auc': 0.693392420240936, 'average_precision': 0.7072062232682449, 'mcc': 0.27743263139901114}
✅ Fold 3 Voting Metrics: {'accuracy': 0.6387023550553296, 'precision': 0.6406984553391538, 'recall': 0.6316088149058924, 'f1': 0.6361211659363688, 'roc_auc': 0.6932855441465628, 'average_precision': 0.7072888556184862, 'mcc': 0.27743263139901114}

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 568967.45it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1292209.64it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 649050.51it/s]


✅ Fold 4 LogisticRegression Metrics: {'accuracy': 0.6343043601626785, 'precision': 0.6501692047377327, 'recall': 0.5814811311832025, 'f1': 0.6139098307454192, 'roc_auc': 0.6874352107007521, 'average_precision': 0.7008130263852642, 'mcc': 0.27012038043045555}
✅ Fold 4 RandomForest Metrics: {'accuracy': 0.6343043601626785, 'precision': 0.6501692047377327, 'recall': 0.5814811311832025, 'f1': 0.6139098307454192, 'roc_auc': 0.6871846747078579, 'average_precision': 0.6988749072919036, 'mcc': 0.27012038043045555}
✅ Fold 4 XGBoost Metrics: {'accuracy': 0.6343043601626785, 'precision': 0.6501692047377327, 'recall': 0.5814811311832025, 'f1': 0.6139098307454192, 'roc_auc': 0.687210674738684, 'average_precision': 0.6991413062549879, 'mcc': 0.27012038043045555}
✅ Fold 4 Voting Metrics: {'accuracy': 0.6343043601626785, 'precision': 0.6501692047377327, 'recall': 0.5814811311832025, 'f1': 0.6139098307454192, 'roc_auc': 0.6873854291175602, 'average_precision': 0.6999485554258756, 'mcc': 0.27012038043045555}

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 554541.99it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1417767.41it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 1050824.64it/s]


✅ Fold 5 LogisticRegression Metrics: {'accuracy': 0.6383713231816892, 'precision': 0.6558704453441295, 'recall': 0.5822377754658091, 'f1': 0.6168645723733654, 'roc_auc': 0.6883690559503757, 'average_precision': 0.7013839432704542, 'mcc': 0.2785033259480083}
✅ Fold 5 RandomForest Metrics: {'accuracy': 0.6378038399697342, 'precision': 0.6554299125240025, 'recall': 0.5811028090418991, 'f1': 0.6160324860881335, 'roc_auc': 0.6873303965389803, 'average_precision': 0.6985707794480183, 'mcc': 0.2773971189215149}
✅ Fold 5 XGBoost Metrics: {'accuracy': 0.6383713231816892, 'precision': 0.6558704453441295, 'recall': 0.5822377754658091, 'f1': 0.6168645723733654, 'roc_auc': 0.6878710880453361, 'average_precision': 0.6995343500931022, 'mcc': 0.2785033259480083}
✅ Fold 5 Voting Metrics: {'accuracy': 0.6383713231816892, 'precision': 0.6558704453441295, 'recall': 0.5822377754658091, 'f1': 0.6168645723733654, 'roc_auc': 0.6880951096424386, 'average_precision': 0.7000966258431477, 'mcc': 0.2785033259480083}

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 581665.69it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1526517.24it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 1150095.83it/s]


✅ Fold 1 LogisticRegression Metrics: {'accuracy': 0.6388310951390203, 'precision': 0.6394376899696048, 'recall': 0.6366559485530546, 'f1': 0.6380437873187376, 'roc_auc': 0.6965286600573998, 'average_precision': 0.7106027471341437, 'mcc': 0.27766481770084533}
✅ Fold 1 RandomForest Metrics: {'accuracy': 0.6384528087762437, 'precision': 0.6391634980988593, 'recall': 0.6358993758275014, 'f1': 0.6375272589361903, 'roc_auc': 0.695377599301154, 'average_precision': 0.7080961627426461, 'mcc': 0.2769092284833489}
✅ Fold 1 XGBoost Metrics: {'accuracy': 0.6385000945715907, 'precision': 0.6391977948864176, 'recall': 0.6359939474181956, 'f1': 0.6375918464091017, 'roc_auc': 0.6955181643101519, 'average_precision': 0.7085423297199379, 'mcc': 0.2770036687596707}
✅ Fold 1 Voting Metrics: {'accuracy': 0.6388310951390203, 'precision': 0.6394376899696048, 'recall': 0.6366559485530546, 'f1': 0.6380437873187376, 'roc_auc': 0.6962888056107158, 'average_precision': 0.7095346798633109, 'mcc': 0.27766481770084533}

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 561137.63it/s]
Building training set: 100%|██████████| 84586/84586 [00:00<00:00, 1329987.17it/s]
Building test set: 100%|██████████| 21148/21148 [00:00<00:00, 878420.46it/s]


✅ Fold 2 LogisticRegression Metrics: {'accuracy': 0.6341025156043124, 'precision': 0.636084452975048, 'recall': 0.6268205031208625, 'f1': 0.6314185005239592, 'roc_auc': 0.6878237762244768, 'average_precision': 0.7024862876699869, 'mcc': 0.2682334803294627}
✅ Fold 2 RandomForest Metrics: {'accuracy': 0.6336769434461887, 'precision': 0.6357698588031889, 'recall': 0.6259693588046151, 'f1': 0.6308315463426257, 'roc_auc': 0.687444309281979, 'average_precision': 0.7027286760183916, 'mcc': 0.2673856578651276}
✅ Fold 2 XGBoost Metrics: {'accuracy': 0.6341025156043124, 'precision': 0.636084452975048, 'recall': 0.6268205031208625, 'f1': 0.6314185005239592, 'roc_auc': 0.6878357966725468, 'average_precision': 0.7035267835362289, 'mcc': 0.2682334803294627}
✅ Fold 2 Voting Metrics: {'accuracy': 0.6341025156043124, 'precision': 0.636084452975048, 'recall': 0.6268205031208625, 'f1': 0.6314185005239592, 'roc_auc': 0.6878693895318856, 'average_precision': 0.7036364435082589, 'mcc': 0.2682334803294627}



Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 582836.06it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1522596.34it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 1059270.90it/s]


✅ Fold 3 LogisticRegression Metrics: {'accuracy': 0.6354393265865885, 'precision': 0.637217324645458, 'recall': 0.6289605599167691, 'f1': 0.6330620210386025, 'roc_auc': 0.6900637856390177, 'average_precision': 0.7046252944417478, 'mcc': 0.27090139598445967}
✅ Fold 3 RandomForest Metrics: {'accuracy': 0.6353920363189256, 'precision': 0.6371825586966938, 'recall': 0.6288659793814433, 'f1': 0.632996953541508, 'roc_auc': 0.6898619443537795, 'average_precision': 0.7032619373409098, 'mcc': 0.27080714065799183}
✅ Fold 3 XGBoost Metrics: {'accuracy': 0.6354393265865885, 'precision': 0.637217324645458, 'recall': 0.6289605599167691, 'f1': 0.6330620210386025, 'roc_auc': 0.6899403604109692, 'average_precision': 0.7036827236523229, 'mcc': 0.27090139598445967}
✅ Fold 3 Voting Metrics: {'accuracy': 0.6354393265865885, 'precision': 0.637217324645458, 'recall': 0.6289605599167691, 'f1': 0.6330620210386025, 'roc_auc': 0.6900134494362108, 'average_precision': 0.7041781030149334, 'mcc': 0.27090139598445967}

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 312862.37it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1328459.85it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 1001080.76it/s]


✅ Fold 4 LogisticRegression Metrics: {'accuracy': 0.6372363567577792, 'precision': 0.6394387853161637, 'recall': 0.6293388820580724, 'f1': 0.6343486343486343, 'roc_auc': 0.6926763302813123, 'average_precision': 0.7064087372292486, 'mcc': 0.2745069577079529}
✅ Fold 4 RandomForest Metrics: {'accuracy': 0.6369053248841389, 'precision': 0.6391960765458217, 'recall': 0.6286768183107916, 'f1': 0.6338928094602327, 'roc_auc': 0.6919485083277342, 'average_precision': 0.704362423580917, 'mcc': 0.27384773581997257}
✅ Fold 4 XGBoost Metrics: {'accuracy': 0.6370944859547905, 'precision': 0.6393348072671344, 'recall': 0.629055140452095, 'f1': 0.6341533180778032, 'roc_auc': 0.6922818615528281, 'average_precision': 0.7050853886011706, 'mcc': 0.27422442103970035}
✅ Fold 4 Voting Metrics: {'accuracy': 0.6372363567577792, 'precision': 0.6394387853161637, 'recall': 0.6293388820580724, 'f1': 0.6343486343486343, 'roc_auc': 0.6925760514767153, 'average_precision': 0.7061782076963271, 'mcc': 0.2745069577079529}

Precomputing structural features: 100%|██████████| 105734/105734 [00:00<00:00, 575627.30it/s]
Building training set: 100%|██████████| 84588/84588 [00:00<00:00, 1425600.56it/s]
Building test set: 100%|██████████| 21146/21146 [00:00<00:00, 978570.67it/s]


✅ Fold 5 LogisticRegression Metrics: {'accuracy': 0.6396954506762508, 'precision': 0.6405060882800608, 'recall': 0.636810744348813, 'f1': 0.6386530709034859, 'roc_auc': 0.6947449004269488, 'average_precision': 0.7055512973190249, 'mcc': 0.2793955513884557}
✅ Fold 5 RandomForest Metrics: {'accuracy': 0.6396954506762508, 'precision': 0.6405060882800608, 'recall': 0.636810744348813, 'f1': 0.6386530709034859, 'roc_auc': 0.6947254261220775, 'average_precision': 0.7059736831684501, 'mcc': 0.2793955513884557}
✅ Fold 5 XGBoost Metrics: {'accuracy': 0.6396954506762508, 'precision': 0.6405060882800608, 'recall': 0.636810744348813, 'f1': 0.6386530709034859, 'roc_auc': 0.6946068538156608, 'average_precision': 0.7057623477119144, 'mcc': 0.2793955513884557}
✅ Fold 5 Voting Metrics: {'accuracy': 0.6396954506762508, 'precision': 0.6405060882800608, 'recall': 0.636810744348813, 'f1': 0.6386530709034859, 'roc_auc': 0.694777685602582, 'average_precision': 0.7062596030603211, 'mcc': 0.2793955513884557}

5

In [1]:
import inspect
from node2vec import Node2Vec

# Print the call signature of the imported Node2Vec class so the accepted
# keyword arguments and their defaults can be read off directly.
print(str(inspect.signature(Node2Vec)))


(graph, dimensions=128, walk_length=80, num_walks=10, p=1, q=1, weight_key='weight', workers=1, sampling_strategy=None, quiet=False, temp_folder=None)
