定义需要优化的特征参数

In [None]:
import features

# Search space for the tunable feature-extraction and model parameters.
# Every entry is a spec dict with "type", "low" and "high" keys, plus an
# optional "log" flag; the consumer of this format is not shown in this cell.
PARAM_SPACE = {
    # PseAAC
    "pseaac_lambda": dict(type="int", low=3, high=5),

    # EBGW
    "ebgw_window_size": dict(type="int", low=5, high=9),

    # PsePSSM
    "psepssm_lambda": dict(type="int", low=3, high=5),
    "psepssm_w": dict(type="float", low=0.1, high=0.9),

    # Node2Vec
    "n2v_dimensions": dict(type="int", low=16, high=64),
    "n2v_walk_length": dict(type="int", low=20, high=100),
    "n2v_num_walks": dict(type="int", low=10, high=50),
    "n2v_window": dict(type="int", low=10, high=64),

    # Model hyperparameters (XGBoost used as the example)
    "xgb_n_estimators": dict(type="int", low=100, high=500),
    "xgb_max_depth": dict(type="int", low=3, high=10),
    "xgb_learning_rate": dict(type="float", low=0.01, high=0.3, log=True),
}

先进行图的求解

In [None]:
# NOTE(review): this binds the function object features.calculate_G itself;
# nothing is called here, so G is not a computed graph after this cell.
# The optimization cell further down calls features.calculate_G(labels) —
# confirm whether this cell was meant to do the same once labels is available.
G = features.calculate_G

尝试参数优化

In [None]:
import pandas as pd
import numpy as np
from skopt import BayesSearchCV
from xgboost import XGBClassifier
import features

# Placeholder inputs: df is assumed to be your data frame and labels your
# label data. Both start out as empty frames and must be replaced with real data.
df, labels = pd.DataFrame(), pd.DataFrame()
# Build G from the labels via the project's features module.
G = features.calculate_G(labels)

# Search space in tuple form: each value is (low, high, distribution name).
# This rebinds PARAM_SPACE, replacing the dict-spec version defined earlier.
PARAM_SPACE = dict(
    # PseAAC
    pseaac_lambda=(3, 5, 'uniform'),
    # EBGW
    ebgw_window_size=(5, 9, 'uniform'),
    # PsePSSM
    psepssm_lambda=(3, 5, 'uniform'),
    psepssm_w=(0.1, 0.9, 'uniform'),
    # Node2Vec
    n2v_dimensions=(16, 64, 'uniform'),
    n2v_walk_length=(20, 100, 'uniform'),
    n2v_num_walks=(10, 50, 'uniform'),
    n2v_window=(10, 64, 'uniform'),
    # Model hyperparameters (XGBoost used as the example)
    xgb_n_estimators=(100, 500, 'uniform'),
    xgb_max_depth=(3, 10, 'uniform'),
    xgb_learning_rate=(0.01, 0.3, 'log-uniform'),
)

class FeatureGenerator:
    """Callable wrapper around ``features.generate_all``.

    The data frame, labels and graph are fixed at construction time so that
    each call only needs to supply the parameter dictionary.
    """

    def __init__(self, df, labels, G):
        """Store the fixed inputs that every call forwards unchanged."""
        self.df, self.labels, self.G = df, labels, G

    def __call__(self, params):
        """Return ``features.generate_all(df, labels, G, params)``."""
        fixed_inputs = (self.df, self.labels, self.G)
        return features.generate_all(*fixed_inputs, params)

# Build the feature generator from the data frame, labels and graph
feature_generator = FeatureGenerator(df=df, labels=labels, G=G)

# Build the XGBoost classifier with its default settings
xgb = XGBClassifier()

# Custom evaluation function
def custom_eval(params):
    """Score one parameter set with the module-level ``xgb`` classifier.

    Features are produced by ``feature_generator(params)``. Every entry of
    ``params`` whose key starts with ``xgb_`` is applied to the classifier
    with that marker removed from the key.

    Returns ``float('-inf')`` when the feature generator returns ``None``,
    otherwise the value of ``xgb.score``.
    """
    X = feature_generator(params)
    if X is None:
        return float('-inf')

    # This assumes you have a target variable y.
    # NOTE(review): the labels here are random 0/1 placeholders, one per row
    # of X — replace with the real targets before trusting any score.
    y = np.random.randint(0, 2, X.shape[0])

    model_kwargs = {}
    for key, value in params.items():
        if key.startswith('xgb_'):
            model_kwargs[key.replace('xgb_', '')] = value
    xgb.set_params(**model_kwargs)

    # NOTE(review): the model is scored on the same X, y it was fitted on,
    # so this is a training score rather than a held-out estimate.
    xgb.fit(X, y)
    return xgb.score(X, y)

# Run Bayesian optimization over PARAM_SPACE with custom_eval as the objective.
# BayesSearchCV requires a scikit-learn estimator plus real training data, so
# it cannot drive a plain scoring callable (and a one-element placeholder
# dataset cannot be split for cv=3). gp_minimize is skopt's function-level
# optimizer and accepts the same (low, high, prior) tuples as dimensions.
from skopt import gp_minimize

# Fix the parameter order once so sampled points can be mapped back to names.
param_names = list(PARAM_SPACE)
dimensions = [PARAM_SPACE[name] for name in param_names]


def _objective(values):
    """Return the loss for one sampled point (lower is better)."""
    score = custom_eval(dict(zip(param_names, values)))
    # custom_eval reports a failed feature build as -inf. The surrogate model
    # needs finite targets, so map that to a loss worse than any negated
    # classifier score (presumably accuracy in [0, 1] — confirm).
    if not np.isfinite(score):
        return 1.0
    # gp_minimize minimizes, while a higher score is better.
    return -score


result = gp_minimize(
    _objective,
    dimensions,
    n_calls=10,  # number of objective evaluations
    random_state=42,
)

# Report the best parameters found
best_params = dict(zip(param_names, result.x))
print("Best parameters:", best_params)