In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt

In [3]:
# 1. 加载数据
data = pd.read_excel('data.xlsx')  # 替换为你的数据文件路径
features = ['zALFF', 'zfALFF', 'zReHo']
target = '组'

In [4]:
# 2. 准备数据
X = data[features]
y = data[target]

In [5]:
# 3. 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [16]:
# 4. 定义模型和参数空间
model = SVC(probability=True)
param_grid = {
    'kernel': ['linear', 'rbf', 'sigmoid', 'poly'],
    'C': [0.1, 1, 10],
    'gamma': [10, 1, 0.1, 0.01]
}

In [17]:
# 5. 进行网格搜索和交叉验证
cv = StratifiedKFold(n_splits=5)
grid_search = GridSearchCV(model, param_grid, scoring=['accuracy', 'recall', 'precision', 'roc_auc'], cv=cv, refit='roc_auc')
grid_search.fit(X_train, y_train)

In [18]:
# 6. 输出最优参数
best_params = grid_search.best_params_
print('最优参数:', best_params)

最优参数: {'C': 0.1, 'gamma': 10, 'kernel': 'sigmoid'}


In [20]:
# 7. 使用最优参数的模型进行预测
best_model = SVC(**best_params, probability=True)
y_pred = []
y_pred_proba = []

In [21]:
for train_index, val_index in cv.split(X_train, y_train):
    X_train_fold, X_val_fold = X_train.iloc[train_index], X_train.iloc[val_index]
    y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]

    best_model.fit(X_train_fold, y_train_fold)
    y_pred.append(best_model.predict(X_val_fold))
    y_pred_proba.append(best_model.predict_proba(X_val_fold)[:, 1])

In [34]:
# 8. 计算评估指标
print(len(y_test))
print(len(y_pred[2]))
accuracy = accuracy_score(y_test, y_pred)
sensitivity = recall_score(y_test, y_pred)
specificity = recall_score(y_test, y_pred, pos_label=0)
auc = roc_auc_score(y_test, y_pred_proba)

# 9. 输出评估指标的平均值
metrics_mean = {'Accuracy': accuracy, 'Sensitivity': sensitivity, 'Specificity': specificity, 'AUC': auc}
metrics_mean_values = {k: np.mean(v) for k, v in metrics_mean.items()}
print('评估指标平均值:', metrics_mean_values)

# 10. 绘制ROC曲线
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.show()

14
11


ValueError: Found input variables with inconsistent numbers of samples: [14, 5]