# 00 · S3WD Baseline（中文）

In [1]:
# ================================
# 0. Environment setup and module imports
# ================================
import os, sys, platform, importlib, inspect, warnings
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import (f1_score, balanced_accuracy_score, precision_score, recall_score,
                             matthews_corrcoef, cohen_kappa_score, roc_auc_score)
# 1) Put the parent of the working directory at sys.path[0] so that the
#    project-local package s3wdlib is importable (the notebook is expected to
#    run from notebooks/, one level below the project root).
sys.path.insert(0, os.path.abspath('..'))

# 2) Import the in-house s3wdlib modules, then force a reload so that edits
#    made to the library since the kernel started are picked up.
import s3wdlib.zh_utils as zh_utils
import s3wdlib.data_io as data_io
import s3wdlib.features as features
import s3wdlib.kwb as kwb
import s3wdlib.objective as objective
import s3wdlib.trainer as trainer

importlib.reload(zh_utils)
importlib.reload(data_io)
importlib.reload(features)
importlib.reload(kwb)
importlib.reload(objective)
importlib.reload(trainer)

# 3) Pull the frequently used symbols into the notebook namespace. This must
#    come after the reloads so the names bind to the reloaded objects.
#    NOTE(review): s3wdlib.config_loader is imported here but never reloaded above.
from s3wdlib.zh_utils import set_chinese_font, fix_minus
from s3wdlib.data_io import load_table_auto, minmax_scale_fit_transform
from s3wdlib.features import rank_features_mi, make_levels
from s3wdlib.kwb import KWBProbEstimator
from s3wdlib.objective import S3WDParams
from s3wdlib.trainer import PSOParams, pso_learn_thresholds
from s3wdlib.config_loader import load_yaml_cfg, extract_vars, show_cfg

# 4) Plot styling for Chinese text via the zh_utils helpers (their exact
#    behaviour is not visible here; the message below describes the intent:
#    prefer the SimSun font with automatic fallback, render minus signs properly).
set_chinese_font(); fix_minus()
print("【可视化字体】已设置为宋体优先（若系统缺失则自动回退）。")

# 5) Version and path self-check: confirms which files were actually imported.
print("【Python】", platform.python_version())
print("【Pandas/Numpy】", pd.__version__, np.__version__)
print("【模块路径】")
print("  zh_utils   ->", zh_utils.__file__)
print("  data_io    ->", data_io.__file__)
print("  features   ->", features.__file__)
print("  kwb        ->", kwb.__file__)
print("  objective  ->", objective.__file__)
print("  trainer    ->", trainer.__file__)
print("【函数签名】load_table_auto:", inspect.signature(data_io.load_table_auto))

# 6) Seed NumPy's global RNG for reproducibility.
np.random.seed(42)

# 7) Silence all warnings (optional; this also hides deprecation notices).
warnings.filterwarnings("ignore")




【可视化字体】已设置为宋体优先（若系统缺失则自动回退）。
【Python】 3.11.5
【Pandas/Numpy】 2.0.3 1.26.4
【模块路径】
  zh_utils   -> e:\yan\组\三支决策\机器学习\C三支决策与不平衡数据集分类\S3WD实验\s3wdlib\zh_utils.py
  data_io    -> e:\yan\组\三支决策\机器学习\C三支决策与不平衡数据集分类\S3WD实验\s3wdlib\data_io.py
  features   -> e:\yan\组\三支决策\机器学习\C三支决策与不平衡数据集分类\S3WD实验\s3wdlib\features.py
  kwb        -> e:\yan\组\三支决策\机器学习\C三支决策与不平衡数据集分类\S3WD实验\s3wdlib\kwb.py
  objective  -> e:\yan\组\三支决策\机器学习\C三支决策与不平衡数据集分类\S3WD实验\s3wdlib\objective.py
  trainer    -> e:\yan\组\三支决策\机器学习\C三支决策与不平衡数据集分类\S3WD实验\s3wdlib\trainer.py
【函数签名】load_table_auto: (path: 'str', label_col: 'Optional[str | int]' = None, positive_label=1, continuous_label: 'Optional[str]' = None, threshold: 'Optional[float]' = None, threshold_op: 'str' = '>=') -> 'Tuple[pd.DataFrame, pd.Series]'


In [2]:
# === Configuration: YAML -> dataclass -> flat variable dict ===
# To switch dataset, change only the path passed to load_yaml_cfg below.
# wine
# CFG = load_yaml_cfg("../configs/s3wd_wine.yaml")
# Heart
# CFG = load_yaml_cfg("../configs/s3wd_heart.yaml")
# Credit
# CFG = load_yaml_cfg("../configs/s3wd_credit.yaml")
# airline
CFG = load_yaml_cfg("../configs/s3wd_airline.yaml")


V   = extract_vars(CFG)
show_cfg(CFG)

# Describe the label rule for both supported setups: binarising a continuous
# column, or using an existing label column. The checks test for a non-None
# value rather than mere key presence, because a key may exist with the value
# None; this matches the checks done in the data-loading cell.
if V.get("CONT_LABEL") is not None:
    label_desc = f"{V['CONT_LABEL']}{V['CONT_OP']}{V['CONT_THRESH']}"
elif V.get("LABEL_COL") is not None:
    label_desc = f"{V['LABEL_COL']}=={V.get('POSITIVE_LABEL', 1)}"
else:
    label_desc = "(未检测到标签配置)"

print("【参数就绪（来自 YAML）】", {
    "DATA_PATH": V["DATA_PATH"],
    "label": label_desc,
    "splits": f"test={V['TEST_SIZE']}, val={V['VAL_SIZE']}, seed={V['SEED']}",
    "kwb.k": V["KWB_K"],
    "pso": {"particles": V["PSO_particles"], "iters": V["PSO_iters"]}
})




【配置快照】
- DATA: {'data_dir': '../data', 'data_file': 'airlines_train_regression_1000000.arff', 'continuous_label': 'DepDelay', 'threshold': 15, 'threshold_op': '>', 'label_col': None, 'positive_label': None, 'test_size': 0.3, 'val_size': 0.3, 'random_state': 42}
- LEVEL: {'level_pcts': [0.6, 0.8, 1.0], 'ranker': 'mi'}
- KWB: {'k': 6, 'metric': 'euclidean', 'eps': 1e-06}
- S3WD: {'c1': 0.37, 'c2': 0.63, 'xi_min': 0.1, 'theta_pos': 0.9, 'theta_neg': 0.1, 'sigma': 3.0, 'regret_mode': 'utility', 'penalty_large': 1000000.0, 'gamma_last': True, 'gap': 0.02}
- PSO: {'particles': 20, 'iters': 20, 'w_max': 0.9, 'w_min': 0.4, 'c1': 2.8, 'c2': 1.3, 'seed': 42}
【参数就绪（来自 YAML）】 {'DATA_PATH': '../data\\airlines_train_regression_1000000.arff', 'label': 'DepDelay>15', 'splits': 'test=0.3, val=0.3, seed=42', 'kwb.k': 6, 'pso': {'particles': 20, 'iters': 20}}


In [3]:
# === Load the data (continuous-column binarisation or existing label column), then split ===
from s3wdlib.data_io import load_table_auto

# Loader arguments in one place; optional keys are read with .get() so a
# missing key yields None instead of raising.
kw = {
    "path": V["DATA_PATH"],
    "label_col": V.get("LABEL_COL"),
    "positive_label": V.get("POSITIVE_LABEL"),
    "continuous_label": V.get("CONT_LABEL"),
    "threshold": V.get("CONT_THRESH"),
    "threshold_op": V.get("CONT_OP"),
}

# Fail fast when no label rule is configured; otherwise report the one in use
# (the continuous-column rule takes precedence when both are set).
uses_continuous = V.get("CONT_LABEL") is not None
uses_label_col = V.get("LABEL_COL") is not None
if not (uses_continuous or uses_label_col):
    raise RuntimeError("未检测到标签配置（既无 CONT_* 也无 LABEL_COL）。请检查 YAML。")
if uses_continuous:
    print(f"【标签策略】连续列二值化：{V['CONT_LABEL']} {V['CONT_OP']} {V['CONT_THRESH']}")
else:
    print(f"【标签策略】已有标签列：{V['LABEL_COL']} == {V.get('POSITIVE_LABEL', 1)} 视为正类")

# Read the full table.
X_all, y_all = load_table_auto(**kw)
print("【数据加载】X_all, y_all =", X_all.shape, y_all.shape)

# Stratified train/test split using the sizes and seed from the YAML config.
split_opts = {
    "test_size": V["TEST_SIZE"],
    "random_state": V["SEED"],
    "stratify": y_all,
}
Xtr, Xte, ytr, yte = train_test_split(X_all, y_all, **split_opts)
print("【数据切分】Xtr/Xte =", Xtr.shape, Xte.shape)


【标签策略】连续列二值化：DepDelay > 15
【数据加载完毕】样本数=1000000，特征数=9，正类比例=0.1559
【数据加载】X_all, y_all = (1000000, 9) (1000000,)
【数据切分】Xtr/Xte = (700000, 9) (300000, 9)


In [4]:
# 1) Carve a validation set out of the training set (used only for threshold search).
Xtr_sub, Xva, ytr_sub, yva = train_test_split(
    Xtr, ytr, test_size=V["VAL_SIZE"], stratify=ytr, random_state=V["SEED"]
)

# 2) Scaling: the scaler is fitted through minmax_scale_fit_transform on the
#    training subset / validation pair and then applied to the test set.
Xtr2, Xva2, scaler = minmax_scale_fit_transform(Xtr_sub, Xva)
Xte2 = pd.DataFrame(scaler.transform(Xte), columns=Xte.columns)

# 3) Feature levels, decided on the training subset only. The level
#    percentages come from the configuration (same lookup and fallback as in
#    run_one_split) instead of silently relying on make_levels' own default.
feat_rank, mi_vals = rank_features_mi(Xtr2, ytr_sub)
L1, L2, L3 = make_levels(feat_rank, V.get("LEVEL_PCTS", [0.6, 0.8, 1.0]))
print(f"【分层复核】总特征={len(feat_rank)} | L1={len(L1)} L2={len(L2)} L3={len(L3)}")


【归一化】已对训练/测试集进行 MinMax 缩放到 [0,1]。
【分层复核】总特征=9 | L1=5 L2=7 L3=9


## KWB（Algorithm 1）训练与三层概率

In [5]:

# 4) KWB: fit one estimator per feature level on the training subset, then
#    produce probabilities for the validation and test sets.
# All three estimators share the same configured settings.
kwb_opts = {
    "k": V["KWB_K"],
    "metric": V["KWB_metric"],
    "eps": V["KWB_eps"],
    "use_faiss": V["KWB_use_faiss"],
    "faiss_gpu": V["KWB_faiss_gpu"],
}
kwb1 = KWBProbEstimator(**kwb_opts).fit(Xtr2[L1], ytr_sub)
kwb2 = KWBProbEstimator(**kwb_opts).fit(Xtr2[L2], ytr_sub)
kwb3 = KWBProbEstimator(**kwb_opts).fit(Xtr2[L3], ytr_sub)

# Each estimator only sees the columns of its own level.
kwb_levels = ((kwb1, L1), (kwb2, L2), (kwb3, L3))
p1_va, p2_va, p3_va = [est.predict_proba(Xva2[cols]) for est, cols in kwb_levels]
p1_te, p2_te, p3_te = [est.predict_proba(Xte2[cols]) for est, cols in kwb_levels]
print("【KWB 完成】验证/测试三层概率就绪。")


【KWB 完成】验证/测试三层概率就绪。


## 验证集 PSO 学阈值（信息增益−后悔值 + 单调序 + ξ）

In [6]:
# 5) Learn the thresholds with PSO on the validation set
#    (information gain minus regret, with monotonic-order and xi constraints).
s3_opts = {
    "c1": V["S3_c1"],
    "c2": V["S3_c2"],
    "xi_min": V["S3_xi_min"],
    "theta_pos": V["S3_theta_pos"],
    "theta_neg": V["S3_theta_neg"],
    "penalty_large": V["S3_penalty_large"],
    "gamma_last": V.get("S3_gamma_last"),   # gamma_last may be True or a value such as 0.5
    "gap": V.get("S3_gap", 0.02),
}
s3 = S3WDParams(**s3_opts)

pso_opts = {
    "particles": V["PSO_particles"],
    "iters": V["PSO_iters"],
    "w_max": V["PSO_w_max"],
    "w_min": V["PSO_w_min"],
    "c1": V["PSO_c1"],
    "c2": V["PSO_c2"],
    "seed": V["PSO_seed"],
    "use_gpu": V["PSO_use_gpu"],
}
pso = PSOParams(**pso_opts)

best_th, best_fit, detail = pso_learn_thresholds([p1_va, p2_va, p3_va], yva.values, s3, pso)
alphas, betas, gamma3 = best_th

# One "αi=…/βi=…" entry per level, numbered from 1.
threshold_report = [
    f"α{level}={alpha:.4f}/β{level}={beta:.4f}"
    for level, (alpha, beta) in enumerate(zip(alphas, betas), start=1)
]
print("【PSO 学到阈值（验证集）】", threshold_report, f"γ3={gamma3:.4f}")

fit_report = {
    "fit": round(best_fit, 4),
    "pen_bnd": detail.get("pen_bnd"),
    "pen_mono": detail.get("pen_mono"),
}
print("【适应度/约束】", fit_report)


【PSO 学到阈值（验证集）】 ['α1=0.4608/β1=0.1935', 'α2=0.4608/β2=0.1935', 'α3=0.4608/β3=0.3395'] γ3=0.5000
【适应度/约束】 {'fit': -0.009, 'pen_bnd': 0.0, 'pen_mono': 0.0}


## 测试集序贯三支决策 + 评估

In [7]:

# 6) 测试集序贯三支决策 + 评估
def _seq_predict_eval(p1, p2, p3, y_true, a1, b1, a2, b2, g3):
    """Apply the three-level sequential three-way decision to probability scores.

    Levels 1 and 2 accept a sample when its score is >= alpha, reject it when
    the score is <= beta, and defer it to the next level otherwise. Level 3
    decides every remaining sample with the single cut-off ``g3``.

    Args:
        p1, p2, p3: Per-sample scores of levels 1, 2 and 3.
        y_true: Only used as the template for the shape of the prediction array.
        a1, b1: Accept / reject thresholds of level 1.
        a2, b2: Accept / reject thresholds of level 2.
        g3: Cut-off of the final level.

    Returns:
        ``(y_hat, flow)``: integer predictions (1 accepted, 0 rejected, -1 if
        never decided) and a dict with the POS/BND/NEG counts of L1 and L2 and
        the POS/NEG counts of L3.
    """
    # Level 1: three-way split of all samples.
    accept_1 = p1 >= a1
    reject_1 = p1 <= b1
    defer_1 = ~(accept_1 | reject_1)

    # Level 2: only samples deferred by level 1 are examined.
    accept_2 = np.zeros_like(defer_1, bool)
    reject_2 = np.zeros_like(defer_1, bool)
    scores_2 = p2[defer_1]
    accept_2[defer_1] = scores_2 >= a2
    reject_2[defer_1] = scores_2 <= b2
    defer_2 = defer_1 & ~accept_2 & ~reject_2

    # Level 3: binary decision for whatever is still undecided.
    accept_3 = np.zeros_like(defer_2, bool)
    reject_3 = np.zeros_like(defer_2, bool)
    scores_3 = p3[defer_2]
    accept_3[defer_2] = scores_3 >= g3
    reject_3[defer_2] = ~accept_3[defer_2]

    # Write the labels level by level; later assignments win on overlap.
    y_hat = np.full_like(y_true, -1, dtype=int)
    decisions = (
        (accept_1, 1), (reject_1, 0),
        (accept_2, 1), (reject_2, 0),
        (accept_3, 1), (reject_3, 0),
    )
    for region, label in decisions:
        y_hat[region] = label

    flow = {
        "L1": (int(accept_1.sum()), int(defer_1.sum()), int(reject_1.sum())),
        "L2": (int(accept_2.sum()), int(defer_2.sum()), int(reject_2.sum())),
        "L3": (int(accept_3.sum()), int(reject_3.sum())),
    }
    return y_hat, flow

# Learned thresholds: (alpha, beta) for levels 1 and 2, and the level-3 cut-off.
a1, b1 = float(alphas[0]), float(betas[0])
a2, b2 = float(alphas[1]), float(betas[1])
g3 = float(gamma3)

# Evaluate with the thresholds learned on the validation set. The level-3
# cut-off is the learned gamma3, not a fixed 0.5, so the reported flow and
# metrics always correspond to the thresholds printed by the PSO step.
y_hat, flow = _seq_predict_eval(p1_te, p2_te, p3_te, yte.values,
                                a1, b1, a2, b2, g3)
print("【样本流转（学到的阈值）】L1 POS/BND/NEG =", *flow["L1"], " | L2 POS/BND/NEG =", *flow["L2"], " | L3 POS/NEG =", *flow["L3"])

# Score only the samples that received a decision (y_hat == -1 means undecided).
mask = (y_hat >= 0)
y_eval, y_pred = yte[mask], y_hat[mask]
# NOTE: AUC is computed from hard 0/1 predictions rather than scores, so for
# binary labels it coincides with balanced accuracy.
metrics = {
    'F1': round(f1_score(y_eval, y_pred),4),
    'BAC': round(balanced_accuracy_score(y_eval, y_pred),4),
    'Prec': round(precision_score(y_eval, y_pred),4),
    'Rec': round(recall_score(y_eval, y_pred),4),
    'MCC': round(matthews_corrcoef(y_eval, y_pred),4),
    'Kappa': round(cohen_kappa_score(y_eval, y_pred),4),
    'AUC': round(roc_auc_score(y_eval, y_pred),4)
}
print("【评估（测试集）】", metrics)

【样本流转（学到的阈值）】L1 POS/BND/NEG = 22202 155788 122010  | L2 POS/BND/NEG = 13208 89126 53454  | L3 POS/NEG = 8144 80982
【评估（测试集）】 {'F1': 0.2367, 'BAC': 0.5494, 'Prec': 0.2454, 'Rec': 0.2286, 'MCC': 0.1018, 'Kappa': 0.1017, 'AUC': 0.5494}


## 10 次独立 70/30 划分（Train→Val(寻优)→Test(评估)）

In [8]:
# === 外层评测：10 次独立 70/30 划分（Train→Val(寻优)→Test(评估)）===
from sklearn.model_selection import StratifiedShuffleSplit, train_test_split
from sklearn.metrics import (f1_score, balanced_accuracy_score, precision_score, recall_score,
                             matthews_corrcoef, cohen_kappa_score, roc_auc_score)
import numpy as np, pandas as pd, importlib

# 确保拿到最新版目标/训练器实现
import s3wdlib.objective as objective, s3wdlib.trainer as trainer
importlib.reload(objective); importlib.reload(trainer)
from s3wdlib.objective import S3WDParams
from s3wdlib.trainer import PSOParams, pso_learn_thresholds
from s3wdlib.data_io import minmax_scale_fit_transform, load_table_auto
from s3wdlib.features import rank_features_mi, make_levels
from s3wdlib.kwb import KWBProbEstimator

def _seq_predict(p1, p2, p3, y_true, a1, b1, a2, b2, g3):
    """Run the sequential three-way decision over three levels of scores.

    A sample is accepted at level 1 or 2 when its score is >= the level's
    alpha, rejected when it is <= the level's beta, and otherwise passed on.
    Level 3 settles every remaining sample: accepted when the score is >= g3,
    rejected otherwise.

    Args:
        p1, p2, p3: Per-sample scores for levels 1-3.
        y_true: Template array; only its shape is used for the output.
        a1, b1, a2, b2: Accept/reject thresholds of levels 1 and 2.
        g3: Final-level cut-off.

    Returns:
        Tuple ``(y_hat, flow)`` where ``y_hat`` holds 1/0 decisions (-1 for a
        sample that was never decided) and ``flow`` holds the region sizes:
        (POS, BND, NEG) for "L1" and "L2", (POS, NEG) for "L3".
    """
    # --- level 1 ---
    pos1 = p1 >= a1
    neg1 = p1 <= b1
    bnd1 = ~pos1 & ~neg1

    # --- level 2 (restricted to the level-1 boundary region) ---
    pos2 = np.zeros_like(bnd1, bool)
    neg2 = np.zeros_like(bnd1, bool)
    p2_pending = p2[bnd1]
    pos2[bnd1] = p2_pending >= a2
    neg2[bnd1] = p2_pending <= b2
    bnd2 = bnd1 & (~pos2) & (~neg2)

    # --- level 3 (restricted to the level-2 boundary region, no deferral) ---
    pos3 = np.zeros_like(bnd2, bool)
    neg3 = np.zeros_like(bnd2, bool)
    p3_pending = p3[bnd2]
    pos3[bnd2] = p3_pending >= g3
    neg3[bnd2] = ~pos3[bnd2]

    # Fill predictions in level order so later levels overwrite earlier ones.
    y_hat = np.full_like(y_true, -1, dtype=int)
    for positives, negatives in ((pos1, neg1), (pos2, neg2), (pos3, neg3)):
        y_hat[positives] = 1
        y_hat[negatives] = 0

    def _count(region):
        return int(region.sum())

    flow = {
        "L1": (_count(pos1), _count(bnd1), _count(neg1)),
        "L2": (_count(pos2), _count(bnd2), _count(neg2)),
        "L3": (_count(pos3), _count(neg3)),
    }
    return y_hat, flow

def _safe_auc(y_true, y_pred):
    """Return ``roc_auc_score(y_true, y_pred)``, or NaN if the scorer raises.

    This is a deliberate best-effort fallback so that a single failing AUC
    computation does not abort a whole evaluation run.
    """
    try:
        auc = roc_auc_score(y_true, y_pred)
    except Exception:
        auc = float("nan")
    return auc

# Loaded datasets keyed by the loader arguments, so that repeated calls to
# run_one_split reuse one in-memory copy instead of re-reading the file.
_DATASET_CACHE = {}


def _load_dataset_cached():
    """Return (X_all, y_all) for the current configuration, reading the file at most once."""
    kw = dict(
        path=V["DATA_PATH"],
        label_col=V.get("LABEL_COL"),
        positive_label=V.get("POSITIVE_LABEL"),
        continuous_label=V.get("CONT_LABEL"),
        threshold=V.get("CONT_THRESH"),
        threshold_op=V.get("CONT_OP"),
    )
    # repr() gives a hashable key even if a configured value is not hashable.
    cache_key = repr(sorted(kw.items()))
    if cache_key not in _DATASET_CACHE:
        _DATASET_CACHE[cache_key] = load_table_auto(**kw)
    return _DATASET_CACHE[cache_key]


def _fit_kwb_per_level(X_train, y_train, levels):
    """Fit one KWBProbEstimator per feature level, all with the configured KWB settings."""
    k = int(V["KWB_K"])
    return [
        KWBProbEstimator(
            k=k,
            metric=V["KWB_metric"],
            eps=V["KWB_eps"],
            use_faiss=V["KWB_use_faiss"],
            faiss_gpu=V["KWB_faiss_gpu"],
        ).fit(X_train[cols], y_train)
        for cols in levels
    ]


def run_one_split(seed: int):
    """Run one independent Train -> Val (threshold search) -> Test (evaluation) experiment.

    Args:
        seed: Random state used for the outer stratified split, the inner
            train/validation split and the PSO search.

    Returns:
        A 4-tuple ``(metrics, flow, th, info)``:
        ``metrics`` maps F1/BAC/Prec/Rec/MCC/Kappa/AUC to test-set scores,
        ``flow`` holds the per-level region sizes from ``_seq_predict``,
        ``th`` holds the learned alpha1/beta1/alpha2/beta2/gamma3, and
        ``info`` holds the PSO fitness plus the ``pen_bnd``/``pen_mono``
        entries of the PSO detail dict (0.0 when absent).
    """
    # Full dataset (continuous-column binarisation or existing label column).
    X_all, y_all = _load_dataset_cached()

    # Outer split: a single stratified train/test partition.
    sss = StratifiedShuffleSplit(n_splits=1, test_size=V["TEST_SIZE"], random_state=seed)
    (tr_idx, te_idx), = sss.split(X_all, y_all)
    Xtr_all, Xte = X_all.iloc[tr_idx], X_all.iloc[te_idx]
    ytr_all, yte = y_all.iloc[tr_idx], y_all.iloc[te_idx]

    # Inner split: validation data is used only for threshold search.
    Xtr, Xva, ytr, yva = train_test_split(
        Xtr_all, ytr_all, test_size=V["VAL_SIZE"], stratify=ytr_all, random_state=seed
    )

    # Scaling via minmax_scale_fit_transform on the training subset /
    # validation pair; the returned scaler is then applied to the test set.
    Xtr2, Xva2, scaler = minmax_scale_fit_transform(Xtr, Xva)
    Xte2 = pd.DataFrame(scaler.transform(Xte), columns=Xte.columns)

    # Feature levels are decided on the training subset only.
    feat_rank, mi_vals = rank_features_mi(Xtr2, ytr)
    L1, L2, L3 = make_levels(feat_rank, V.get("LEVEL_PCTS", [0.6, 0.8, 1.0]))
    levels = (L1, L2, L3)

    # KWB: fit on the training subset, score validation and test per level.
    estimators = _fit_kwb_per_level(Xtr2, ytr, levels)
    probs_va = [est.predict_proba(Xva2[cols]) for est, cols in zip(estimators, levels)]
    p1_te, p2_te, p3_te = [est.predict_proba(Xte2[cols]) for est, cols in zip(estimators, levels)]

    # PSO threshold search on the validation set. gamma_last / gap are passed
    # as in the single-run cell so that both code paths optimise the same
    # objective; gamma_last is only forwarded when it is configured.
    s3_kwargs = dict(
        c1=V["S3_c1"], c2=V["S3_c2"], xi_min=V["S3_xi_min"],
        theta_pos=V["S3_theta_pos"], theta_neg=V["S3_theta_neg"],
        penalty_large=V["S3_penalty_large"],
        gap=V.get("S3_gap", 0.02),
    )
    if V.get("S3_gamma_last") is not None:
        s3_kwargs["gamma_last"] = V["S3_gamma_last"]
    s3 = S3WDParams(**s3_kwargs)
    pso = PSOParams(
        particles=V["PSO_particles"], iters=V["PSO_iters"],
        w_max=V["PSO_w_max"], w_min=V["PSO_w_min"],
        c1=V["PSO_c1"], c2=V["PSO_c2"], seed=seed, use_gpu=V["PSO_use_gpu"]
    )
    (alphas, betas, gamma3), fit, detail = pso_learn_thresholds(probs_va, yva.values, s3, pso)

    th = {'alpha1': float(alphas[0]), 'beta1': float(betas[0]),
          'alpha2': float(alphas[1]), 'beta2': float(betas[1]), 'gamma3': float(gamma3)}

    # Sequential three-way decision on the test set with the learned thresholds.
    y_hat, flow = _seq_predict(p1_te, p2_te, p3_te, yte.values,
                               th['alpha1'], th['beta1'],
                               th['alpha2'], th['beta2'],
                               th['gamma3'])

    # Score only decided samples (y_hat == -1 means undecided).
    mask = (y_hat >= 0)
    yt, yp = yte[mask], y_hat[mask]
    metrics = {
        'F1':   f1_score(yt, yp),
        'BAC':  balanced_accuracy_score(yt, yp),
        'Prec': precision_score(yt, yp),
        'Rec':  recall_score(yt, yp),
        'MCC':  matthews_corrcoef(yt, yp),
        'Kappa':cohen_kappa_score(yt, yp),
        'AUC':  _safe_auc(yt, yp),   # falls back to NaN if the scorer raises
    }
    info = {'fit': float(fit),
            'pen_bnd': detail.get('pen_bnd', 0.0),
            'pen_mono': detail.get('pen_mono', 0.0)}
    return metrics, flow, th, info

# —— Run 10 independent splits —— #
rows = []
base_seed = V.get("PSO_seed", 42)
for i in range(10):
    m, flow, th, det = run_one_split(seed=base_seed + i)

    # One flat record per run: metrics, flow counts, thresholds, PSO diagnostics.
    row = {}
    for name, value in m.items():
        # Only plain numbers are rounded; anything else is stored untouched.
        row[name] = round(value, 4) if isinstance(value, (int, float)) else value
    row.update({
        'L1_POS': flow['L1'][0], 'L1_BND': flow['L1'][1], 'L1_NEG': flow['L1'][2],
        'L2_POS': flow['L2'][0], 'L2_BND': flow['L2'][1], 'L2_NEG': flow['L2'][2],
        'L3_POS': flow['L3'][0], 'L3_NEG': flow['L3'][1],
    })
    for name, value in th.items():
        row[name] = round(value, 4)
    for name, value in det.items():
        row[name] = round(value, 4)
    rows.append(row)

df_res = pd.DataFrame(rows)
print("【10 次独立划分结果（前 5 行）】")
display(df_res.head())

# Mean and standard deviation of every numeric column over the runs.
mean_col = df_res.mean(numeric_only=True).round(4)
std_col = df_res.std(numeric_only=True).round(4)
summary = pd.DataFrame({'mean': mean_col, 'std': std_col})
print("【均值 ± 标准差】")
metric_names = ['F1', 'BAC', 'Prec', 'Rec', 'MCC', 'Kappa', 'AUC']
display(summary.loc[metric_names])


【数据加载完毕】样本数=1000000，特征数=9，正类比例=0.1559
【归一化】已对训练/测试集进行 MinMax 缩放到 [0,1]。
【数据加载完毕】样本数=1000000，特征数=9，正类比例=0.1559
【归一化】已对训练/测试集进行 MinMax 缩放到 [0,1]。
【数据加载完毕】样本数=1000000，特征数=9，正类比例=0.1559
【归一化】已对训练/测试集进行 MinMax 缩放到 [0,1]。
【数据加载完毕】样本数=1000000，特征数=9，正类比例=0.1559
【归一化】已对训练/测试集进行 MinMax 缩放到 [0,1]。
【数据加载完毕】样本数=1000000，特征数=9，正类比例=0.1559
【归一化】已对训练/测试集进行 MinMax 缩放到 [0,1]。
【数据加载完毕】样本数=1000000，特征数=9，正类比例=0.1559
【归一化】已对训练/测试集进行 MinMax 缩放到 [0,1]。
【数据加载完毕】样本数=1000000，特征数=9，正类比例=0.1559
【归一化】已对训练/测试集进行 MinMax 缩放到 [0,1]。
【数据加载完毕】样本数=1000000，特征数=9，正类比例=0.1559
【归一化】已对训练/测试集进行 MinMax 缩放到 [0,1]。
【数据加载完毕】样本数=1000000，特征数=9，正类比例=0.1559
【归一化】已对训练/测试集进行 MinMax 缩放到 [0,1]。
【数据加载完毕】样本数=1000000，特征数=9，正类比例=0.1559
【归一化】已对训练/测试集进行 MinMax 缩放到 [0,1]。
【10 次独立划分结果（前 5 行）】


Unnamed: 0,F1,BAC,Prec,Rec,MCC,Kappa,AUC,L1_POS,L1_BND,L1_NEG,...,L3_POS,L3_NEG,alpha1,beta1,alpha2,beta2,gamma3,fit,pen_bnd,pen_mono
0,0.2367,0.5494,0.2454,0.2286,0.1018,0.1017,0.5494,22202,155788,122010,...,8144,80982,0.4608,0.1935,0.4608,0.1935,0.5,-0.009,0.0,0.0
1,0.2352,0.5483,0.2423,0.2285,0.0989,0.0989,0.5483,22369,155536,122095,...,8290,82904,0.4603,0.138,0.4603,0.138,0.5,-0.0087,0.0,0.0
2,0.2329,0.5467,0.2385,0.2276,0.0952,0.0952,0.5467,22325,158005,119670,...,8435,84056,0.4037,0.1902,0.4037,0.1902,0.5,-0.0085,0.0,0.0
3,0.2349,0.5482,0.2424,0.2278,0.0989,0.0988,0.5482,22510,155652,121838,...,8175,80097,0.4895,0.2209,0.4895,0.2209,0.5,-0.0096,0.0,0.0
4,0.2355,0.5484,0.2422,0.2292,0.099,0.0989,0.5484,22529,155655,121816,...,8251,80645,0.4762,0.1841,0.4762,0.1841,0.5,-0.0086,0.0,0.0


【均值 ± 标准差】


Unnamed: 0,mean,std
F1,0.2354,0.0023
BAC,0.5483,0.0014
Prec,0.2422,0.0028
Rec,0.2289,0.0021
MCC,0.099,0.003
Kappa,0.0989,0.003
AUC,0.5483,0.0014


In [9]:

# Save the mean/std summary table. The output folder is created first because
# to_excel does not create missing parent directories.
import os
os.makedirs('../targets', exist_ok=True)
summary.to_excel('../targets/airline.xlsx')
