单任务测试集（所有特征统一标准化）

In [5]:
import sys
from pathlib import Path
import pandas as pd


def resolve_project_root(marker: str = 'uhpc', max_levels: int = 5) -> Path:
    """Locate the project root by walking upward from the working directory.

    Starting at the resolved current working directory, inspect that
    directory and then its ancestors (at most ``max_levels`` directories in
    total) and return the first one containing a sub-directory ``marker``.

    Args:
        marker: Name of the directory whose presence identifies the root.
        max_levels: Maximum number of directories to inspect, counting the
            current working directory itself.

    Returns:
        The first inspected directory that contains ``marker`` as a directory.

    Raises:
        FileNotFoundError: If none of the inspected directories contains a
            directory named ``marker``.
    """
    start = Path.cwd().resolve()
    # ``Path.parents`` stops at the filesystem root, so no explicit
    # "parent == self" guard is needed to terminate the walk.
    candidates = (start, *start.parents)[:max(max_levels, 0)]
    for directory in candidates:
        # Require a directory: a stray regular file that happens to be called
        # ``marker`` must not be mistaken for the package folder.
        if (directory / marker).is_dir():
            return directory
    raise FileNotFoundError(f"未在当前路径及其父级 {max_levels} 层内找到包含 '{marker}' 目录的项目根路径。")


# Detect the project root and put it at the front of the import path (once)
# so that the local ``uhpc`` package can be imported by the statements below.
PROJECT_ROOT = resolve_project_root()
_root_entry = str(PROJECT_ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

from uhpc.class_method import scale_features

# Look for the workbook inside the ``uhpc`` folder first, then directly under
# the project root; the first existing file wins.
data_path = None
for folder in (PROJECT_ROOT / 'uhpc', PROJECT_ROOT):
    candidate = folder / 'UHPC.xlsx'
    if candidate.exists():
        data_path = candidate
        break
if data_path is None:
    raise FileNotFoundError('未找到 UHPC.xlsx，请确认文件位置。')

# Load the workbook and work on a copy so the frame as read stays untouched.
multi_data = pd.read_excel(data_path)
multi_data_ = multi_data.copy()

# Training set: rows with at least one missing value (single-task baseline).
train_mask = multi_data_.isna().any(axis=1)
# Test set: rows without any missing value, i.e. the complement of the above.
test_mask = ~train_mask

test_data = multi_data_[test_mask].reset_index(drop=True)
train_data = multi_data_[train_mask].reset_index(drop=True)

# The first 24 columns are used as features, the remaining ones as targets.
n_features = 24
x_train_raw, y_train = train_data.iloc[:, :n_features], train_data.iloc[:, n_features:]
x_test_raw, y_test = test_data.iloc[:, :n_features], test_data.iloc[:, n_features:]

# Per the README: standardize all features.
# NOTE(review): presumably the scaler is fitted on the training features only
# and then applied to both sets; confirm in uhpc.class_method.scale_features.
x_train, x_test = scale_features(x_train_raw, x_test_raw)


In [6]:
from uhpc.regressionskette import train_eval

# Target names handed to train_eval through its ``order`` argument.
# NOTE(review): presumably this fixes the sequence in which the targets are
# processed; confirm against uhpc.regressionskette.train_eval.
order = ["Flexural strength", "Compressive strength", "Porosity", "Flowability"]


In [8]:
# Run train_eval once per imputation setting and print, for each run, the
# setting name, the per-target result, the averaged result and a separator.
impute_methods = ["zero", "mean", "knn", "drop"]
for method in impute_methods:
    per_target_s, avg_s = train_eval(
        x_train, y_train, x_test, y_test, order=order, impute=method
    )
    # One print call with a newline separator emits the same four lines as
    # four consecutive print calls would.
    print(f"Impute = {method}", per_target_s, avg_s, '-' * 40, sep='\n')


Impute = zero
{'Flexural strength': {'mse': 25.915470978081707, 'mae': 3.842858659486213, 'r2': 0.03767102795605448, 'rmse': 5.090724013151932}, 'Compressive strength': {'mse': 408.69686685083127, 'mae': 14.998002332322143, 'r2': -0.21076679821123556, 'rmse': 20.216252542220364}, 'Porosity': {'mse': 22.148724513154814, 'mae': 3.835130628220579, 'r2': -1.0764519705732782, 'rmse': 4.706243142162846}, 'Flowability': {'mse': 3.8533507180852187, 'mae': 1.6143921656845444, 'r2': 0.6828086112665754, 'rmse': 1.9629953433681953}}
{'avg_mse': 115.15360326503826, 'avg_mae': 6.07259594642837, 'avg_r2': -0.14168478239047094, 'avg_rmse': 7.994053760225834}
----------------------------------------
Impute = mean
{'Flexural strength': {'mse': 25.915470978081707, 'mae': 3.842858659486213, 'r2': 0.03767102795605448, 'rmse': 5.090724013151932}, 'Compressive strength': {'mse': 408.69686685083127, 'mae': 14.998002332322143, 'r2': -0.21076679821123556, 'rmse': 20.216252542220364}, 'Porosity': {'mse': 22.1487