单任务测试集（所有特征统一标准化）

In [9]:
import sys
import warnings
from pathlib import Path
import pandas as pd

def resolve_project_root(marker: str = 'uhpc', max_levels: int = 5) -> Path:
    cwd = Path.cwd().resolve()
    for _ in range(max_levels):
        if (cwd / marker).exists():
            return cwd
        if cwd.parent == cwd:
            break
        cwd = cwd.parent
    raise FileNotFoundError(f"未在当前路径及其父级 {max_levels} 层内找到包含 '{marker}' 目录的项目根路径。")

PROJECT_ROOT = resolve_project_root()
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

warnings.filterwarnings('ignore')

from uhpc.class_method import (
    scale_features,
    DirectMissingModels,
    FeatureMissingModels,
    CompleteDataModels,
)

for candidate in [PROJECT_ROOT / 'uhpc' / 'UHPC.xlsx', PROJECT_ROOT / 'UHPC.xlsx']:
    if candidate.exists():
        data_path = candidate
        break
else:
    raise FileNotFoundError('未找到 UHPC.xlsx，请确认文件位置。')

uhpc_df = pd.read_excel(data_path)
complete_mask = uhpc_df.notna().all(axis=1)
train_df = uhpc_df.loc[~complete_mask].reset_index(drop=True)
test_df = uhpc_df.loc[complete_mask].reset_index(drop=True)

x_train_raw = train_df.iloc[:, :24]
y_train = train_df.iloc[:, 24:]
x_test_raw = test_df.iloc[:, :24]
y_test = test_df.iloc[:, 24:]

x_train, x_test = scale_features(x_train_raw, x_test_raw)


In [10]:
direct_runner = DirectMissingModels(x_train, y_train, x_test, y_test)
feature_runner = FeatureMissingModels(x_train, y_train, x_test, y_test)
complete_runner = CompleteDataModels(x_train, y_train, x_test, y_test)


In [11]:
direct_df = direct_runner.run()
feature_df = feature_runner.run()
complete_df = complete_runner.run()


  0%|          | 0/5 [00:00<?, ?it/s]

Performing feature selection for column '1'...
Selected 12 features for '1': ['5', '6', '7', '9', '10', '11', '12', '13', '14', '15', '19', '23']
Imputed column '1' with (834, 12)
Performing feature selection for column '2'...
Selected 2 features for '2': ['10', '15']
Imputed column '2' with (834, 2)
Performing feature selection for column '3'...
Selected 3 features for '3': ['5', '12', '20']
Imputed column '3' with (834, 3)
Performing feature selection for column '4'...
Selected 7 features for '4': ['3', '6', '10', '12', '15', '19', '20']
Imputed column '4' with (834, 7)
Performing feature selection for column '5'...
Selected 6 features for '5': ['1', '6', '7', '10', '12', '19']
Imputed column '5' with (834, 6)
Performing feature selection for column '6'...
Selected 11 features for '6': ['1', '5', '7', '10', '12', '14', '15', '18', '19', '20', '21']
Imputed column '6' with (834, 11)
Performing feature selection for column '7'...
Selected 7 features for '7': ['1', '2', '5', '10', '11',

 20%|██        | 1/5 [02:39<10:39, 159.92s/it]

Selected 17 features for 'Porosity': ['1', '6', '7', '9', '10', '11', '12', '13', '14', '15', '20', '22', '23', '24', 'Compressive strength', 'Flexural strength', 'Flowability']
Imputed column 'Porosity' with (834, 17)
Performing feature selection for column '1'...
Selected 12 features for '1': ['5', '6', '7', '9', '10', '11', '12', '13', '14', '15', '19', '23']
Imputed column '1' with (834, 12)
Performing feature selection for column '2'...
Selected 2 features for '2': ['10', '15']
Imputed column '2' with (834, 2)
Performing feature selection for column '3'...
Selected 3 features for '3': ['5', '12', '20']
Imputed column '3' with (834, 3)
Performing feature selection for column '4'...
Selected 7 features for '4': ['3', '6', '10', '12', '15', '19', '20']
Imputed column '4' with (834, 7)
Performing feature selection for column '5'...
Selected 6 features for '5': ['1', '6', '7', '10', '12', '19']
Imputed column '5' with (834, 6)
Performing feature selection for column '6'...
Selected 11 

 40%|████      | 2/5 [05:13<07:48, 156.22s/it]

Selected 17 features for 'Porosity': ['1', '6', '7', '9', '10', '11', '12', '13', '14', '15', '20', '22', '23', '24', 'Compressive strength', 'Flexural strength', 'Flowability']
Imputed column 'Porosity' with (834, 17)
Performing feature selection for column '1'...
Selected 12 features for '1': ['5', '6', '7', '9', '10', '11', '12', '13', '14', '15', '19', '23']
Imputed column '1' with (834, 12)
Performing feature selection for column '2'...
Selected 2 features for '2': ['10', '15']
Imputed column '2' with (834, 2)
Performing feature selection for column '3'...
Selected 3 features for '3': ['5', '12', '20']
Imputed column '3' with (834, 3)
Performing feature selection for column '4'...
Selected 7 features for '4': ['3', '6', '10', '12', '15', '19', '20']
Imputed column '4' with (834, 7)
Performing feature selection for column '5'...
Selected 6 features for '5': ['1', '6', '7', '10', '12', '19']
Imputed column '5' with (834, 6)
Performing feature selection for column '6'...
Selected 11 

 60%|██████    | 3/5 [08:02<05:24, 162.04s/it]

Selected 17 features for 'Porosity': ['1', '6', '7', '9', '10', '11', '12', '13', '14', '15', '20', '22', '23', '24', 'Compressive strength', 'Flexural strength', 'Flowability']
Imputed column 'Porosity' with (834, 17)
Performing feature selection for column '1'...
Selected 12 features for '1': ['5', '6', '7', '9', '10', '11', '12', '13', '14', '15', '19', '23']
Imputed column '1' with (834, 12)
Performing feature selection for column '2'...
Selected 2 features for '2': ['10', '15']
Imputed column '2' with (834, 2)
Performing feature selection for column '3'...
Selected 3 features for '3': ['5', '12', '20']
Imputed column '3' with (834, 3)
Performing feature selection for column '4'...
Selected 7 features for '4': ['3', '6', '10', '12', '15', '19', '20']
Imputed column '4' with (834, 7)
Performing feature selection for column '5'...
Selected 6 features for '5': ['1', '6', '7', '10', '12', '19']
Imputed column '5' with (834, 6)
Performing feature selection for column '6'...
Selected 11 

 80%|████████  | 4/5 [10:44<02:42, 162.01s/it]

Selected 17 features for 'Porosity': ['1', '6', '7', '9', '10', '11', '12', '13', '14', '15', '20', '22', '23', '24', 'Compressive strength', 'Flexural strength', 'Flowability']
Imputed column 'Porosity' with (834, 17)
Performing feature selection for column '1'...
Selected 12 features for '1': ['5', '6', '7', '9', '10', '11', '12', '13', '14', '15', '19', '23']
Imputed column '1' with (834, 12)
Performing feature selection for column '2'...
Selected 2 features for '2': ['10', '15']
Imputed column '2' with (834, 2)
Performing feature selection for column '3'...
Selected 3 features for '3': ['5', '12', '20']
Imputed column '3' with (834, 3)
Performing feature selection for column '4'...
Selected 7 features for '4': ['3', '6', '10', '12', '15', '19', '20']
Imputed column '4' with (834, 7)
Performing feature selection for column '5'...
Selected 6 features for '5': ['1', '6', '7', '10', '12', '19']
Imputed column '5' with (834, 6)
Performing feature selection for column '6'...
Selected 11 

100%|██████████| 5/5 [13:11<00:00, 158.32s/it]


Selected 17 features for 'Porosity': ['1', '6', '7', '9', '10', '11', '12', '13', '14', '15', '20', '22', '23', '24', 'Compressive strength', 'Flexural strength', 'Flowability']
Imputed column 'Porosity' with (834, 17)


 80%|████████  | 4/5 [00:00<00:00, 30.92it/s]

22
['1', '2', '5', '6', '7', '10', '11', '12', '14', '15', '16', '17', '18', '19', '20', '21', '23', '24', 'Compressive strength', 'Flexural strength', 'Porosity']
(62, 21)
23
['2', '6', '7', '10', '11', '12', '15', '19', '24', 'Compressive strength', 'Flexural strength', 'Flowability']
(128, 12)
24
['1', '2', '3', '4', '6', '7', '10', '11', '12', '13', '14', '15', '19', '20', '21', '22', '23', 'Compressive strength', 'Flexural strength', 'Flowability']
(128, 20)
Compressive strength
['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '18', '19', '20', '21', '22', '23', '24', 'Flexural strength', 'Flowability', 'Porosity']
(255, 26)
Flexural strength
['1', '2', '5', '6', '7', '8', '9', '10', '12', '13', '14', '15', '16', '19', '20', '21', '22', '23', '24', 'Compressive strength', 'Porosity']
(569, 21)
Flowability
['1', '4', '5', '7', '9', '10', '12', '14', '15', '19', '23', '24', 'Compressive strength', 'Porosity']
(726, 14)
Porosity
['1', '6', '7', 

100%|██████████| 5/5 [00:00<00:00, 30.86it/s]


22
['1', '2', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '23', '24', 'Compressive strength', 'Flexural strength', 'Flowability', 'Porosity']
(62, 26)
23
['2', '7', '10', '11', '12', '15', '19', '24', 'Compressive strength', 'Flexural strength', 'Flowability']
(128, 11)
24
['1', '2', '3', '4', '6', '7', '10', '11', '12', '13', '14', '15', '19', '20', '21', '22', '23', 'Compressive strength', 'Flexural strength', 'Flowability']
(128, 20)
Compressive strength
['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '19', '20', '21', '22', '23', '24', 'Flexural strength', 'Flowability', 'Porosity']
(255, 26)
Flexural strength
['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', 'Compressive strength', 'Flowability', 'Porosity']
(569, 27)
Flowability
['1', '4', '5', '7', '9', '10', '14', '15', '19', '23', '24', 'Co

  0%|          | 0/5 [00:00<?, ?it/s]

Performing feature selection for column '1'...
Selected 12 features for '1': ['5', '6', '7', '9', '10', '11', '12', '13', '14', '15', '19', '23']
Imputed column '1' with (834, 12)
Performing feature selection for column '2'...
Selected 2 features for '2': ['10', '15']
Imputed column '2' with (834, 2)
Performing feature selection for column '3'...
Selected 5 features for '3': ['4', '5', '12', '20', '21']
Imputed column '3' with (834, 5)
Performing feature selection for column '4'...
Selected 7 features for '4': ['3', '6', '10', '12', '15', '19', '20']
Imputed column '4' with (834, 7)
Performing feature selection for column '5'...
Selected 6 features for '5': ['1', '6', '7', '10', '12', '19']
Imputed column '5' with (834, 6)
Performing feature selection for column '6'...
Selected 11 features for '6': ['1', '5', '7', '10', '12', '14', '15', '18', '19', '20', '21']
Imputed column '6' with (834, 11)
Performing feature selection for column '7'...
Selected 7 features for '7': ['1', '2', '5', 

 20%|██        | 1/5 [03:05<12:21, 185.30s/it]

Selected 17 features for 'Porosity': ['1', '6', '7', '9', '10', '11', '12', '13', '14', '15', '20', '22', '23', '24', 'Compressive strength', 'Flexural strength', 'Flowability']
Imputed column 'Porosity' with (834, 17)
Performing feature selection for column '1'...
Selected 12 features for '1': ['5', '6', '7', '9', '10', '11', '12', '13', '14', '15', '19', '23']
Imputed column '1' with (834, 12)
Performing feature selection for column '2'...
Selected 2 features for '2': ['10', '15']
Imputed column '2' with (834, 2)
Performing feature selection for column '3'...
Selected 3 features for '3': ['5', '12', '20']
Imputed column '3' with (834, 3)
Performing feature selection for column '4'...
Selected 7 features for '4': ['3', '6', '10', '12', '15', '19', '20']
Imputed column '4' with (834, 7)
Performing feature selection for column '5'...
Selected 6 features for '5': ['1', '6', '7', '10', '12', '19']
Imputed column '5' with (834, 6)
Performing feature selection for column '6'...
Selected 11 

 40%|████      | 2/5 [05:44<08:29, 169.83s/it]

Selected 17 features for 'Porosity': ['1', '6', '7', '9', '10', '11', '12', '13', '14', '15', '20', '22', '23', '24', 'Compressive strength', 'Flexural strength', 'Flowability']
Imputed column 'Porosity' with (834, 17)
Performing feature selection for column '1'...
Selected 12 features for '1': ['5', '6', '7', '9', '10', '11', '12', '13', '14', '15', '19', '23']
Imputed column '1' with (834, 12)
Performing feature selection for column '2'...
Selected 2 features for '2': ['10', '15']
Imputed column '2' with (834, 2)
Performing feature selection for column '3'...
Selected 3 features for '3': ['5', '12', '20']
Imputed column '3' with (834, 3)
Performing feature selection for column '4'...
Selected 7 features for '4': ['3', '6', '10', '12', '15', '19', '20']
Imputed column '4' with (834, 7)
Performing feature selection for column '5'...
Selected 6 features for '5': ['1', '6', '7', '10', '12', '19']
Imputed column '5' with (834, 6)
Performing feature selection for column '6'...
Selected 11 

 60%|██████    | 3/5 [08:31<05:37, 168.81s/it]

Selected 17 features for 'Porosity': ['1', '6', '7', '9', '10', '11', '12', '13', '14', '15', '20', '22', '23', '24', 'Compressive strength', 'Flexural strength', 'Flowability']
Imputed column 'Porosity' with (834, 17)
Performing feature selection for column '1'...
Selected 12 features for '1': ['5', '6', '7', '9', '10', '11', '12', '13', '14', '15', '19', '23']
Imputed column '1' with (834, 12)
Performing feature selection for column '2'...
Selected 2 features for '2': ['10', '15']
Imputed column '2' with (834, 2)
Performing feature selection for column '3'...
Selected 3 features for '3': ['5', '12', '20']
Imputed column '3' with (834, 3)
Performing feature selection for column '4'...
Selected 7 features for '4': ['3', '6', '10', '12', '15', '19', '20']
Imputed column '4' with (834, 7)
Performing feature selection for column '5'...
Selected 6 features for '5': ['1', '6', '7', '10', '12', '19']
Imputed column '5' with (834, 6)
Performing feature selection for column '6'...
Selected 11 

 80%|████████  | 4/5 [11:17<02:47, 167.70s/it]

Selected 17 features for 'Porosity': ['1', '6', '7', '9', '10', '11', '12', '13', '14', '15', '20', '22', '23', '24', 'Compressive strength', 'Flexural strength', 'Flowability']
Imputed column 'Porosity' with (834, 17)
Performing feature selection for column '1'...
Selected 12 features for '1': ['5', '6', '7', '9', '10', '11', '12', '13', '14', '15', '19', '23']
Imputed column '1' with (834, 12)
Performing feature selection for column '2'...
Selected 2 features for '2': ['10', '15']
Imputed column '2' with (834, 2)
Performing feature selection for column '3'...
Selected 3 features for '3': ['5', '12', '20']
Imputed column '3' with (834, 3)
Performing feature selection for column '4'...
Selected 7 features for '4': ['3', '6', '10', '12', '15', '19', '20']
Imputed column '4' with (834, 7)
Performing feature selection for column '5'...
Selected 6 features for '5': ['1', '6', '7', '10', '12', '19']
Imputed column '5' with (834, 6)
Performing feature selection for column '6'...
Selected 11 

100%|██████████| 5/5 [14:01<00:00, 168.27s/it]


Selected 17 features for 'Porosity': ['1', '6', '7', '9', '10', '11', '12', '13', '14', '15', '20', '22', '23', '24', 'Compressive strength', 'Flexural strength', 'Flowability']
Imputed column 'Porosity' with (834, 17)


  0%|          | 0/5 [00:00<?, ?it/s]

22
['1', '2', '5', '6', '7', '10', '11', '12', '14', '15', '16', '17', '18', '19', '20', '21', '23', '24', 'Compressive strength', 'Flexural strength', 'Porosity']
(62, 21)
23
['2', '6', '7', '10', '11', '12', '15', '19', '24', 'Compressive strength', 'Flexural strength', 'Flowability']
(128, 12)
24
['1', '2', '3', '4', '6', '7', '10', '11', '12', '13', '14', '15', '19', '20', '21', '22', '23', 'Compressive strength', 'Flexural strength', 'Flowability']
(128, 20)
Compressive strength
['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '18', '19', '20', '21', '22', '23', '24', 'Flexural strength', 'Flowability', 'Porosity']
(255, 26)
Flexural strength
['1', '2', '5', '6', '7', '8', '9', '10', '12', '13', '14', '15', '16', '19', '20', '21', '22', '23', '24', 'Compressive strength', 'Porosity']
(569, 21)
Flowability
['1', '4', '5', '7', '9', '10', '12', '14', '15', '19', '23', '24', 'Compressive strength', 'Porosity']
(726, 14)
Porosity
['1', '6', '7', 

 60%|██████    | 3/5 [00:00<00:00, 25.05it/s]

22
['1', '2', '5', '6', '7', '9', '10', '11', '12', '14', '15', '16', '17', '18', '19', '20', '21', '23', '24', 'Compressive strength', 'Flexural strength', 'Porosity']
(62, 22)
23
['2', '7', '10', '11', '12', '15', '19', '24', 'Compressive strength', 'Flexural strength', 'Flowability']
(128, 11)
24
['1', '2', '3', '4', '6', '7', '10', '11', '12', '13', '14', '15', '19', '20', '21', '22', '23', 'Compressive strength', 'Flexural strength', 'Flowability']
(128, 20)
Compressive strength
['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '19', '20', '21', '22', '23', '24', 'Flexural strength', 'Flowability', 'Porosity']
(255, 26)
Flexural strength
['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '19', '20', '21', '22', '23', '24', 'Compressive strength', 'Porosity']
(569, 24)
Flowability
['1', '3', '7', '9', '10', '12', '14', '15', '19', '23', '24', 'Compressive strength', 'Porosity']
(726, 13)
Porosity
['1',

100%|██████████| 5/5 [00:00<00:00, 23.40it/s]


Porosity
['1', '6', '7', '9', '10', '11', '12', '13', '14', '15', '20', '22', '23', '24', 'Compressive strength', 'Flexural strength', 'Flowability']
(754, 17)
[WARN] 插补 `RFE-MissForest` 失败：Argument `x` must contains at least one missing value.
[WARN] 插补方法 `堆叠` 暂未实现，跳过。
[WARN] 插补方法 `VAE` 暂未实现，跳过。


In [12]:
results = [
    ('DirectMissingModels', direct_df),
    ('FeatureMissingModels', feature_df),
    ('CompleteDataModels', complete_df),
]
for name, df in results:
    print(f'===== {name} =====')
    display(df)

direct_df, feature_df, complete_df


===== DirectMissingModels =====


Unnamed: 0,r2,mse,mae,model
0,0.281779,25.951047,3.519085,catboost
1,-19.521204,290.730461,13.623339,mt_extra_trees


===== FeatureMissingModels =====


Unnamed: 0,r2,mse,mae,model
0,-0.079393,28.767797,3.910433,lightgbm__missforest
1,-0.090437,36.086910,4.287907,hist_gbr__missforest
2,-0.250647,70.822641,5.356924,xgboost__missforest
3,-15.431400,3726.664091,37.638698,hmlasso__missforest
4,-0.061264,85.020908,5.641177,sst_xgb__missforest
...,...,...,...,...
67,-2.533169,68.647769,6.696385,hist_gbr__低秩矩阵
68,-2.531965,69.498225,6.718248,xgboost__低秩矩阵
69,-15.327873,3817.993583,37.517151,hmlasso__低秩矩阵
70,-2.570315,85.032236,7.009485,sst_xgb__低秩矩阵


===== CompleteDataModels =====


Unnamed: 0,r2,mse,mae,model
0,0.143447,32.290310,3.906505,random_forest__missforest
1,-0.222886,103.576936,5.813481,ridge__missforest
2,-0.236324,106.893202,5.881168,multitask_lasso__missforest
3,-0.230238,105.385173,5.849830,multitask_elasticnet__missforest
4,-0.010312,98.740717,5.894942,multioutput_gp__missforest
...,...,...,...,...
83,-4.106590,566.344257,16.038237,multitask_elasticnet__低秩矩阵
84,-4.433976,440.990738,14.872563,multioutput_gp__低秩矩阵
85,-3.657159,263.399370,12.053952,multioutput_svr__低秩矩阵
86,-2.815322,125.260761,8.600001,gbdt__低秩矩阵


(          r2         mse        mae           model
 0   0.281779   25.951047   3.519085        catboost
 1 -19.521204  290.730461  13.623339  mt_extra_trees,
            r2          mse        mae                 model
 0   -0.079393    28.767797   3.910433  lightgbm__missforest
 1   -0.090437    36.086910   4.287907  hist_gbr__missforest
 2   -0.250647    70.822641   5.356924   xgboost__missforest
 3  -15.431400  3726.664091  37.638698   hmlasso__missforest
 4   -0.061264    85.020908   5.641177   sst_xgb__missforest
 ..        ...          ...        ...                   ...
 67  -2.533169    68.647769   6.696385        hist_gbr__低秩矩阵
 68  -2.531965    69.498225   6.718248         xgboost__低秩矩阵
 69 -15.327873  3817.993583  37.517151         hmlasso__低秩矩阵
 70  -2.570315    85.032236   7.009485         sst_xgb__低秩矩阵
 71  -2.765058    76.429619   7.311319         erc_xgb__低秩矩阵
 
 [72 rows x 4 columns],
           r2         mse        mae                             model
 0   0.1434