In [1]:
import numpy as np
import pandas as pd 
import os
import joblib
from tqdm import tqdm

from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import GridSearchCV
from pyod.models.knn import KNN
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_percentage_error, r2_score, mean_absolute_error, mean_squared_error
from datetime import datetime

import warnings
warnings.filterwarnings('ignore')

In [2]:
def convert_realtime_data(data, target_column, window_size):
    """Derive lag, rolling-statistic and calendar features for one data split.

    Parameters
    ----------
    data : pd.DataFrame
        Frame containing a datetime column named '时间', the target column and
        the predictor columns. The frame is copied; the input is not modified.
    target_column : list
        Only ``target_column[0]`` is read. That column and '时间' are excluded
        from lag / rolling feature generation.
    window_size : list of int
        Every entry is used as a lag for ``shift``. Every entry except the last
        two is additionally used as a rolling-window length.

    Returns
    -------
    pd.DataFrame
        The copied input followed by the lag columns (``<col>_<lag>``), the
        rolling columns (``<col>_<window>_<stat>``) and the calendar columns
        'Day', 'DayOfWeek', 'Hour' and 'minute'. Rows containing any NaN are
        dropped and the index is reset.
    """
    frame = data.copy()

    # Every column except the timestamp and the target is a feature source.
    feature_names = frame.columns.tolist()
    feature_names.remove("时间")
    feature_names.remove(target_column[0])

    # Lagged copies: column-major, i.e. all lags of the first column come first.
    lag_pairs = [(name, lag) for name in feature_names for lag in window_size]
    lag_series = [frame[name].shift(lag) for name, lag in lag_pairs]
    lag_labels = [f'{name}_{lag}' for name, lag in lag_pairs]

    # Rolling statistics: for each window, for each statistic, all feature columns.
    stat_names = ('min', 'max', 'mean', 'std', 'median', 'skew')
    rolling_frames = []
    rolling_labels = []
    for span in window_size[:-2]:
        for stat in stat_names:
            rolling_frames.append(
                frame[feature_names].apply(
                    lambda column: getattr(column.rolling(window=span), stat)()
                )
            )
            suffix = f'_{span}_{stat}'
            rolling_labels += [name + suffix for name in frame[feature_names].columns]

    lag_block = pd.concat(lag_series, axis=1)
    lag_block.columns = lag_labels

    rolling_block = pd.concat(rolling_frames, axis=1)
    rolling_block.columns = rolling_labels

    frame = pd.concat([frame, lag_block, rolling_block], axis=1)

    # Calendar features taken from the timestamp column.
    stamps = frame['时间'].dt
    frame['Day'] = stamps.day
    frame['DayOfWeek'] = stamps.dayofweek
    frame['Hour'] = stamps.hour
    frame['minute'] = stamps.minute

    # Shifting and rolling leave NaN in the leading rows; discard those rows.
    return frame.dropna(axis=0, how='any').reset_index(drop=True)

In [3]:
def _regression_report(y_true, y_pred, prefix):
    """Return MAPE, R2, MAE, MSE and the largest absolute error, keyed as ``<prefix>_<metric>``."""
    return {
        f'{prefix}_mape': mean_absolute_percentage_error(y_true, y_pred),
        f'{prefix}_r2': r2_score(y_true, y_pred),
        f'{prefix}_mae': mean_absolute_error(y_true, y_pred),
        f'{prefix}_mse': mean_squared_error(y_true, y_pred),
        f'{prefix}_max_diff_value': np.max(np.abs(y_true - y_pred)),
    }


def fit_realtime_model(data:pd.DataFrame, internal_factors:list, external_factors:list, target_column:list,
                       startup_column:str='10MKA10CE301XQ01', startup_threshold:float=50,
                       train_end:str='2021-01-28 00:00:00', val_end:str='2021-01-30 00:00:00',
                       window_size=(5, 4, 3, 2, 1), model_dir:str='D:/Jupyterfiles/realtime_model',
                       random_state=None):
    """Train, persist and evaluate a random-forest model for one target column.

    Steps: select and filter the data, split it by time, fill missing values,
    build features with ``convert_realtime_data``, select features with a random
    forest, grid-search a random-forest pipeline, save the fitted search object,
    and score it on the validation and test splits.

    Parameters
    ----------
    data : pd.DataFrame
        Raw data with a '时间' column plus every listed factor column. The frame
        is copied; the caller's object is left untouched.
    internal_factors, external_factors : list of str
        Column names to keep (surrounding whitespace is stripped).
    target_column : list of str
        Only the first entry is used as the regression target.
    startup_column, startup_threshold :
        Only rows with ``startup_column > startup_threshold`` are kept.
        ``startup_column`` must be one of the selected columns.
    train_end, val_end : str
        Split boundaries: train is ``时间 < train_end``, validation is
        ``train_end <= 时间 < val_end``, test is ``时间 >= val_end``.
    window_size : sequence of int
        Forwarded to ``convert_realtime_data``.
    model_dir : str
        Directory receiving ``ybl_<target>.pkl`` (the fitted ``GridSearchCV``)
        and ``ybl_<target>_columns.pkl`` (the selected feature names, which are
        required to build inputs for the saved model). Created if missing.
    random_state :
        Passed to both random forests; ``None`` keeps the runs unseeded.

    Returns
    -------
    pd.DataFrame
        One row (index 1) with the shapes at each stage and the validation and
        test metrics.

    Raises
    ------
    ValueError
        If a split has no rows left after feature extraction.
    """
    # Work on a private copy so repeated calls never modify the caller's frame.
    data = data.copy()
    data.columns = [col.strip() for col in data.columns]
    data['时间'] = pd.to_datetime(data['时间'])
    data = data.infer_objects()
    internal_factors = [name.strip() for name in internal_factors]
    external_factors = [name.strip() for name in external_factors]
    target_column = [name.strip() for name in target_column]
    target = target_column[0]

    # dict.fromkeys removes duplicates while keeping a deterministic column order
    # (a set would reorder the columns differently from one kernel to the next).
    selected = list(dict.fromkeys(['时间'] + internal_factors + external_factors + target_column))
    df = data[selected]
    # Shape of the imported data
    shape1 = str(df.shape)

    # Keep only rows that satisfy the start-up condition
    df = df[df[startup_column] > startup_threshold].reset_index(drop=True)
    # Shape after filtering
    shape2 = str(df.shape)

    # Time-based split into train / validation / test
    train_end_ts, val_end_ts = pd.Timestamp(train_end), pd.Timestamp(val_end)
    timestamps = df['时间']
    df_train = df[timestamps < train_end_ts]
    df_val = df[(timestamps >= train_end_ts) & (timestamps < val_end_ts)]
    df_test = df[timestamps >= val_end_ts]

    # Fill gaps with means computed on the training rows only, so no statistic of
    # the validation/test rows reaches training. numeric_only skips the timestamp.
    train_means = df_train.mean(numeric_only=True)
    df_train = df_train.fillna(train_means).reset_index(drop=True)
    df_val = df_val.fillna(train_means).reset_index(drop=True)
    df_test = df_test.fillna(train_means).reset_index(drop=True)
    # Train / validation / test shapes
    shape3 = str(df_train.shape)
    shape4 = str(df_val.shape)
    shape5 = str(df_test.shape)

    # Feature extraction, done per split so lags never cross a split boundary
    windows = list(window_size)
    df_train_featured = convert_realtime_data(df_train, target_column, windows)
    df_val_featured = convert_realtime_data(df_val, target_column, windows)
    df_test_featured = convert_realtime_data(df_test, target_column, windows)
    for split_name, split in (('train', df_train_featured),
                              ('validation', df_val_featured),
                              ('test', df_test_featured)):
        if split.empty:
            raise ValueError(f'{split_name} split is empty after feature extraction for target {target}')
    train_featured_shape = str(df_train_featured.shape)
    val_featured_shape = str(df_val_featured.shape)
    test_featured_shape = str(df_test_featured.shape)

    # Predictors are every column except the timestamp and the target
    train_cols = [col for col in df_train_featured.columns if col not in ('时间', target)]
    x_train, x_val, x_test = (df_train_featured[train_cols], df_val_featured[train_cols],
                              df_test_featured[train_cols])
    # 1-D targets: scikit-learn expects shape (n_samples,) for a single output
    y_train = df_train_featured[target].values
    y_val = df_val_featured[target].values
    y_test = df_test_featured[target].values

    # Feature selection from random-forest importances, capped at 80% of the columns
    selector_forest = RandomForestRegressor(n_jobs=-1, random_state=random_state)
    selector_forest.fit(x_train.values, y_train)
    feature_model = SelectFromModel(selector_forest, prefit=True, max_features=int(x_train.shape[1]*0.8))
    x_train_new = feature_model.transform(x_train.values)
    model_selected_columns = x_train.columns[feature_model.get_support()]
    # Training-set shape after feature selection
    shape6 = str(x_train_new.shape)

    # Random-forest pipeline tuned by grid search
    pipe = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value=-999)),
        ('model', RandomForestRegressor(random_state=random_state))])
    param_grid = {'model__n_estimators': list(range(10, 300, 10)),
                  'model__max_depth': list(range(10, 100, 10))}
    grid = GridSearchCV(estimator=pipe, param_grid=param_grid, cv=3, n_jobs=-1)
    grid.fit(x_train_new, y_train)

    # Persist the model together with the feature names it was trained on;
    # without that list the pickled model cannot be fed correctly later.
    os.makedirs(model_dir, exist_ok=True)
    joblib.dump(grid, f'{model_dir}/ybl_{target}.pkl')
    joblib.dump(list(model_selected_columns), f'{model_dir}/ybl_{target}_columns.pkl')

    # Metrics (mape, r2, mae, mse) and the largest |actual - predicted| on the
    # validation and test splits. If the largest difference exceeds 5, the next
    # step is to pick out those rows and check whether they are consecutive in time.
    val_metrics = _regression_report(y_val, grid.predict(x_val[model_selected_columns].values), 'val')
    test_metrics = _regression_report(y_test, grid.predict(x_test[model_selected_columns].values), 'test')

    summary = {'因变量': target_column,
               '导入数据的维度': shape1, '筛选后数据的维度': shape2, '训练集数据': shape3,
               '验证集数据': shape4, '测试集数据': shape5, '训练集特征提取': train_featured_shape,
               '验证集特征提取': val_featured_shape, '测试集特征提取': test_featured_shape,
               '特征选择后训练集': shape6}
    summary.update(val_metrics)
    summary.update(test_metrics)
    res = pd.DataFrame(summary, index=[1])
    return res

In [4]:
root = 'D:/Jupyterfiles/ThermalData/2021-01/'
# os.listdir returns names in arbitrary order and also lists sub-directories,
# hidden files and the '~$' lock files Excel creates while a workbook is open.
# Filter those out and sort, so the workbook used as the left side of every
# merge below is the same on every run.
file_paths = sorted(
    name for name in os.listdir(root)
    if os.path.isfile(os.path.join(root, name)) and not name.startswith(('~$', '.'))
)
if not file_paths:
    raise FileNotFoundError(f'No data files found in {root}')

# The first workbook defines the rows; every later workbook is left-joined on '时间'.
data = pd.read_excel(os.path.join(root, file_paths[0]))
for file_name in file_paths[1:]:
    single_file_data = pd.read_excel(os.path.join(root, file_name))
    data = pd.merge(data, single_file_data, how='left', on=['时间'])

In [1]:
# Tag names passed to fit_realtime_model as `internal_factors`.
internal_factors = [
    '10MKA10CE301XQ01', '10MKA10CE301XQ02',
    '10MKA10CE101XQ01', '10MKA10CE101XQ02', '10MKA10CE101XQ03',
    '10MKF30CT302',
    '10MKG10CT301', '10MKG10CT303', '10MKG10CT305', '10MKG10CT307',
]

# Tag names passed as `external_factors`; the training loop uses each one in turn
# as the target. They are the odd-numbered tags 10MKA01CT301 .. 10MKA01CT383
# followed by the consecutive tags 10MKA01CT601 .. 10MKA01CT618 (60 in total).
external_factors = (
    [f'10MKA01CT{number}' for number in range(301, 384, 2)]
    + [f'10MKA01CT{number}' for number in range(601, 619)]
)

In [6]:
time1 = datetime.now()
result_columns = ['因变量', '导入数据的维度', '筛选后数据的维度', '训练集数据',
                  '验证集数据', '测试集数据', '训练集特征提取', '验证集特征提取', '测试集特征提取',
                  '特征选择后训练集',
                  'val_mape', 'val_r2', 'val_mae', 'val_mse', 'val_max_diff_value',
                  'test_mape', 'test_r2', 'test_mae', 'test_mse', 'test_max_diff_value']

# Collect the one-row result frames and concatenate once at the end. Growing a
# frame with concat inside the loop copies it on every iteration, and starting
# from an empty frame can leave the metric columns with object dtype.
results = []
for target in tqdm(external_factors):
    print('当前因变量为：', target)
    results.append(fit_realtime_model(data=data, internal_factors=internal_factors,
                                      external_factors=external_factors, target_column=[target]))
    print('*'*50)

if results:
    res_all = pd.concat(results, axis=0, ignore_index=True).reindex(columns=result_columns)
else:
    # No targets: keep the expected (empty) table layout
    res_all = pd.DataFrame(columns=result_columns)

time2 = datetime.now()
print('一共耗时：', time2-time1)

  0%|                                                                                           | 0/60 [00:00<?, ?it/s]

当前因变量为： 10MKA01CT301


  2%|█▎                                                                              | 1/60 [01:41<1:39:47, 101.49s/it]

**************************************************
当前因变量为： 10MKA01CT303


  3%|██▋                                                                              | 2/60 [03:08<1:29:43, 92.83s/it]

**************************************************
当前因变量为： 10MKA01CT305


  5%|████                                                                             | 3/60 [04:06<1:13:09, 77.00s/it]

**************************************************
当前因变量为： 10MKA01CT307


  7%|█████▍                                                                           | 4/60 [05:32<1:15:12, 80.59s/it]

**************************************************
当前因变量为： 10MKA01CT309


  8%|██████▊                                                                          | 5/60 [06:51<1:13:22, 80.05s/it]

**************************************************
当前因变量为： 10MKA01CT311


 10%|████████                                                                         | 6/60 [08:17<1:13:45, 81.95s/it]

**************************************************
当前因变量为： 10MKA01CT313


 12%|█████████▍                                                                       | 7/60 [09:46<1:14:24, 84.23s/it]

**************************************************
当前因变量为： 10MKA01CT315


 13%|██████████▊                                                                      | 8/60 [11:22<1:16:17, 88.04s/it]

**************************************************
当前因变量为： 10MKA01CT317


 15%|████████████▏                                                                    | 9/60 [12:52<1:15:23, 88.69s/it]

**************************************************
当前因变量为： 10MKA01CT319


 17%|█████████████▎                                                                  | 10/60 [14:07<1:10:19, 84.39s/it]

**************************************************
当前因变量为： 10MKA01CT321


 18%|██████████████▋                                                                 | 11/60 [15:35<1:09:52, 85.56s/it]

**************************************************
当前因变量为： 10MKA01CT323


 20%|████████████████                                                                | 12/60 [16:58<1:07:49, 84.77s/it]

**************************************************
当前因变量为： 10MKA01CT325


 22%|█████████████████▎                                                              | 13/60 [18:43<1:11:13, 90.93s/it]

**************************************************
当前因变量为： 10MKA01CT327


 23%|██████████████████▋                                                             | 14/60 [20:28<1:12:58, 95.19s/it]

**************************************************
当前因变量为： 10MKA01CT329


 25%|████████████████████                                                            | 15/60 [21:51<1:08:34, 91.43s/it]

**************************************************
当前因变量为： 10MKA01CT331


 27%|█████████████████████▎                                                          | 16/60 [23:22<1:06:58, 91.33s/it]

**************************************************
当前因变量为： 10MKA01CT333


 28%|██████████████████████▋                                                         | 17/60 [24:48<1:04:22, 89.82s/it]

**************************************************
当前因变量为： 10MKA01CT335


 30%|████████████████████████                                                        | 18/60 [26:10<1:01:15, 87.50s/it]

**************************************************
当前因变量为： 10MKA01CT337


 32%|█████████████████████████▎                                                      | 19/60 [27:48<1:01:54, 90.59s/it]

**************************************************
当前因变量为： 10MKA01CT339


 33%|██████████████████████████▋                                                     | 20/60 [29:30<1:02:45, 94.13s/it]

**************************************************
当前因变量为： 10MKA01CT341


 35%|████████████████████████████                                                    | 21/60 [31:08<1:01:51, 95.17s/it]

**************************************************
当前因变量为： 10MKA01CT343


 37%|█████████████████████████████▎                                                  | 22/60 [32:56<1:02:37, 98.89s/it]

**************************************************
当前因变量为： 10MKA01CT345


 38%|███████████████████████████████▍                                                  | 23/60 [34:29<59:53, 97.12s/it]

**************************************************
当前因变量为： 10MKA01CT347


 40%|████████████████████████████████▊                                                 | 24/60 [36:01<57:20, 95.57s/it]

**************************************************
当前因变量为： 10MKA01CT349


 42%|██████████████████████████████████▏                                               | 25/60 [37:35<55:37, 95.37s/it]

**************************************************
当前因变量为： 10MKA01CT351


 43%|███████████████████████████████████▌                                              | 26/60 [39:22<56:00, 98.85s/it]

**************************************************
当前因变量为： 10MKA01CT353


 45%|████████████████████████████████████▍                                            | 27/60 [41:13<56:16, 102.32s/it]

**************************************************
当前因变量为： 10MKA01CT355


 47%|█████████████████████████████████████▊                                           | 28/60 [43:15<57:47, 108.37s/it]

**************************************************
当前因变量为： 10MKA01CT357


 48%|███████████████████████████████████████▏                                         | 29/60 [44:46<53:10, 102.94s/it]

**************************************************
当前因变量为： 10MKA01CT359


 50%|█████████████████████████████████████████                                         | 30/60 [46:00<47:09, 94.31s/it]

**************************************************
当前因变量为： 10MKA01CT361


 52%|██████████████████████████████████████████▎                                       | 31/60 [47:08<41:47, 86.46s/it]

**************************************************
当前因变量为： 10MKA01CT363


 53%|███████████████████████████████████████████▋                                      | 32/60 [48:46<41:55, 89.85s/it]

**************************************************
当前因变量为： 10MKA01CT365


 55%|█████████████████████████████████████████████                                     | 33/60 [50:33<42:47, 95.08s/it]

**************************************************
当前因变量为： 10MKA01CT367


 57%|██████████████████████████████████████████████▍                                   | 34/60 [52:04<40:41, 93.92s/it]

**************************************************
当前因变量为： 10MKA01CT369


 58%|███████████████████████████████████████████████▎                                 | 35/60 [54:02<42:09, 101.20s/it]

**************************************************
当前因变量为： 10MKA01CT371


 60%|█████████████████████████████████████████████████▏                                | 36/60 [55:35<39:30, 98.77s/it]

**************************************************
当前因变量为： 10MKA01CT373


 62%|█████████████████████████████████████████████████▉                               | 37/60 [57:24<39:02, 101.84s/it]

**************************************************
当前因变量为： 10MKA01CT375


 63%|███████████████████████████████████████████████████▉                              | 38/60 [58:39<34:17, 93.52s/it]

**************************************************
当前因变量为： 10MKA01CT377


 65%|████████████████████████████████████████████████████                            | 39/60 [1:00:23<33:52, 96.78s/it]

**************************************************
当前因变量为： 10MKA01CT379


 67%|█████████████████████████████████████████████████████▎                          | 40/60 [1:01:47<30:58, 92.93s/it]

**************************************************
当前因变量为： 10MKA01CT381


 68%|██████████████████████████████████████████████████████▋                         | 41/60 [1:03:08<28:16, 89.29s/it]

**************************************************
当前因变量为： 10MKA01CT383


 70%|████████████████████████████████████████████████████████                        | 42/60 [1:04:58<28:38, 95.50s/it]

**************************************************
当前因变量为： 10MKA01CT601


 72%|█████████████████████████████████████████████████████████▎                      | 43/60 [1:06:29<26:40, 94.12s/it]

**************************************************
当前因变量为： 10MKA01CT602


 73%|██████████████████████████████████████████████████████████▋                     | 44/60 [1:07:34<22:50, 85.63s/it]

**************************************************
当前因变量为： 10MKA01CT603


 75%|████████████████████████████████████████████████████████████                    | 45/60 [1:08:46<20:20, 81.35s/it]

**************************************************
当前因变量为： 10MKA01CT604


 77%|█████████████████████████████████████████████████████████████▎                  | 46/60 [1:09:46<17:31, 75.09s/it]

**************************************************
当前因变量为： 10MKA01CT605


 78%|██████████████████████████████████████████████████████████████▋                 | 47/60 [1:10:47<15:19, 70.76s/it]

**************************************************
当前因变量为： 10MKA01CT606


 80%|████████████████████████████████████████████████████████████████                | 48/60 [1:11:47<13:31, 67.59s/it]

**************************************************
当前因变量为： 10MKA01CT607


 82%|████████████████████████████████████████████████████████████████▌              | 49/60 [1:15:54<22:15, 121.39s/it]

**************************************************
当前因变量为： 10MKA01CT608


 83%|█████████████████████████████████████████████████████████████████▊             | 50/60 [1:20:04<26:39, 159.98s/it]

**************************************************
当前因变量为： 10MKA01CT609


 85%|███████████████████████████████████████████████████████████████████▏           | 51/60 [1:23:14<25:20, 168.91s/it]

**************************************************
当前因变量为： 10MKA01CT610


 87%|████████████████████████████████████████████████████████████████████▍          | 52/60 [1:25:54<22:10, 166.35s/it]

**************************************************
当前因变量为： 10MKA01CT611


 88%|█████████████████████████████████████████████████████████████████████▊         | 53/60 [1:28:42<19:27, 166.80s/it]

**************************************************
当前因变量为： 10MKA01CT612


 90%|███████████████████████████████████████████████████████████████████████        | 54/60 [1:30:55<15:39, 156.57s/it]

**************************************************
当前因变量为： 10MKA01CT613


 92%|████████████████████████████████████████████████████████████████████████▍      | 55/60 [1:32:12<11:03, 132.69s/it]

**************************************************
当前因变量为： 10MKA01CT614


 93%|█████████████████████████████████████████████████████████████████████████▋     | 56/60 [1:33:08<07:19, 109.88s/it]

**************************************************
当前因变量为： 10MKA01CT615


 95%|███████████████████████████████████████████████████████████████████████████    | 57/60 [1:34:29<05:03, 101.07s/it]

**************************************************
当前因变量为： 10MKA01CT616


 97%|█████████████████████████████████████████████████████████████████████████████▎  | 58/60 [1:35:26<02:55, 87.77s/it]

**************************************************
当前因变量为： 10MKA01CT617


 98%|█████████████████████████████████████████████████████████████████████████████▋ | 59/60 [1:39:30<02:14, 134.82s/it]

**************************************************
当前因变量为： 10MKA01CT618


100%|███████████████████████████████████████████████████████████████████████████████| 60/60 [1:41:13<00:00, 101.23s/it]

**************************************************
一共耗时： 1:41:13.905513





In [10]:
# Display the per-target summary table assembled by the training loop.
res_all

Unnamed: 0,因变量,导入数据的维度,筛选后数据的维度,训练集数据,验证集数据,测试集数据,训练集特征提取,验证集特征提取,测试集特征提取,特征选择后训练集,val_mape,val_r2,val_mae,val_mse,val_max_diff_value,test_mape,test_r2,test_mae,test_mse,test_max_diff_value
0,10MKA01CT301,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 30)",0.000701,0.999611,0.037986,0.002811,0.141749,0.000621,0.999694,0.033247,0.002144,0.127167
1,10MKA01CT303,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 32)",0.00058,0.999746,0.031558,0.0019,0.12278,0.000547,0.999761,0.029678,0.001739,0.133941
2,10MKA01CT305,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 15)",0.000602,0.999782,0.033829,0.002091,0.118237,0.000843,0.999586,0.046734,0.003818,0.173314
3,10MKA01CT307,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 31)",0.000696,0.999541,0.037584,0.002862,0.143139,0.000833,0.999422,0.044323,0.003443,0.154179
4,10MKA01CT309,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 28)",0.000591,0.999701,0.031562,0.00194,0.117163,0.000518,0.999731,0.027729,0.001673,0.115919
5,10MKA01CT311,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 31)",0.000636,0.99971,0.034297,0.002166,0.114985,0.000531,0.999775,0.028558,0.001625,0.120337
6,10MKA01CT313,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 31)",0.001885,0.996812,0.102191,0.020204,0.452931,0.011295,0.929214,0.606766,0.42198,1.440109
7,10MKA01CT315,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 34)",0.000701,0.999645,0.037853,0.002498,0.112522,0.000646,0.999675,0.034565,0.002223,0.122113
8,10MKA01CT317,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 30)",0.00064,0.999735,0.034903,0.002016,0.120361,0.000601,0.999745,0.032551,0.001883,0.11658
9,10MKA01CT319,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 21)",0.000797,0.999611,0.043338,0.003102,0.128286,0.000766,0.999629,0.041201,0.002835,0.134443


In [11]:
# Targets whose validation R2 falls below 0.9
res_all[res_all['val_r2'] < 0.9]

Unnamed: 0,因变量,导入数据的维度,筛选后数据的维度,训练集数据,验证集数据,测试集数据,训练集特征提取,验证集特征提取,测试集特征提取,特征选择后训练集,val_mape,val_r2,val_mae,val_mse,val_max_diff_value,test_mape,test_r2,test_mae,test_mse,test_max_diff_value
49,10MKA01CT608,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 47)",0.001493,0.885787,0.067018,0.007509,0.247374,0.002477,0.783076,0.11085,0.018662,0.376787
50,10MKA01CT609,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 31)",0.001921,0.877375,0.087388,0.012027,0.328301,0.002074,0.902282,0.094246,0.012406,0.253657
58,10MKA01CT617,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 43)",0.001958,0.875026,0.090418,0.012783,0.336212,0.001714,0.875704,0.07907,0.010244,0.234519


In [9]:
# Targets whose test R2 falls below 0.9
res_all[res_all['test_r2'] < 0.9]

Unnamed: 0,因变量,导入数据的维度,筛选后数据的维度,训练集数据,验证集数据,测试集数据,训练集特征提取,验证集特征提取,测试集特征提取,特征选择后训练集,val_mape,val_r2,val_mae,val_mse,val_max_diff_value,test_mape,test_r2,test_mae,test_mse,test_max_diff_value
42,10MKA01CT601,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 10)",0.002858,0.919195,0.135726,0.030714,0.473658,0.004025,0.863989,0.191174,0.052,0.515377
48,10MKA01CT607,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 48)",0.001181,0.925701,0.052754,0.004499,0.208621,0.001945,0.872077,0.086898,0.010264,0.22837
49,10MKA01CT608,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 47)",0.001493,0.885787,0.067018,0.007509,0.247374,0.002477,0.783076,0.11085,0.018662,0.376787
58,10MKA01CT617,"(1489, 71)","(1489, 71)","(1296, 71)","(96, 71)","(97, 71)","(1291, 1662)","(91, 1662)","(92, 1662)","(1291, 43)",0.001958,0.875026,0.090418,0.012783,0.336212,0.001714,0.875704,0.07907,0.010244,0.234519
