In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes, load_iris, load_breast_cancer
from sklearn.model_selection import train_test_split
from Model_Training_and_Evaluation_Flow import modelTrainingFlow
from PermutationImportance import permutation_importance

# 實作範例 - Breast Cancer（Binary Classification）

In [2]:
rawData = load_breast_cancer(as_frame = True)["data"]
rawData = pd.concat([rawData, load_breast_cancer(as_frame = True)["target"]], axis = 1)
rawData = rawData.rename(
    columns = {
        i: i.replace(" ", "_") for i in rawData.columns
    }
)
trainData, testData = train_test_split(rawData, test_size = 0.2, shuffle = True) 
trainData, valiData = train_test_split(trainData, test_size = 0.25, shuffle = True) 
trainData, valiData, testData = trainData.reset_index(drop = True), valiData.reset_index(drop = True), testData.reset_index(drop = True) 

## Feature Engineer and Model Training

In [3]:
totalResult = modelTrainingFlow(trainData = trainData,
                                valiData = valiData,
                                testData = testData,
                                inputFeatures = trainData.drop(columns = ["target"]).columns.tolist(), 
                                target = "target", 
                                targetType = "classification",
                                num_baggings = 2, 
                                ml_methods = ["standardization"],
                                mainMetric = "auroc", 
                                featureSelection = None, 
                                featureImportance = None)
result = totalResult.fit(permutationImportanceMethod = ["trainData", "originalData"])

Random Forest with Entropy_baggings Training


100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 334.31it/s]


Random Forest with Gini_baggings Training


100%|██████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 2261.08it/s]


ExtraTree with Entropy_baggings Training


100%|██████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1000.07it/s]


ExtraTree with Gini_baggings Training


100%|██████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 2003.49it/s]


XGBoost_baggings Training


100%|██████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1002.22it/s]


LightGBM_baggings Training


100%|██████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 2000.14it/s]


LightGBM with ExtraTrees_baggings Training


100%|████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<?, ?it/s]


  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

In [4]:
pd.DataFrame(result["Evaluation"])

Unnamed: 0,Model,Features,Set,Number_of_Data,Num_Baggings,F1-Score_for_1,F1-Score_for_0,Macro F1-Score,Micro F1-Score,prc_auc_1,...,Recall_for_0,Macro Recall,Micro Recall,Accuracy,ROC-AUC,fpr,tpr,True_value,Predict_value,Predict_prob_value
0,Random Forest with Entropy_baggings,"[mean_radius, mean_texture, mean_perimeter, me...",train,"{1: 213, 0: 128}",2,0.954128,0.918699,0.936414,0.941349,0.992333,...,0.882812,0.929669,0.941349,0.941349,0.988226,"[0.0, 0.0, 0.0, 0.0078125, 0.0078125, 0.015625...","[0.0, 0.004694835680751174, 0.6525821596244131...",0 1 1 1 2 0 3 1 4 1  ...,"[1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, ...","[0.809210329363164, 0.7074343026570763, 0.3584..."
1,Random Forest with Gini_baggings,"[mean_radius, mean_texture, mean_perimeter, me...",train,"{1: 213, 0: 128}",2,0.768953,0.0,0.384477,0.624633,0.98935,...,0.0,0.5,0.624633,0.624633,0.984228,"[0.0, 0.015625, 0.015625, 0.015625, 0.015625, ...","[0.0, 0.7746478873239436, 0.7934272300469484, ...",0 1 1 1 2 0 3 1 4 1  ...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0.6571381629206511, 0.6524757513650167, 0.641..."
2,ExtraTree with Entropy_baggings,"[mean_radius, mean_texture, mean_perimeter, me...",train,"{1: 213, 0: 128}",2,0.768953,0.0,0.384477,0.624633,0.812317,...,0.0,0.5,0.624633,0.624633,0.5,"[0.0, 1.0]","[0.0, 1.0]",0 1 1 1 2 0 3 1 4 1  ...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0.6507661352731775, 0.6507661352731775, 0.650..."
3,ExtraTree with Gini_baggings,"[mean_radius, mean_texture, mean_perimeter, me...",train,"{1: 213, 0: 128}",2,0.768953,0.0,0.384477,0.624633,0.812317,...,0.0,0.5,0.624633,0.624633,0.5,"[0.0, 1.0]","[0.0, 1.0]",0 1 1 1 2 0 3 1 4 1  ...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0.6507661352731775, 0.6507661352731775, 0.650..."
4,XGBoost_baggings,"[mean_radius, mean_texture, mean_perimeter, me...",train,"{1: 213, 0: 128}",2,0.993007,0.988142,0.990575,0.991202,0.999372,...,0.976562,0.988281,0.991202,0.991202,0.998973,"[0.0, 0.0, 0.0, 0.0078125, 0.0078125, 0.023437...","[0.0, 0.004694835680751174, 0.9248826291079812...",0 1 1 1 2 0 3 1 4 1  ...,"[1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, ...","[0.9700350165367126, 0.9455178081989288, 0.034..."
5,LightGBM_baggings,"[mean_radius, mean_texture, mean_perimeter, me...",train,"{1: 213, 0: 128}",2,0.986047,0.97619,0.981118,0.982405,0.996628,...,0.960938,0.978121,0.982405,0.982405,0.995379,"[0.0, 0.0, 0.0, 0.0078125, 0.0078125, 0.015625...","[0.0, 0.004694835680751174, 0.5774647887323944...",0 1 1 1 2 0 3 1 4 1  ...,"[1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, ...","[0.9561953378141176, 0.8780191331880522, 0.064..."
6,LightGBM with ExtraTrees_baggings,"[mean_radius, mean_texture, mean_perimeter, me...",train,"{1: 213, 0: 128}",2,0.986047,0.97619,0.981118,0.982405,0.997237,...,0.960938,0.978121,0.982405,0.982405,0.996002,"[0.0, 0.0, 0.0, 0.0078125, 0.0078125, 0.015625...","[0.0, 0.004694835680751174, 0.6572769953051644...",0 1 1 1 2 0 3 1 4 1  ...,"[1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, ...","[0.9367051267805127, 0.9195252710894577, 0.063..."
7,Random Forest with Entropy_baggings,"[mean_radius, mean_texture, mean_perimeter, me...",vali,"{1: 78, 0: 36}",2,0.962963,0.909091,0.936027,0.947368,0.998415,...,0.833333,0.916667,0.947368,0.947368,0.996439,"[0.0, 0.0, 0.0, 0.027777777777777776, 0.027777...","[0.0, 0.01282051282051282, 0.9487179487179487,...",0 0 1 1 2 1 3 0 4 0  ...,"[0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, ...","[0.21292960486640722, 0.8416165286174733, 0.82..."
8,Random Forest with Gini_baggings,"[mean_radius, mean_texture, mean_perimeter, me...",vali,"{1: 78, 0: 36}",2,0.8125,0.0,0.40625,0.684211,0.99682,...,0.0,0.5,0.684211,0.684211,0.992699,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.027777777777777776...","[0.0, 0.7435897435897436, 0.8205128205128205, ...",0 0 1 1 2 1 3 0 4 0  ...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0.6392775920318998, 0.6573405927734304, 0.657..."
9,ExtraTree with Entropy_baggings,"[mean_radius, mean_texture, mean_perimeter, me...",vali,"{1: 78, 0: 36}",2,0.8125,0.0,0.40625,0.684211,0.842105,...,0.0,0.5,0.684211,0.684211,0.5,"[0.0, 1.0]","[0.0, 1.0]",0 0 1 1 2 1 3 0 4 0  ...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0.6507661352731775, 0.6507661352731775, 0.650..."


In [5]:
pd.DataFrame(result["PermutationImportance"]["originalData"])

Unnamed: 0,Model,Set,Feature,Mean-Importance,Std-Importance,p-value for t-test,originalMetric,Metric_for_Each_Data,Importance_for_Each_Data
0,Random Forest with Entropy_baggings,train,mean_radius,5.865103e-04,0.002873,0.704000,0.9413,"[0.9413, 0.9472, 0.9413, 0.9384, 0.9413]","[0, 0, 0, 0, 0]"
1,Random Forest with Gini_baggings,train,mean_texture,0.000000e+00,0.000000,,0.9413,"[0.9413, 0.9413, 0.9413, 0.9413, 0.9413]","[0, 0, 0, 0, 0]"
2,ExtraTree with Entropy_baggings,train,mean_perimeter,5.865103e-03,0.001855,0.003198,0.9413,"[0.9443, 0.9501, 0.9472, 0.9472, 0.9472]","[0, 0, 0, 0, 0]"
3,ExtraTree with Gini_baggings,train,mean_area,1.759531e-03,0.002346,0.208000,0.9413,"[0.9413, 0.9443, 0.9413, 0.9413, 0.9472]","[0, 0, 0, 0, 0]"
4,XGBoost_baggings,train,mean_smoothness,0.000000e+00,0.000000,,0.9413,"[0.9413, 0.9413, 0.9413, 0.9413, 0.9413]","[0, 0, 0, 0, 0]"
...,...,...,...,...,...,...,...,...,...
436,ExtraTree with Entropy_baggings,test,concavity_error,0.000000e+00,0.000000,,0.9737,"[0.9737, 0.9737, 0.9737, 0.9737, 0.9737]","[0, 0, 0, 0, 0]"
437,ExtraTree with Gini_baggings,test,concave_points_error,0.000000e+00,0.000000,,0.9737,"[0.9737, 0.9737, 0.9737, 0.9737, 0.9737]","[0, 0, 0, 0, 0]"
438,XGBoost_baggings,test,symmetry_error,-2.220446e-17,0.005548,1.000000,0.9737,"[0.9649, 0.9737, 0.9825, 0.9737, 0.9737]","[0, 0, 0, 0, 0]"
439,LightGBM_baggings,test,fractal_dimension_error,0.000000e+00,0.000000,,0.9737,"[0.9737, 0.9737, 0.9737, 0.9737, 0.9737]","[0, 0, 0, 0, 0]"


In [6]:
pd.DataFrame(result["PermutationImportance"]["trainData"])

Unnamed: 0,Model,Set,Feature,Mean-Importance,Std-Importance,p-value for t-test,originalMetric,Metric_for_Each_Data,Importance_for_Each_Data
0,Random Forest with Entropy_baggings,train,mean_radius,-0.001173,0.001437,0.177808,0.9413,"[0.9413, 0.9413, 0.9384, 0.9413, 0.9384]","[0, 0, 0, 0, 0]"
1,Random Forest with Gini_baggings,train,mean_texture,0.000000,0.000000,,0.9413,"[0.9413, 0.9413, 0.9413, 0.9413, 0.9413]","[0, 0, 0, 0, 0]"
2,ExtraTree with Entropy_baggings,train,mean_perimeter,0.005279,0.003420,0.036682,0.9413,"[0.9443, 0.9472, 0.9443, 0.9443, 0.9531]","[0, 0, 0, 0, 0]"
3,ExtraTree with Gini_baggings,train,mean_area,0.003519,0.002195,0.032678,0.9413,"[0.9472, 0.9472, 0.9443, 0.9413, 0.9443]","[0, 0, 0, 0, 0]"
4,XGBoost_baggings,train,mean_smoothness,0.000000,0.000000,,0.9413,"[0.9413, 0.9413, 0.9413, 0.9413, 0.9413]","[0, 0, 0, 0, 0]"
...,...,...,...,...,...,...,...,...,...
436,ExtraTree with Entropy_baggings,test,concavity_error,0.000000,0.000000,,0.9737,"[0.9737, 0.9737, 0.9737, 0.9737, 0.9737]","[0, 0, 0, 0, 0]"
437,ExtraTree with Gini_baggings,test,concave_points_error,0.000000,0.000000,,0.9737,"[0.9737, 0.9737, 0.9737, 0.9737, 0.9737]","[0, 0, 0, 0, 0]"
438,XGBoost_baggings,test,symmetry_error,-0.005263,0.007018,0.208000,0.9737,"[0.9649, 0.9649, 0.9649, 0.9649, 0.9825]","[0, 0, 0, 0, 0]"
439,LightGBM_baggings,test,fractal_dimension_error,0.000000,0.000000,,0.9737,"[0.9737, 0.9737, 0.9737, 0.9737, 0.9737]","[0, 0, 0, 0, 0]"


# 實作範例 - Iris Datasets（Multi-class Classification）

In [8]:
rawData = load_iris(as_frame = True)["data"]
rawData = pd.concat([rawData, load_iris(as_frame = True)["target"]], axis = 1)
rawData = rawData.rename(
    columns = {
        "sepal length (cm)": "sepal_length_(cm)",
        "sepal width (cm)": "sepal_width_(cm)",
        "petal length (cm)": "petal_length_(cm)",
        "petal width (cm)": "'petal_width_(cm)"
    }
)
trainData, testData = train_test_split(rawData, test_size = 0.2, shuffle = True) 
trainData, valiData = train_test_split(trainData, test_size = 0.25, shuffle = True) 
trainData, valiData, testData = trainData.reset_index(drop = True), valiData.reset_index(drop = True), testData.reset_index(drop = True) 

## Feature Engineer and Model Training

In [9]:
totalResult = modelTrainingFlow(trainData = trainData,
                                valiData = valiData,
                                testData = testData,
                                inputFeatures = trainData.drop(columns = ["target"]).columns.tolist(), 
                                target = "target", 
                                targetType = "classification",
                                ml_methods = ["standardization"],
                                mainMetric = "accuracy", 
                                featureSelection = None, 
                                featureImportance = None,
                                modelFileName = "iris")
result = totalResult.fit(permutationImportanceMethod = ["trainData", "originalData"])

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

In [10]:
pd.DataFrame(result["Evaluation"])

Unnamed: 0,Model,Features,Set,Number_of_Data,0_F1-Score_for_1,0_F1-Score_for_0,0_Macro F1-Score,0_Micro F1-Score,0_prc_auc_1,0_prc_auc_0,...,2_Macro Recall,2_Micro Recall,2_Accuracy,2_ROC-AUC,2_fpr,2_tpr,2_True_value,2_Predict_value,2_Predict_prob_value,Accuracy
0,Random Forest with Entropy,"[sepal_length_(cm), sepal_width_(cm), petal_le...",train,"{0: 32, 1: 31, 2: 27}",0.955224,0.973451,0.964338,0.966667,1.0,0.42794,...,0.960317,0.944444,0.944444,0.993827,"[0.0, 0.0, 0.0, 0.015873015873015872, 0.015873...","[0.0, 0.7407407407407407, 0.8148148148148148, ...","[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, ...","[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, ...","[0.4649021226829475, 0.4649021226829475, 0.137...",0.911111
1,Random Forest with Gini,"[sepal_length_(cm), sepal_width_(cm), petal_le...",train,"{0: 32, 1: 31, 2: 27}",0.0,0.783784,0.391892,0.644444,0.677778,0.822222,...,0.5,0.7,0.7,0.5,"[0.0, 1.0]","[0.0, 1.0]","[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.30344444444444446, 0.30344444444444446, 0.3...",0.344444
2,ExtraTree with Entropy,"[sepal_length_(cm), sepal_width_(cm), petal_le...",train,"{0: 32, 1: 31, 2: 27}",1.0,1.0,1.0,1.0,1.0,0.322222,...,0.973545,0.977778,0.977778,0.982951,"[0.0, 0.015873015873015872, 0.4920634920634920...","[0.0, 0.9629629629629629, 1.0, 1.0]","[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, ...","[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, ...","[0.9722222222222222, 0.9722222222222222, 0.0, ...",0.977778
3,ExtraTree with Gini,"[sepal_length_(cm), sepal_width_(cm), petal_le...",train,"{0: 32, 1: 31, 2: 27}",0.0,0.783784,0.391892,0.644444,0.677778,0.822222,...,0.5,0.7,0.7,0.5,"[0.0, 1.0]","[0.0, 1.0]","[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.30833333333333335, 0.30833333333333335, 0.3...",0.344444
4,XGBoost,"[sepal_length_(cm), sepal_width_(cm), petal_le...",train,"{0: 32, 1: 31, 2: 27}",1.0,1.0,1.0,1.0,1.0,0.429435,...,0.957672,0.955556,0.955556,0.996473,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.015873015873015872...","[0.0, 0.037037037037037035, 0.7037037037037037...","[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, ...","[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, ...","[0.9436390995979309, 0.8897717595100403, 0.005...",0.955556
5,LightGBM,"[sepal_length_(cm), sepal_width_(cm), petal_le...",train,"{0: 32, 1: 31, 2: 27}",1.0,1.0,1.0,1.0,1.0,0.429402,...,0.97619,0.966667,0.966667,0.997061,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.015...","[0.0, 0.1111111111111111, 0.25925925925925924,...","[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, ...","[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, ...","[0.9320575950028092, 0.8791776477786297, 0.017...",0.966667
6,LightGBM with ExtraTrees,"[sepal_length_(cm), sepal_width_(cm), petal_le...",train,"{0: 32, 1: 31, 2: 27}",1.0,1.0,1.0,1.0,1.0,0.429431,...,0.97619,0.966667,0.966667,0.997061,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.037037037037037035, 0.1481481481481481...","[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, ...","[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, ...","[0.9937775358814237, 0.8965074836766205, 7.468...",0.966667
7,Random Forest with Entropy,"[sepal_length_(cm), sepal_width_(cm), petal_le...",vali,"{1: 11, 2: 10, 0: 9}",0.9,0.95,0.925,0.933333,1.0,0.481376,...,0.975,0.966667,0.966667,1.0,"[0.0, 0.0, 0.0, 0.2, 0.3, 0.55, 1.0]","[0.0, 0.8, 1.0, 1.0, 1.0, 1.0, 1.0]","[1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0.4649021226829473, 0.4649021226829473, 0.137...",0.9
8,Random Forest with Gini,"[sepal_length_(cm), sepal_width_(cm), petal_le...",vali,"{1: 11, 2: 10, 0: 9}",0.0,0.823529,0.411765,0.7,0.65,0.85,...,0.5,0.666667,0.666667,0.5,"[0.0, 1.0]","[0.0, 1.0]","[1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.3034444444444445, 0.3034444444444445, 0.303...",0.366667
9,ExtraTree with Entropy,"[sepal_length_(cm), sepal_width_(cm), petal_le...",vali,"{1: 11, 2: 10, 0: 9}",1.0,1.0,1.0,1.0,1.0,0.35,...,0.95,0.966667,0.966667,0.9725,"[0.0, 0.0, 0.55, 1.0]","[0.0, 0.9, 1.0, 1.0]","[1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.9722222222222222, 0.9722222222222222, 0.0, ...",0.966667


In [11]:
pd.DataFrame(result["PermutationImportance"]["originalData"])

Unnamed: 0,Model,Set,Feature,Mean-Importance,Std-Importance,p-value for t-test,originalMetric,Metric_for_Each_Data,Importance_for_Each_Data
0,Random Forest with Entropy,train,sepal_length_(cm),-0.004444,0.005443,0.177808,0.9111,"[0.9111, 0.9, 0.9, 0.9111, 0.9111]","[0.0, -0.0111, -0.0111, 0.0, 0.0]"
1,Random Forest with Gini,train,sepal_width_(cm),0.000000,0.000000,,0.9111,"[0.9111, 0.9111, 0.9111, 0.9111, 0.9111]","[0.0, 0.0, 0.0, 0.0, 0.0]"
2,ExtraTree with Entropy,train,petal_length_(cm),-0.362222,0.023934,0.000007,0.9111,"[0.5333, 0.5111, 0.5556, 0.5667, 0.5778]","[-0.3778, -0.4, -0.3556, -0.3444, -0.3333]"
3,ExtraTree with Gini,train,'petal_width_(cm),-0.224444,0.021545,0.000031,0.9111,"[0.6889, 0.6778, 0.7222, 0.6889, 0.6556]","[-0.2222, -0.2333, -0.1889, -0.2222, -0.2556]"
4,Random Forest with Entropy,train,sepal_length_(cm),0.000000,0.000000,,0.3444,"[0.3444, 0.3444, 0.3444, 0.3444, 0.3444]","[0.0, 0.0, 0.0, 0.0, 0.0]"
...,...,...,...,...,...,...,...,...,...
79,ExtraTree with Gini,train,'petal_width_(cm),-0.060000,0.024944,0.008581,0.9333,"[0.8667, 0.9, 0.8667, 0.8333, 0.9]","[-0.0667, -0.0333, -0.0667, -0.1, -0.0333]"
80,Random Forest with Entropy,train,sepal_length_(cm),-0.053333,0.033993,0.034920,0.9333,"[0.9333, 0.9, 0.8667, 0.8667, 0.8333]","[0.0, -0.0333, -0.0667, -0.0667, -0.1]"
81,Random Forest with Gini,train,sepal_width_(cm),-0.033333,0.021082,0.034109,0.9333,"[0.8667, 0.9333, 0.9, 0.9, 0.9]","[-0.0667, 0.0, -0.0333, -0.0333, -0.0333]"
82,ExtraTree with Entropy,train,petal_length_(cm),-0.400000,0.051640,0.000101,0.9333,"[0.5, 0.5, 0.5, 0.5333, 0.6333]","[-0.4333, -0.4333, -0.4333, -0.4, -0.3]"


In [12]:
pd.DataFrame(result["PermutationImportance"]["trainData"])

Unnamed: 0,Model,Set,Feature,Mean-Importance,Std-Importance,p-value for t-test,originalMetric,Metric_for_Each_Data,Importance_for_Each_Data
0,Random Forest with Entropy,train,sepal_length_(cm),-0.002222,0.004444,0.373901,0.9111,"[0.9111, 0.9111, 0.9, 0.9111, 0.9111]","[0.0, 0.0, -0.0111, 0.0, 0.0]"
1,Random Forest with Entropy,train,sepal_width_(cm),0.000000,0.000000,,0.9111,"[0.9111, 0.9111, 0.9111, 0.9111, 0.9111]","[0.0, 0.0, 0.0, 0.0, 0.0]"
2,Random Forest with Entropy,train,petal_length_(cm),-0.342222,0.023727,0.000009,0.9111,"[0.5556, 0.6111, 0.5778, 0.5444, 0.5556]","[-0.3556, -0.3, -0.3333, -0.3667, -0.3556]"
3,Random Forest with Entropy,train,'petal_width_(cm),-0.206667,0.034854,0.000290,0.9111,"[0.7667, 0.6667, 0.7, 0.6778, 0.7111]","[-0.1444, -0.2444, -0.2111, -0.2333, -0.2]"
4,Random Forest with Entropy,train,sepal_length_(cm),0.000000,0.000000,,0.3444,"[0.3444, 0.3444, 0.3444, 0.3444, 0.3444]","[0.0, 0.0, 0.0, 0.0, 0.0]"
...,...,...,...,...,...,...,...,...,...
79,Random Forest with Entropy,train,'petal_width_(cm),-0.033333,0.021082,0.034109,0.9333,"[0.9, 0.9, 0.9, 0.8667, 0.9333]","[-0.0333, -0.0333, -0.0333, -0.0667, 0.0]"
80,Random Forest with Entropy,train,sepal_length_(cm),-0.066667,0.029814,0.011056,0.9333,"[0.8333, 0.9, 0.9, 0.8667, 0.8333]","[-0.1, -0.0333, -0.0333, -0.0667, -0.1]"
81,Random Forest with Entropy,train,sepal_width_(cm),-0.013333,0.040000,0.541470,0.9333,"[0.9667, 0.9, 0.8667, 0.9667, 0.9]","[0.0333, -0.0333, -0.0667, 0.0333, -0.0333]"
82,Random Forest with Entropy,train,petal_length_(cm),-0.453333,0.116619,0.001476,0.9333,"[0.4667, 0.7, 0.4, 0.3667, 0.4667]","[-0.4667, -0.2333, -0.5333, -0.5667, -0.4667]"


# 實作範例 - Diabete Datasets

In [7]:
rawData = load_diabetes(as_frame = True)["data"]
rawData = pd.concat([rawData, load_diabetes(as_frame = True)["target"]], axis = 1)
trainData, testData = train_test_split(rawData, test_size = 0.2, shuffle = True) 
trainData, valiData = train_test_split(trainData, test_size = 0.25, shuffle = True) 
trainData, valiData, testData = trainData.reset_index(drop = True), valiData.reset_index(drop = True), testData.reset_index(drop = True) 

## Feature Engineer and Model Training

In [8]:
modelTrainingFlow = modelTrainingFlow(trainData = trainData,
                                 valiData = valiData,
                                 testData = testData,
                                inputFeatures = trainData.drop(columns = ["target"]).columns.tolist(), 
                                target = "target", 
                                ml_methods = ["None"],
                                targetType = "regression",
                                num_baggings = 2, 
                                mainMetric = "mse", 
                                featureSelection = None, 
                                featureImportance = None)
totalResult = modelTrainingFlow.fit()

Random Forest with squared_error_baggings Training


100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 393.28it/s]


Random Forest with absolute_error_baggings Training


100%|██████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 2003.97it/s]


Random Forest with friedman_mse_baggings Training


100%|████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<?, ?it/s]


ExtraTree with squared_error_baggings Training


100%|██████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 2172.65it/s]


ExtraTree with absolute_error_baggings Training


100%|██████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 2003.01it/s]


ExtraTree with friedman_mse_baggings Training


100%|██████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1911.72it/s]


XGBoost_baggings Training


100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 995.56it/s]


LightGBM_baggings Training


100%|██████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1992.54it/s]


LightGBM with ExtraTrees_baggings Training


100%|██████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1997.29it/s]


  0%|          | 0/27 [00:00<?, ?it/s]

  0%|          | 0/27 [00:00<?, ?it/s]

In [9]:
pd.DataFrame(totalResult["Evaluation"])

Unnamed: 0,Model,Features,Set,Number_of_Data,Num_Baggings,MAE,MSE,RMSE,R2
0,Random Forest with squared_error_baggings,"[age, sex, bmi, bp, s1, s2, s3, s4, s5, s6]",train,264,2,44.426024,2862.64712,53.503711,0.495501
1,Random Forest with absolute_error_baggings,"[age, sex, bmi, bp, s1, s2, s3, s4, s5, s6]",train,264,2,44.361731,2974.986021,54.543432,0.475703
2,Random Forest with friedman_mse_baggings,"[age, sex, bmi, bp, s1, s2, s3, s4, s5, s6]",train,264,2,46.671195,3124.966029,55.901396,0.449271
3,ExtraTree with squared_error_baggings,"[age, sex, bmi, bp, s1, s2, s3, s4, s5, s6]",train,264,2,46.598231,3127.332192,55.922555,0.448854
4,ExtraTree with absolute_error_baggings,"[age, sex, bmi, bp, s1, s2, s3, s4, s5, s6]",train,264,2,50.761364,3961.068182,62.937018,0.30192
5,ExtraTree with friedman_mse_baggings,"[age, sex, bmi, bp, s1, s2, s3, s4, s5, s6]",train,264,2,49.056922,3510.137423,59.246413,0.38139
6,XGBoost_baggings,"[age, sex, bmi, bp, s1, s2, s3, s4, s5, s6]",train,264,2,17.548582,536.029712,23.152315,0.905533
7,LightGBM_baggings,"[age, sex, bmi, bp, s1, s2, s3, s4, s5, s6]",train,264,2,32.285459,1639.894033,40.495605,0.710993
8,LightGBM with ExtraTrees_baggings,"[age, sex, bmi, bp, s1, s2, s3, s4, s5, s6]",train,264,2,38.951802,2346.233761,48.437937,0.586511
9,Random Forest with squared_error_baggings,"[age, sex, bmi, bp, s1, s2, s3, s4, s5, s6]",vali,89,2,41.00362,2476.988191,49.76935,0.566439
