In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes, load_iris
from sklearn.model_selection import train_test_split
from Model_Training_and_Evaluation_Flow import modelTrainingFlow
from FT_D_Pipeline import ML_Pipeline


# 實作範例 - Iris Dataset

In [2]:
# Load the Iris bunch once and put the feature columns and the label column
# side by side in one frame; "target" ends up as the last column.
irisBunch = load_iris(as_frame = True)
rawData = pd.concat([irisBunch["data"], irisBunch["target"]], axis = 1)

# Fixed seed so the shuffled splits (and every metric computed from them)
# are identical on each Restart & Run All.
SPLIT_SEED = 42

# 20% held out for test, then 25% of the remainder for validation,
# i.e. 60% train / 20% validation / 20% test overall.
trainData, testData = train_test_split(rawData, test_size = 0.2, shuffle = True, random_state = SPLIT_SEED)
trainData, valiData = train_test_split(trainData, test_size = 0.25, shuffle = True, random_state = SPLIT_SEED)

# Discard the shuffled row labels so every split is indexed from 0.
trainData, valiData, testData = [
    split.reset_index(drop = True) for split in (trainData, valiData, testData)
]

In [3]:
# Configure the preprocessing pipeline with the listed methods.
# Every column except the last one ("target") is passed as an input feature.
ml = ML_Pipeline(
    ml_methods=["SMOTE", "standardization", "PCA"],
    inputFeatures=trainData.columns[:-1].tolist(),
    target="target",
)

# The pipeline is fitted on the training split only.
ml.fit_Pipeline(fit_data=trainData)

# Transform each split, telling the pipeline which split it is handling.
# Note: this rebinds the three names, so the cell should not be re-run
# without first re-running the split cell above.
trainData = ml.transform_Pipeline(transform_data=trainData, mode="train")
valiData = ml.transform_Pipeline(transform_data=valiData, mode="vali")
testData = ml.transform_Pipeline(transform_data=testData, mode="test")

In [4]:
# Set up the training/evaluation flow for the classification task;
# every column other than "target" is used as an input feature.
totalResult = modelTrainingFlow(
    trainData=trainData,
    valiData=valiData,
    testData=testData,
    inputFeatures=trainData.columns.drop("target").tolist(),
    target="target",
    targetType="classification",
    mainMetric="accuracy",
    featureSelection=None,
    featureImportance=None,
    modelFileName=None,
)

# Last expression of the cell: the value returned by fit() is displayed.
totalResult.fit()


[{'Model': 'Random Forest with Entropy',
  'Features': ['sepal length (cm)',
   'sepal width (cm)',
   'petal length (cm)',
   'petal width (cm)'],
  'Set': 'train',
  'Number_of_Data': {0: 33, 1: 33, 2: 33},
  '0_F1-Score_for_1': 1.0,
  '0_F1-Score_for_0': 1.0,
  '0_Macro F1-Score': 1.0,
  '0_Micro F1-Score': 1.0,
  '0_prc_auc_1': 1.0,
  '0_prc_auc_0': 0.4210443493394933,
  '0_Precision_for_1': 1.0,
  '0_Precision_for_0': 1.0,
  '0_Macro Precision': 1.0,
  '0_Micro Precision': 1.0,
  '0_Recall_for_1': 1.0,
  '0_Recall_for_0': 1.0,
  '0_Macro Recall': 1.0,
  '0_Micro Recall': 1.0,
  '0_Accuracy': 1.0,
  '0_ROC-AUC': 1.0,
  '0_fpr': [0.0,
   0.0,
   0.48484848484848486,
   0.5151515151515151,
   0.5606060606060606,
   0.5757575757575758,
   1.0],
  '0_tpr': [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
  '0_True_value': [1,
   0,
   1,
   0,
   1,
   0,
   1,
   1,
   0,
   1,
   1,
   1,
   1,
   0,
   0,
   1,
   0,
   1,
   0,
   0,
   1,
   1,
   1,
   0,
   0,
   0,
   1,
   1,
   1,
   1,


# 實作範例 - Diabetes Dataset

In [2]:
# Load the Diabetes bunch once and put the feature columns and the label
# column side by side in one frame; "target" ends up as the last column.
diabetesBunch = load_diabetes(as_frame = True)
rawData = pd.concat([diabetesBunch["data"], diabetesBunch["target"]], axis = 1)

# Fixed seed so the shuffled splits (and every metric computed from them)
# are identical on each Restart & Run All.
SPLIT_SEED = 42

# 20% held out for test, then 25% of the remainder for validation,
# i.e. 60% train / 20% validation / 20% test overall.
trainData, testData = train_test_split(rawData, test_size = 0.2, shuffle = True, random_state = SPLIT_SEED)
trainData, valiData = train_test_split(trainData, test_size = 0.25, shuffle = True, random_state = SPLIT_SEED)

# Discard the shuffled row labels so every split is indexed from 0.
trainData, valiData, testData = [
    split.reset_index(drop = True) for split in (trainData, valiData, testData)
]

## 特徵轉換、降維或核函數

In [3]:
# Configure the preprocessing pipeline with the listed methods.
# Every column except the last one ("target") is passed as an input feature.
ml = ML_Pipeline(
    ml_methods=["standardization", "PCA"],
    inputFeatures=trainData.columns[:-1].tolist(),
    target="target",
)

# The pipeline is fitted on the training split only.
ml.fit_Pipeline(fit_data=trainData)

# Transform each split, telling the pipeline which split it is handling.
# Note: this rebinds the three names, so the cell should not be re-run
# without first re-running the split cell above.
trainData = ml.transform_Pipeline(transform_data=trainData, mode="train")
valiData = ml.transform_Pipeline(transform_data=valiData, mode="vali")
testData = ml.transform_Pipeline(transform_data=testData, mode="test")

## 模型訓練

In [4]:
# Set up the training/evaluation flow for the regression task;
# every column other than "target" is used as an input feature.
totalResult = modelTrainingFlow(
    trainData=trainData,
    valiData=valiData,
    testData=testData,
    inputFeatures=trainData.columns.drop("target").tolist(),
    target="target",
    targetType="regression",
    mainMetric="mse",
    featureSelection=None,
    featureImportance=None,
    modelFileName=None,
)

# Last expression of the cell: the value returned by fit() is displayed.
totalResult.fit()


[{'Model': 'Random Forest with squared_error',
  'Features': ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6'],
  'Set': 'train',
  'Number_of_Data': 264,
  'MAE': 49.18586289161202,
  'MSE': 3373.817728613522,
  'RMSE': 58.08457392986129,
  'R2': 0.43314810540735227},
 {'Model': 'Random Forest with absolute_error',
  'Features': ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6'],
  'Set': 'train',
  'Number_of_Data': 264,
  'MAE': 43.77231600346468,
  'MSE': 2868.8873245337227,
  'RMSE': 53.561995150794395,
  'R2': 0.5179839736176033},
 {'Model': 'Random Forest with friedman_mse',
  'Features': ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6'],
  'Set': 'train',
  'Number_of_Data': 264,
  'MAE': 53.2675601994167,
  'MSE': 3926.5675964220313,
  'RMSE': 62.66232996324052,
  'R2': 0.34027785128966703},
 {'Model': 'ExtraTree with squared_error',
  'Features': ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6'],
  'Set': 'train',
  'Numb

In [None]:
# Model evaluation results
# NOTE(review): in the training cell `totalResult` is bound to the
# modelTrainingFlow object, and the list of per-model metrics is what
# `totalResult.fit()` returns (it is displayed there but not stored).
# Confirm that the object itself supports `[0][3]` indexing; otherwise this
# cell presumably relies on an earlier kernel state in which `totalResult`
# held the result of fit(). The cell has no execution count, and the output
# saved below it lists degree-2 / interaction features that no cell above
# creates — TODO re-run on a fresh kernel and verify.
totalResult[0][3]

{'Model': 'LightGBM with ExtraTrees',
 'Features': ['age_degree_2',
  'sex_degree_2',
  'bmi_degree_2',
  'bp_degree_2',
  's1_degree_2',
  's2_degree_2',
  's3_degree_2',
  's4_degree_2',
  's5_degree_2',
  's6_degree_2',
  'age_sex',
  'age_bmi',
  'age_bp',
  'age_s1',
  'age_s2',
  'age_s3',
  'age_s4',
  'age_s5',
  'age_s6',
  'sex_bmi',
  'sex_bp',
  'sex_s1',
  'sex_s2',
  'sex_s3',
  'sex_s4',
  'sex_s5',
  'sex_s6',
  'bmi_bp',
  'bmi_s1',
  'bmi_s2',
  'bmi_s3',
  'bmi_s4',
  'bmi_s5',
  'bmi_s6',
  'bp_s1',
  'bp_s2',
  'bp_s3',
  'bp_s4',
  'bp_s5',
  'bp_s6',
  's1_s2',
  's1_s3',
  's1_s4',
  's1_s5',
  's1_s6',
  's2_s3',
  's2_s4',
  's2_s5',
  's2_s6',
  's3_s4',
  's3_s5',
  's3_s6',
  's4_s5',
  's4_s6',
  's5_s6'],
 'Set': 'train',
 'Number_of_Data': 264,
 'MAE': 19.99369348797126,
 'MSE': 590.3199250115812,
 'RMSE': 24.296500262621798,
 'R2': 0.9016171230378656}