In [1]:
import os, itertools
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes, load_iris, load_breast_cancer
from sklearn.model_selection import train_test_split
from AutoML_Flow.MLEnv import *
from AutoML_Flow.Model_Training_and_Evaluation_Flow import modelTrainingFlow
import comet_ml
from comet_ml import Experiment

# 實作範例 - Breast Cancer（Binary Classification）

In [2]:
# Load the breast-cancer dataset once and reuse the bunch for both the feature
# frame and the label column (the dataset was previously loaded twice).
breastCancer = load_breast_cancer(as_frame = True)
rawData = pd.concat([breastCancer["data"], breastCancer["target"]], axis = 1)

# Replace the spaces in every column name with underscores.
rawData = rawData.rename(
    columns = {
        columnName: columnName.replace(" ", "_") for columnName in rawData.columns
    }
)


In [3]:
# Train and evaluate one LightGBM model per feature-engineering configuration
# and log every run to Comet as its own experiment. The fit results of all
# runs are collected in `allResult`.
allResult = list()

# Do not hardcode the Comet API key in the notebook. Without an explicit key
# Comet resolves it from the COMET_API_KEY environment variable or its config
# file (the earlier run reported a key saved in ~/.comet.config).
# NOTE(review): a real API key used to be committed in this cell — revoke it.
comet_ml.init(
    project_name = "test-comet"
)

# 60 / 20 / 20 train / validation / test split: 20% is held out for test, then
# 25% of the remaining 80% becomes validation. Fixed seeds keep the split
# reproducible across kernel restarts.
trainData, testData = train_test_split(rawData, test_size = 0.2, shuffle = True, random_state = 42)
trainData, valiData = train_test_split(trainData, test_size = 0.25, shuffle = True, random_state = 42)

# `featureEngineerFlow` is presumably provided by the star import from
# AutoML_Flow.MLEnv — TODO confirm. Only the first five configurations are run.
for oneFE in featureEngineerFlow[:5]:

    experiment = Experiment()

    # try/finally guarantees the Comet experiment is closed even when training
    # or logging raises, so no dangling live experiment is left behind.
    try:
        totalResult = modelTrainingFlow(
            trainData = trainData,
            valiData = valiData,
            testData = testData,
            inputFeatures = trainData.drop(columns = ["target"]).columns.tolist(),
            target = "target",
            targetType = "classification",
            ml_methods = oneFE,
            hyperparameter_tuning_method = "default",
            hyperparameter_tuning_epochs = 1,
            HTMetric = "cross_entropy",
            thresholdMetric = "f1_1",
            featureSelection = oneFE["FeatureSelection"],
            modelNameList = [
                ["LightGBM"]
                # ["Random Forest with Entropy"],
                # ["LightGBM", "XGBoost", "Random Forest with Entropy"] * 10,
            ],
            fitBestModel = False,
            # metaLearner = "XGBoost",
            # modelFilePath = "./",
            importanceMethod = ["None"],
        )
        result = totalResult.fit()

        # result["Evaluation"] is consumed as a pair of parallel sequences:
        # [0] holds the parameter records, [1] the matching metric records.
        # NOTE(review): every record is logged under the same metric names and
        # the same "ROC" curve name inside one experiment (the Comet summary
        # shows a count of 3 per metric); consider separating them per data set.
        for one_parameters, one_metrics in zip(
            result["Evaluation"][0],
            result["Evaluation"][1]
        ):

            experiment.log_curve(
                name = "ROC",
                x = one_metrics["fpr"],
                y = one_metrics["tpr"]
            )

            # List-valued entries (e.g. the fpr / tpr arrays logged above) are
            # dropped before the remaining values are logged as metrics.
            scalar_metrics = {
                key: value
                for key, value in one_metrics.items() if not isinstance(value, list)
            }
            experiment.log_parameters(one_parameters)
            experiment.log_metrics(scalar_metrics)

        allResult.append(result)
    finally:
        experiment.end()

[1;38;5;39mCOMET INFO:[0m Valid Comet API Key saved in /home/wang-jian-an/.comet.config (set COMET_CONFIG to change where it is saved).
[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/wang-jian-an/test-comet/8ab0420efa2144e4b32f60bb8ae3cd2c



LightGBM Training
最佳 Threshold 0.6156776133205051


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/wang-jian-an/test-comet/8ab0420efa2144e4b32f60bb8ae3cd2c
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     Number_of_Data_0 [3] : (40, 128)
[1;38;5;39mCOMET INFO:[0m     Number_of_Data_1 [3] : (70, 213)
[1;38;5;39mCOMET INFO:[0m     Set                  : test
[1;38;5;39mCOMET INFO:[0m     accuracy [3]         : (0.9736842105263158, 1.0)
[1;38;5;39mCOMET INFO:[0m     cross_entropy [3]    : (0.0005044318103485206, 0.1341765278810519)
[1;38;5;39mCOMET INFO:[0m     f1_0 [3]             : 

LightGBM Training
最佳 Threshold 0.6156776133205051


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/wang-jian-an/test-comet/f93bda6db34946bfbf7f78129e224a19
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     Number_of_Data_0 [3] : (40, 128)
[1;38;5;39mCOMET INFO:[0m     Number_of_Data_1 [3] : (70, 213)
[1;38;5;39mCOMET INFO:[0m     Set                  : test
[1;38;5;39mCOMET INFO:[0m     accuracy [3]         : (0.9736842105263158, 1.0)
[1;38;5;39mCOMET INFO:[0m     cross_entropy [3]    : (0.0005267439550711965, 0.10447915981417596)
[1;38;5;39mCOMET INFO:[0m     f1_0 [3]             :

LightGBM Training
最佳 Threshold 0.7040441568843022


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/wang-jian-an/test-comet/1d14c1aa91034b6bbe4aa72afddec19e
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     Number_of_Data_0 [3] : (40, 128)
[1;38;5;39mCOMET INFO:[0m     Number_of_Data_1 [3] : (70, 213)
[1;38;5;39mCOMET INFO:[0m     Set                  : test
[1;38;5;39mCOMET INFO:[0m     accuracy [3]         : (0.9736842105263158, 1.0)
[1;38;5;39mCOMET INFO:[0m     cross_entropy [3]    : (0.0003517921455408879, 0.15515625740244304)
[1;38;5;39mCOMET INFO:[0m     f1_0 [3]             :

LightGBM Training
最佳 Threshold 0.6156776133205051


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/wang-jian-an/test-comet/1d9e84e7d645445c9c8fdb1099d4f108
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     Number_of_Data_0 [3] : (40, 128)
[1;38;5;39mCOMET INFO:[0m     Number_of_Data_1 [3] : (70, 213)
[1;38;5;39mCOMET INFO:[0m     Set                  : test
[1;38;5;39mCOMET INFO:[0m     accuracy [3]         : (0.9736842105263158, 1.0)
[1;38;5;39mCOMET INFO:[0m     cross_entropy [3]    : (0.0005223266275290266, 0.0979248516881491)
[1;38;5;39mCOMET INFO:[0m     f1_0 [3]             : 

LightGBM Training
最佳 Threshold 0.7586583141090728


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/wang-jian-an/test-comet/78252e8773b34ac7948639a01c8c83e8
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     Number_of_Data_0 [3] : (40, 128)
[1;38;5;39mCOMET INFO:[0m     Number_of_Data_1 [3] : (70, 213)
[1;38;5;39mCOMET INFO:[0m     Set                  : test
[1;38;5;39mCOMET INFO:[0m     accuracy [3]         : (0.9649122807017544, 1.0)
[1;38;5;39mCOMET INFO:[0m     cross_entropy [3]    : (0.0008304953766727394, 0.13212212064878356)
[1;38;5;39mCOMET INFO:[0m     f1_0 [3]             :

In [6]:
# Display the parameter records (first element of result["Evaluation"]) of the
# most recent training run as a table, one row per record.
pd.DataFrame.from_records(result["Evaluation"][0])

Unnamed: 0,Model,Meta-Learner,Features,FeatureSelection,Imbalanced,Decomposition,Standardization
0,LightGBM,,"[mean_radius, mean_texture, mean_perimeter, me...",,,PCA,
1,LightGBM,,"[mean_radius, mean_texture, mean_perimeter, me...",,,PCA,
2,LightGBM,,"[mean_radius, mean_texture, mean_perimeter, me...",,,PCA,


In [None]:
# Display the "originalData" entry of result["PermutationImportance"] as a table.
# NOTE(review): the breast-cancer run above passes importanceMethod = ["None"],
# so this key may be absent from `result` — confirm before running.
pd.DataFrame(result["PermutationImportance"]["originalData"])

In [None]:
# Display the "trainData" entry of result["PermutationImportance"] as a table.
# NOTE(review): the breast-cancer run above passes importanceMethod = ["None"],
# so this key may be absent from `result` — confirm before running.
pd.DataFrame(result["PermutationImportance"]["trainData"])

# 實作範例 - Iris Dataset（Multi-class Classification）

In [None]:
# Load the iris dataset once and append the label column to the feature frame.
iris = load_iris(as_frame = True)
rawData = pd.concat([iris["data"], iris["target"]], axis = 1)

# Replace the spaces in every column name with underscores. Deriving the new
# names from the existing ones avoids hand-typed mistakes such as the stray
# apostrophe that used to end up in "'petal_width_(cm)".
rawData = rawData.rename(
    columns = {
        columnName: columnName.replace(" ", "_") for columnName in rawData.columns
    }
)

# 60 / 20 / 20 train / validation / test split with fixed seeds so the split
# is reproducible across kernel restarts.
trainData, testData = train_test_split(rawData, test_size = 0.2, shuffle = True, random_state = 42)
trainData, valiData = train_test_split(trainData, test_size = 0.25, shuffle = True, random_state = 42)

# Discard the shuffled original row labels so each split is indexed from 0.
trainData = trainData.reset_index(drop = True)
valiData = valiData.reset_index(drop = True)
testData = testData.reset_index(drop = True)

## Feature Engineering and Model Training

In [None]:
from AutoML_Flow.Model_Training_and_Evaluation_Flow import modelTrainingFlow

# Run the training flow once per feature-engineering configuration on the iris
# split. Every fit result is kept in `irisResults`; `result` still refers to
# the last run, which is what the display cells below read.
irisResults = list()
for oneFE in featureEngineerFlow:
    totalResult = modelTrainingFlow(
        trainData = trainData,
        valiData = valiData,
        testData = testData,
        inputFeatures = trainData.drop(columns = ["target"]).columns.tolist(),
        target = "target",
        targetType = "classification",
        ml_methods = oneFE,
        HTMetric = "cross_entropy",
        # NOTE(review): "f1_1" presumably scores class 1 only — confirm this is
        # the intended threshold metric for a three-class target.
        thresholdMetric = "f1_1",
        featureSelection = oneFE["FeatureSelection"],
        hyperparameter_tuning_method = "TPESampler",
        hyperparameter_tuning_epochs = 1
    )
    # NOTE(review): the breast-cancer cell configures importance through the
    # constructor argument `importanceMethod` and calls fit() without
    # arguments; confirm fit() still accepts `permutationImportanceMethod`.
    result = totalResult.fit(permutationImportanceMethod = ["trainData", "originalData"])
    irisResults.append(result)

In [None]:
# Display result["Evaluation"] of the last iris run as a table.
# NOTE(review): the breast-cancer cell reads result["Evaluation"] as a pair
# (parameter records, metric records); if that also holds here this will not
# render as a flat metrics table — confirm.
pd.DataFrame(result["Evaluation"])

In [None]:
# Display the "originalData" entry of result["PermutationImportance"] for the
# last iris run as a table.
pd.DataFrame(result["PermutationImportance"]["originalData"])

In [None]:
# Display the "trainData" entry of result["PermutationImportance"] for the
# last iris run as a table.
pd.DataFrame(result["PermutationImportance"]["trainData"])

# 實作範例 - Diabetes Dataset（Regression）

In [None]:
# Load the diabetes dataset once and append the target column to the feature
# frame (the dataset was previously loaded twice).
diabetes = load_diabetes(as_frame = True)
rawData = pd.concat([diabetes["data"], diabetes["target"]], axis = 1)

# 60 / 20 / 20 train / validation / test split with fixed seeds so the split
# is reproducible across kernel restarts.
trainData, testData = train_test_split(rawData, test_size = 0.2, shuffle = True, random_state = 42)
trainData, valiData = train_test_split(trainData, test_size = 0.25, shuffle = True, random_state = 42)

# Discard the shuffled original row labels so each split is indexed from 0.
trainData = trainData.reset_index(drop = True)
valiData = valiData.reset_index(drop = True)
testData = testData.reset_index(drop = True)

## Feature Engineering and Model Training

In [None]:
from AutoML_Flow.Model_Training_and_Evaluation_Flow import modelTrainingFlow

# Train a LightGBM + CatBoost regression flow on the diabetes split using only
# the first feature-engineering configuration (hence the [:1] slice).
for oneFE in featureEngineerFlow[:1]:
    # The flow instance gets its own name: binding it to `modelTrainingFlow`
    # would replace the imported class with an instance and break any cell
    # that constructs a new flow afterwards.
    regressionFlow = modelTrainingFlow(
        trainData = trainData,
        valiData = valiData,
        testData = testData,
        inputFeatures = trainData.drop(columns = ["target"]).columns.tolist(),
        target = "target",
        ml_methods = oneFE,
        targetType = "regression",
        HTMetric = "RMSE",
        hyperparameter_tuning_method = "TPESampler",
        hyperparameter_tuning_epochs = 1,
        featureSelection = oneFE["FeatureSelection"],
        importanceMethod = ["None"],
        modelNameList = [
            ["LightGBM", "CatBoost"]
        ]
    )
    # Here `totalResult` holds the fit result (not the flow object); the
    # display cells below index it with "Evaluation".
    totalResult = regressionFlow.fit()

In [None]:
# Display the "Evaluation" entry of the diabetes fit result as a table.
# NOTE(review): the breast-cancer cell reads the "Evaluation" entry as a pair
# (parameter records, metric records); if that also holds here this will not
# render as a flat metrics table — confirm.
pd.DataFrame(totalResult["Evaluation"])

In [None]:
# Display the "Evaluation" entry of the diabetes fit result as a table.
# NOTE(review): this cell is identical to the one directly before it —
# presumably a leftover duplicate; confirm and remove one of them.
pd.DataFrame(totalResult["Evaluation"])

# 不平衡資料處理範例

In [None]:
import numpy as np
import pandas as pd
# imblearn exposes `fetch_datasets` (plural); importing `fetch_dataset` fails.
from imblearn.datasets import fetch_datasets

# Fetch only the imbalanced "thyroid_sick" benchmark instead of every dataset
# in the collection, then pick it out of the returned mapping.
dataset = fetch_datasets(filter_data = ("thyroid_sick",))["thyroid_sick"]