In [1]:
import bentoml, mlflow, optuna, uuid
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.datasets import load_iris
data = load_iris()
X = data.data
y = data.target
UNIQUE_PREFIX = str(uuid.uuid4())[:8]
def objective(trial, X, y):
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 10, 100),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0),
    }
    mlflow.log_params(param)
    model = AdaBoostClassifier(**param)
    scores = cross_validate(model, X, y, cv=5, n_jobs=-1, return_train_score=True, scoring=['f1_weighted', 'accuracy', 'recall', 'precision'])

    # mlflow.log_metrics('scores', scores)
    # mlflow.log_metric('train_accuracy', scores['train_accuracy'].mean())

    # mlflow.log_metric('test_accuracy', scores['test_accuracy'].mean())
    # mlflow.log_metric('test_recall', scores['test_recall'].mean())
    # mlflow.log_metric('test_precision', scores['test_precision'].mean())
    # mlflow.log_metric('test_f1_weighted', scores['test_f1_weighted'].mean())
    
    train_f1_score = scores['train_f1_weighted'].mean()
    test_f1_score = scores['test_f1_weighted'].mean()

    train_accuracy_score = scores['train_accuracy'].mean()
    test_accuracy_score = scores['test_accuracy'].mean()

    train_recall_score = scores['train_recall'].mean()
    test_recall_score = scores['test_recall'].mean()

    train_precision_score = scores['train_precision'].mean()
    test_precision_score = scores['test_precision'].mean()

    metrics = {'train_f1_score': train_f1_score, 'test_f1_score': test_f1_score,
            'train_accuracy_score': train_accuracy_score, 'test_accuracy_score': test_accuracy_score,
            'train_recall_score': train_recall_score, 'test_recall_score': test_recall_score,
            'train_precision_score': train_precision_score, 'test_precision_score': test_precision_score}
    mlflow.log_metrics(metrics)
    
    # for metric_name, score_value in scores.items():
    #     if metric_name == 'fit_time' or metric_name == 'score_time':
    #         continue
    #     else:
    #         mlflow.log_metric(metric_name, score_value.mean())
            
    return scores['train_accuracy'].mean()


* 'schema_extra' has been renamed to 'json_schema_extra'
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def tune_AdaBoostClassifier(n_trials, X_train, y_train):
    experiment_name = 'AdaBoostClassifier-hpo'
    mlflow.set_tracking_uri('http://127.0.0.1:8081')
    mlflow.set_experiment(experiment_name)

    study = optuna.create_study(direction='maximize')  # 'maximize'일 경우 정확도 같은 지표를 최대화
    study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=n_trials)
    best_params = study.best_params  
    # best_model_value = study.best_value

    run_name = 'best-model-AdaBoostClassifier'
    with mlflow.start_run(run_name=run_name):
        mlflow.log_params(best_params)
        model_instance = AdaBoostClassifier(**best_params)
        scores = cross_validate(model_instance, X, y, cv=5, n_jobs=-1, return_train_score=True, scoring=['f1_weighted', 'accuracy', 'recall', 'precision'])
    
        train_f1_score = scores['train_f1_weighted'].mean()
        test_f1_score = scores['test_f1_weighted'].mean()

        train_accuracy_score = scores['train_accuracy'].mean()
        test_accuracy_score = scores['test_accuracy'].mean()

        train_recall_score = scores['train_recall'].mean()
        test_recall_score = scores['test_recall'].mean()

        train_precision_score = scores['train_precision'].mean()
        test_precision_score = scores['test_precision'].mean()

        metrics = {'train_f1_score': train_f1_score, 'test_f1_score': test_f1_score,
                'train_accuracy_score': train_accuracy_score, 'test_accuracy_score': test_accuracy_score,
                'train_recall_score': train_recall_score, 'test_recall_score': test_recall_score,
                'train_precision_score': train_precision_score, 'test_precision_score': test_precision_score}
        mlflow.log_metrics(metrics)

In [3]:
tune_AdaBoostClassifier(5, X, y)

2023/11/24 00:31:40 INFO mlflow.tracking.fluent: Experiment with name 'AdaBoostClassifier-hpo' does not exist. Creating a new experiment.
[I 2023-11-24 00:31:40,322] A new study created in memory with name: no-name-45ed08f6-601f-4b4a-af89-3054384087d3
[I 2023-11-24 00:31:44,452] Trial 0 finished with value: 0.9333333333333333 and parameters: {'n_estimators': 89, 'learning_rate': 0.162932517882371}. Best is trial 0 with value: 0.9333333333333333.


In [1]:
from sklearn.datasets import load_iris
data = load_iris()
X = data.data
y = data.target

In [2]:
import optuna, uuid, mlflow
from sklearn.ensemble  import AdaBoostClassifier
from sklearn.model_selection import cross_val_score, cross_validate
import mlflow
from sklearn.model_selection import cross_validate

def clf_cross_score(model_instance, X, y, k):
    cv_results = cross_validate(model_instance, X, y, cv=k, 
                                scoring=['f1_weighted', 'accuracy', 'recall', 'precision'], 
                                n_jobs=-1,
                                return_train_score=True)

    train_f1_score = cv_results['train_f1_weighted'].mean()
    test_f1_score = cv_results['test_f1_weighted'].mean()

    train_accuracy_score = cv_results['train_accuracy'].mean()
    test_accuracy_score = cv_results['test_accuracy'].mean()

    train_recall_score = cv_results['train_recall'].mean()
    test_recall_score = cv_results['test_recall'].mean()

    train_precision_score = cv_results['train_precision'].mean()
    test_precision_score = cv_results['test_precision'].mean()

    metrics = {'train_f1_score': train_f1_score, 'test_f1_score': test_f1_score,
            'train_accuracy_score': train_accuracy_score, 'test_accuracy_score': test_accuracy_score,
            'train_recall_score': train_recall_score, 'test_recall_score': test_recall_score,
            'train_precision_score': train_precision_score, 'test_precision_score': test_precision_score}
    mlflow.log_metrics(metrics)

    return train_f1_score


UNIQUE_PREFIX = str(uuid.uuid4())[:8]

def objective_AdaBoostClassifier(trial, X_train, y_train):    
    # 모델 생성
    params = {
            'n_estimators': trial.suggest_int('n_estimators', 50, 500),
            'learning_rate': trial.suggest_float('learning_rate', 0.1, 1.0),
            'random_state': trial.suggest_int('random_state', 42, 123),
            }
    
    run_name = f"{UNIQUE_PREFIX}AdaBoostClassifier-{trial.number}"
    with mlflow.start_run(run_name=run_name):
        mlflow.log_params(params)
        model_instance = AdaBoostClassifier(**params)
        
        score = clf_cross_score(model_instance, X_train, y_train, 5)

    return score

def tune_AdaBoostClassifier(n_trials, X_train, y_train):
    experiment_name = 'AdaBoostClassifier-hpo'
    mlflow.set_tracking_uri('http://127.0.0.1:8081')
    mlflow.set_experiment(experiment_name)

    study = optuna.create_study(direction='maximize')  # 'maximize'일 경우 정확도 같은 지표를 최대화
    study.optimize(lambda trial: objective_AdaBoostClassifier(trial, X_train, y_train), n_trials=n_trials)
    best_params = study.best_params  
    # best_model_value = study.best_value

    run_name = 'best-model-AdaBoostClassifier'
    with mlflow.start_run(run_name=run_name):
        mlflow.log_params(best_params)
        model_instance = AdaBoostClassifier(**best_params)
        clf_cross_score(model_instance, X_train, y_train, 5)

  from .autonotebook import tqdm as notebook_tqdm

* 'schema_extra' has been renamed to 'json_schema_extra'


In [3]:
tune_AdaBoostClassifier(5, X, y)

[I 2023-11-24 00:33:11,373] A new study created in memory with name: no-name-3a499426-844e-4b45-8070-8d245dd46ec3
[I 2023-11-24 00:33:15,470] Trial 0 finished with value: 0.9360150028014346 and parameters: {'n_estimators': 197, 'learning_rate': 0.5353147776597884, 'random_state': 79}. Best is trial 0 with value: 0.9360150028014346.
[I 2023-11-24 00:33:17,668] Trial 1 finished with value: 0.9308923681911152 and parameters: {'n_estimators': 424, 'learning_rate': 0.6203825089634724, 'random_state': 105}. Best is trial 0 with value: 0.9360150028014346.
[I 2023-11-24 00:33:18,277] Trial 2 finished with value: 0.9497369407939417 and parameters: {'n_estimators': 200, 'learning_rate': 0.8761497114604095, 'random_state': 114}. Best is trial 2 with value: 0.9497369407939417.
[I 2023-11-24 00:33:19,560] Trial 3 finished with value: 0.9565317210435795 and parameters: {'n_estimators': 459, 'learning_rate': 0.985935147894125, 'random_state': 118}. Best is trial 3 with value: 0.9565317210435795.
[I 2