In [3]:
import bentoml, mlflow, optuna, uuid
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.datasets import load_iris
# Load the iris dataset and unpack it into the feature matrix and labels.
data = load_iris()
X, y = data.data, data.target
# Short random tag: the first 8 hex characters of a freshly generated UUID4.
UNIQUE_PREFIX = uuid.uuid4().hex[:8]
def objective(trial, X, y):
    """Optuna objective: cross-validate an AdaBoost classifier for one trial.

    Samples ``n_estimators`` and ``learning_rate`` from *trial*, runs 5-fold
    cross-validation on ``(X, y)``, and logs the sampled parameters together
    with the mean train/test metrics to an MLflow run dedicated to this trial.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        X: Feature matrix handed to ``cross_validate``.
        y: Target labels handed to ``cross_validate``.

    Returns:
        The mean held-out (test-fold) accuracy across the CV folds, which is
        the value the study optimizes.
    """
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 10, 100),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0),
    }

    # The plain 'recall' / 'precision' scorers use average='binary' and are
    # not valid for a multiclass target; use the weighted variants (matching
    # 'f1_weighted') while keeping the original result keys.
    scoring = {
        'f1_weighted': 'f1_weighted',
        'accuracy': 'accuracy',
        'recall': 'recall_weighted',
        'precision': 'precision_weighted',
    }
    # Logged metric label -> key suffix in the cross_validate result dict.
    metric_sources = {
        'f1': 'f1_weighted',
        'accuracy': 'accuracy',
        'recall': 'recall',
        'precision': 'precision',
    }

    # One MLflow run per trial: params are immutable within a run, so logging
    # every trial into a single implicit run fails once a value changes.
    # nested=True lets this also work as a child of a caller-started run.
    with mlflow.start_run(nested=True):
        mlflow.log_params(param)
        model = AdaBoostClassifier(**param)
        scores = cross_validate(
            model,
            X,
            y,
            cv=5,
            n_jobs=-1,
            return_train_score=True,
            scoring=scoring,
        )

        metrics = {
            f'{split}_{label}_score': float(scores[f'{split}_{source}'].mean())
            for label, source in metric_sources.items()
            for split in ('train', 'test')
        }
        mlflow.log_metrics(metrics)

    # Optimize the held-out score; maximizing the train score would simply
    # reward the configuration that overfits the most.
    return metrics['test_accuracy_score']
# Send all MLflow logging to the tracking server on localhost:8081.
mlflow.set_tracking_uri('http://127.0.0.1:8081')

# No storage is given, so the study is created in memory.
study = optuna.create_study(
    direction='maximize',
    study_name='ada-hpo',
    load_if_exists=True,
)


def _run_trial(trial):
    """Evaluate one trial of ``objective`` on the module-level ``X`` and ``y``."""
    return objective(trial, X, y)


study.optimize(_run_trial, n_trials=5)
# Hyperparameters of the best-scoring trial.
best_param = study.best_params

[I 2023-11-24 00:28:17,489] A new study created in memory with name: ada-hpo
