<a href="https://colab.research.google.com/github/harrybaines/ml-resources/blob/main/optuna/optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Optuna Example

This example uses Optuna to optimize a classifier configuration for the Iris dataset. The classifiers come from scikit-learn. We optimize both the choice of classifier (SVC or RandomForest) and the hyperparameters of the chosen classifier.

Optuna tutorial docs: https://optuna.readthedocs.io/en/stable/tutorial/index.html

In [None]:
!pip install optuna

In [None]:
import optuna
import sklearn.datasets
import sklearn.ensemble
import sklearn.model_selection
import sklearn.svm

In [None]:
# FYI: Objective functions can take additional arguments
# (https://optuna.readthedocs.io/en/stable/faq.html#objective-func-additional-args).
def objective(trial, seed=0):
    """Score one Optuna trial as the mean 3-fold cross-validation score on Iris.

    The trial first picks a classifier ("SVC" or "RandomForest") and then one
    hyperparameter for it: ``svc_c`` (log-uniform float in [1e-10, 1e10]) for
    the SVC, or ``rf_max_depth`` (log-scaled int in [2, 32]) for the forest.

    Args:
        trial: Optuna trial object used to sample the classifier choice and
            its hyperparameter.
        seed: Random state handed to ``RandomForestClassifier``. Fixing it
            removes the forest's own randomness from the score, so the same
            ``rf_max_depth`` is not ranked differently from one trial to the
            next purely by chance. Optuna calls ``objective(trial)``, so the
            default is what a study uses unless the function is wrapped.

    Returns:
        The mean of the per-fold scores returned by ``cross_val_score``.
    """
    iris = sklearn.datasets.load_iris()
    x, y = iris.data, iris.target

    classifier_name = trial.suggest_categorical("classifier", ["SVC", "RandomForest"])
    if classifier_name == "SVC":
        svc_c = trial.suggest_float("svc_c", 1e-10, 1e10, log=True)
        classifier_obj = sklearn.svm.SVC(C=svc_c, gamma="auto")
    else:
        rf_max_depth = trial.suggest_int("rf_max_depth", 2, 32, log=True)
        classifier_obj = sklearn.ensemble.RandomForestClassifier(
            max_depth=rf_max_depth, n_estimators=10, random_state=seed
        )

    score = sklearn.model_selection.cross_val_score(classifier_obj, x, y, n_jobs=-1, cv=3)
    accuracy = score.mean()
    return accuracy

In [None]:
# Seed the sampler explicitly so a fresh kernel proposes the same sequence of
# parameters on every run; without a seed each run explores different trials.
sampler = optuna.samplers.TPESampler(seed=0)
study = optuna.create_study(direction="maximize", sampler=sampler)
study.optimize(objective, n_trials=100)

[32m[I 2022-10-17 12:40:57,911][0m A new study created in memory with name: no-name-a6bd25a6-6589-45fd-8405-ef24d98864b3[0m
[32m[I 2022-10-17 12:41:00,313][0m Trial 0 finished with value: 0.9466666666666667 and parameters: {'classifier': 'RandomForest', 'rf_max_depth': 10}. Best is trial 0 with value: 0.9466666666666667.[0m
[32m[I 2022-10-17 12:41:00,467][0m Trial 1 finished with value: 0.96 and parameters: {'classifier': 'RandomForest', 'rf_max_depth': 3}. Best is trial 1 with value: 0.96.[0m
[32m[I 2022-10-17 12:41:00,666][0m Trial 2 finished with value: 0.96 and parameters: {'classifier': 'RandomForest', 'rf_max_depth': 21}. Best is trial 1 with value: 0.96.[0m
[32m[I 2022-10-17 12:41:00,719][0m Trial 3 finished with value: 0.7466666666666667 and parameters: {'classifier': 'SVC', 'svc_c': 0.026029191609081185}. Best is trial 1 with value: 0.96.[0m
[32m[I 2022-10-17 12:41:00,763][0m Trial 4 finished with value: 0.98 and parameters: {'classifier': 'SVC', 'svc_c': 7.37

In [None]:
# Display the full record of the best trial: its value, sampled parameters,
# their distributions, timestamps and final state.
study.best_trial

FrozenTrial(number=4, values=[0.98], datetime_start=datetime.datetime(2022, 10, 17, 12, 41, 0, 724012), datetime_complete=datetime.datetime(2022, 10, 17, 12, 41, 0, 763359), params={'classifier': 'SVC', 'svc_c': 7.37387215533868}, distributions={'classifier': CategoricalDistribution(choices=('SVC', 'RandomForest')), 'svc_c': FloatDistribution(high=10000000000.0, log=True, low=1e-10, step=None)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=4, state=TrialState.COMPLETE, value=None)

In [None]:
# Display only the parameter values of the best trial, as a dict.
study.best_params

{'classifier': 'SVC', 'svc_c': 7.37387215533868}