An open source hyperparameter optimization framework to automate hyperparameter search

### Getting the optuna

In [1]:
!pip install --quiet optuna

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m413.4/413.4 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import optuna

optuna.__version__

'3.5.0'

### with sklearn

In [3]:
# defining a model

import sklearn.datasets
import sklearn.ensemble
import sklearn.model_selection

def objective():
    iris = sklearn.datasets.load_iris()  # Prepare the data.

    clf = sklearn.ensemble.RandomForestClassifier(
        n_estimators=5, max_depth=3)  # Define the model.

    return sklearn.model_selection.cross_val_score(
        clf, iris.data, iris.target, n_jobs=-1, cv=3).mean()  # Train and evaluate the model.

print('Accuracy: {}'.format(objective()))

Accuracy: 0.96


In [23]:
%%time
# optimizing that with optuna

import optuna

def objective(trial):
  iris = sklearn.datasets.load_iris()

  n_estimators = trial.suggest_int("n_estimators", 2, 20)
  max_depth = int(trial.suggest_float("max_depth", 1, 32, log=True))

  model = sklearn.ensemble.RandomForestClassifier(
      n_estimators=n_estimators,
      max_depth=max_depth,
  )

  return sklearn.model_selection.cross_val_score(
      model,
      iris.data,
      iris.target,
      n_jobs=-1,
      cv=3,
  ).mean() # have to maximize this value

study1 = optuna.create_study(direction="maximize")
study1.optimize(objective, n_trials=100)

trial = study1.best_trial

[I 2024-01-22 11:08:38,789] A new study created in memory with name: no-name-bfb6ce41-d28b-43eb-9b1b-afaff6c70c27
[I 2024-01-22 11:08:41,342] Trial 0 finished with value: 0.9533333333333333 and parameters: {'n_estimators': 11, 'max_depth': 4.416712658862447}. Best is trial 0 with value: 0.9533333333333333.
[I 2024-01-22 11:08:41,520] Trial 1 finished with value: 0.9533333333333333 and parameters: {'n_estimators': 7, 'max_depth': 6.685857148580248}. Best is trial 0 with value: 0.9533333333333333.
[I 2024-01-22 11:08:41,616] Trial 2 finished with value: 0.9533333333333333 and parameters: {'n_estimators': 4, 'max_depth': 25.289030206083872}. Best is trial 0 with value: 0.9533333333333333.
[I 2024-01-22 11:08:41,850] Trial 3 finished with value: 0.9533333333333333 and parameters: {'n_estimators': 18, 'max_depth': 2.459567584600423}. Best is trial 0 with value: 0.9533333333333333.
[I 2024-01-22 11:08:41,908] Trial 4 finished with value: 0.94 and parameters: {'n_estimators': 2, 'max_depth': 

CPU times: user 2.93 s, sys: 153 ms, total: 3.09 s
Wall time: 14.3 s


In [24]:
print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

Accuracy: 0.9733333333333333
Best hyperparameters: {'n_estimators': 9, 'max_depth': 31.35957108500867}


In [9]:
# with multiple models

import sklearn.svm

def objective(trial):
  iris = sklearn.datasets.load_iris()

  classifier = trial.suggest_categorical("classifier", ["RandomForest", "SVC"])

  if classifier == 'RandomForest':
      n_estimators = trial.suggest_int('n_estimators', 2, 20)
      max_depth = int(trial.suggest_float('max_depth', 1, 32, log=True))

      clf = sklearn.ensemble.RandomForestClassifier(
          n_estimators=n_estimators, max_depth=max_depth)
  else:
      c = trial.suggest_float('svc_c', 1e-10, 1e10, log=True)

      clf = sklearn.svm.SVC(C=c, gamma='auto')

  return sklearn.model_selection.cross_val_score(
      clf, iris.data, iris.target, n_jobs=-1, cv=3).mean()

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=300)

trial = study.best_trial

[I 2024-01-22 10:45:57,964] A new study created in memory with name: no-name-68e04315-ce7b-4cc8-8ba9-a5c59b359c13
[I 2024-01-22 10:45:58,032] Trial 0 finished with value: 0.32 and parameters: {'classifier': 'SVC', 'svc_c': 0.0026020063674977996}. Best is trial 0 with value: 0.32.
[I 2024-01-22 10:45:58,068] Trial 1 finished with value: 0.32 and parameters: {'classifier': 'SVC', 'svc_c': 0.0019574214016777275}. Best is trial 0 with value: 0.32.
[I 2024-01-22 10:45:58,148] Trial 2 finished with value: 0.9466666666666667 and parameters: {'classifier': 'RandomForest', 'n_estimators': 5, 'max_depth': 11.987665562366972}. Best is trial 2 with value: 0.9466666666666667.
[I 2024-01-22 10:45:58,174] Trial 3 finished with value: 0.96 and parameters: {'classifier': 'SVC', 'svc_c': 734.4812747958325}. Best is trial 3 with value: 0.96.
[I 2024-01-22 10:45:58,207] Trial 4 finished with value: 0.9466666666666667 and parameters: {'classifier': 'SVC', 'svc_c': 203.23057679139825}. Best is trial 3 with 

In [10]:
print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

Accuracy: 0.9866666666666667
Best hyperparameters: {'classifier': 'SVC', 'svc_c': 4.664997738421873}


### Visualizaitons

In [11]:
# history

optuna.visualization.plot_optimization_history(study)

In [14]:
# accuracies

optuna.visualization.plot_slice(study)

In [13]:
# Plotting the accuracy surface for the hyperparameters involved in the random forest model.

optuna.visualization.plot_contour(study, params=['n_estimators', 'max_depth'])

In [27]:
viz = optuna.visualization.plot_timeline(study)
viz.show()


plot_timeline is experimental (supported from v3.2.0). The interface can change in the future.



In [28]:
fig = optuna.visualization.plot_param_importances(study1)
fig.show()

### Storing in DB

In [29]:
def objective(trial):
    x = trial.suggest_float("x", 0, 10)
    return x**2

In [32]:
# creating and saving the study

study = optuna.create_study(storage="sqlite:///data.db", study_name="my_study1")
study.optimize(objective, n_trials=3)

[I 2024-01-22 11:12:43,660] A new study created in RDB with name: my_study1
[I 2024-01-22 11:12:43,817] Trial 0 finished with value: 5.899388023588423 and parameters: {'x': 2.428865583680666}. Best is trial 0 with value: 5.899388023588423.
[I 2024-01-22 11:12:43,909] Trial 1 finished with value: 85.30654390036123 and parameters: {'x': 9.236154172617585}. Best is trial 0 with value: 5.899388023588423.
[I 2024-01-22 11:12:44,020] Trial 2 finished with value: 60.519786878137175 and parameters: {'x': 7.779446437770311}. Best is trial 0 with value: 5.899388023588423.


In [34]:
# loading the saved study

loaded_study = optuna.load_study(study_name="my_study1", storage="sqlite:///data.db")
len(loaded_study.trials) == len(study.trials)

True

### Saving the artifacts

### custom pruners

### custom samplers

### playing with studies

### Terminator

### playing with trials