In [6]:
import pandas as pd
import numpy as np

import optuna

import sklearn.ensemble as ensemble
import sklearn.metrics
from sklearn.model_selection import train_test_split


# Hyperarameter Tuning for Classification With Optuna

In [3]:
df = pd.read_csv("train.csv", index_col=[0])
df.head()
# Its cleadned and scaled data

Unnamed: 0,CustomerID,Age,TypeofContact,CityTier,DurationOfPitch,Occupation,Gender,NumberOfPersonVisiting,NumberOfFollowups,ProductPitched,PreferredPropertyStar,MaritalStatus,NumberOfTrips,Passport,PitchSatisfactionScore,OwnCar,NumberOfChildrenVisiting,Designation,MonthlyIncome,ProdTaken
0,0.591569,0.418605,0.0,1.0,0.063492,0.666667,1.0,0.5,0.625,0.5,0.0,0.333333,0.166667,0.0,0.5,1.0,0.333333,0.0,0.524538,0
1,0.854717,0.674419,0.0,1.0,0.126984,1.0,0.0,0.75,1.0,0.75,0.5,0.666667,0.666667,1.0,1.0,1.0,0.333333,0.75,1.0,0
2,0.739922,0.27907,0.0,1.0,0.380952,0.666667,1.0,0.5,0.875,0.5,0.0,1.0,0.166667,0.0,0.75,1.0,0.333333,0.0,0.583706,0
3,0.294659,0.418605,1.0,0.0,0.15873,0.0,0.0,0.5,0.375,0.5,0.5,0.666667,0.0,0.0,0.5,1.0,0.333333,0.0,0.356895,0
4,0.826478,0.209302,0.0,0.0,0.253968,0.0,0.0,0.5,0.875,0.0,0.5,0.666667,0.333333,0.0,0.5,1.0,0.666667,0.25,0.403891,0


In [4]:

X = df.drop("ProdTaken", axis=1)
y = df.ProdTaken

#### In optuna, A Trial represents a single call of the objective function
#### Study shows an optimization session which contains a set of trials
#### Study: optimization based on an objective function
#### Trial: a single execution of the objective function

In [10]:
def objective(trial):
    #  hyperparameter setting
    # Here i have just taken parameters for RandomForestClassifier
    entropy = trial.suggest_categorical('criterion', ["gini", "entropy"])
    n_estimators = trial.suggest_int("n_estimators", 10, 1000)
    max_depth = trial.suggest_int("max_depth", 3, 20)
    max_features  =  trial.suggest_uniform("max_features", 0.01, 1.0)

    # data loading and train-test split
    df = pd.read_csv("train.csv", index_col=[0])
    X = df.drop("ProdTaken", axis=1)
    y = df.ProdTaken
    X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)
    
    # model training and evaluation
    model = ensemble.RandomForestClassifier(
        n_estimators = n_estimators,
        criterion= entropy,
        max_depth = max_depth,
        max_features = max_features
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    
    # Just wanna to improve f1 score (You can replace this with according to your needs)
    error = sklearn.metrics.f1_score(y_val, y_pred)

    # output: evaluation score
    return error

###  **Creating an study for that ML model and optimize it**

In [12]:
# In Optuna, we use the study object to manage optimization.
# Method :func:`~optuna.create_study` returns a study object.
# A study object has useful properties for analyzing the optimization outcome.
study = optuna.create_study(direction='maximize') #Set minimize for minimization and maximize for maximization.
#To start the optimization, we create a study object and pass the objective function to method
study.optimize(objective, n_trials=50)

[32m[I 2022-12-23 17:42:55,785][0m A new study created in memory with name: no-name-7e1fc8dd-f92a-4e2d-ad4c-e63a8b055b42[0m
  max_features  =  trial.suggest_uniform("max_features", 0.01, 1.0)
[32m[I 2022-12-23 17:43:00,795][0m Trial 0 finished with value: 0.9282868525896415 and parameters: {'criterion': 'entropy', 'n_estimators': 296, 'max_depth': 16, 'max_features': 0.20896660372504566}. Best is trial 0 with value: 0.9282868525896415.[0m
  max_features  =  trial.suggest_uniform("max_features", 0.01, 1.0)
[32m[I 2022-12-23 17:43:23,092][0m Trial 1 finished with value: 0.9252243270189432 and parameters: {'criterion': 'gini', 'n_estimators': 971, 'max_depth': 15, 'max_features': 0.4598905844877823}. Best is trial 0 with value: 0.9282868525896415.[0m
  max_features  =  trial.suggest_uniform("max_features", 0.01, 1.0)
[32m[I 2022-12-23 17:43:31,829][0m Trial 2 finished with value: 0.8694779116465864 and parameters: {'criterion': 'gini', 'n_estimators': 396, 'max_depth': 8, 'max_

In [14]:
# To get the dictionary of parameter name and parameter values:
print("Return a dictionary of parameter name and parameter values:",study.best_params)

# To get the best observed value of the objective function:
print("Return the best observed value of the objective function:",study.best_value)

# To get the best trial:
print("Return the best trial:",study.best_trial)

# To get all trials:
# print("Return all the trials:", study.trials)

Return a dictionary of parameter name and parameter values: {'criterion': 'entropy', 'n_estimators': 816, 'max_depth': 19, 'max_features': 0.05546128085242047}
Return the best observed value of the objective function: 0.9335976214073339
Return the best trial: FrozenTrial(number=49, values=[0.9335976214073339], datetime_start=datetime.datetime(2022, 12, 23, 17, 50, 5, 478145), datetime_complete=datetime.datetime(2022, 12, 23, 17, 50, 17, 366867), params={'criterion': 'entropy', 'n_estimators': 816, 'max_depth': 19, 'max_features': 0.05546128085242047}, distributions={'criterion': CategoricalDistribution(choices=('gini', 'entropy')), 'n_estimators': IntDistribution(high=1000, log=False, low=10, step=1), 'max_depth': IntDistribution(high=20, log=False, low=3, step=1), 'max_features': FloatDistribution(high=1.0, log=False, low=0.01, step=None)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=49, state=TrialState.COMPLETE, value=None)


In [15]:
#importing all the plot functions
from optuna.visualization import plot_edf
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice

In [17]:
# Visualize the optimization history. See :func:`~optuna.visualization.plot_optimization_history` for the details.
plot_optimization_history(study)

ImportError: Tried to import 'plotly' but failed. Please make sure that the package is installed correctly to use this feature. Actual error: No module named 'plotly'.