In [1]:
import pandas as pd 
import numpy as np
from sklearn import ensemble
from sklearn import metrics
from sklearn import model_selection

from functools import partial
import optuna


def optimize(trial,x,y):
    criterion = trial.suggest_categorical("criterion",["gini","entropy"])
    n_estimators = trial.suggest_int("n_estimators",100,1500)
    max_depth = trial.suggest_int("max_depth",3,15)
    max_features = trial.suggest_uniform("max_features",0.01,1.0)

    model = ensemble.RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        max_features=max_features,
        criterion=criterion
    )
    
    kf = model_selection.StratifiedKFold(n_splits=5)
    accuracies = []
    for idx in kf.split(X=x,y=y):
        train_idx,test_idx = idx[0], idx[1]
        xtrain = x[train_idx]
        ytrain = y[train_idx]

        xtest = x[test_idx]
        ytest = y[test_idx]

        model.fit(xtrain,ytrain)
        preds = model.predict(xtest)
        fold_acc = metrics.accuracy_score(ytest,preds)
        accuracies.append(fold_acc)

    return -1.0*np.mean(accuracies)


if __name__ == "__main__":
    df = pd.read_csv("/Users/yunbo/Documents/GitHub/Machine-learning-learning-and-code-practice/Hyper_parameter_tuning/input/train.csv")
    X = df.drop("price_range",axis=1).values
    y = df.price_range.values
    optimization_function = partial(optimize,x=X,y=y)
    
    study = optuna.create_study(direction="minimize")
    study.optimize(optimization_function,n_trials=15)
               

[I 2024-02-08 12:23:10,042] A new study created in memory with name: no-name-65551094-961c-4e10-b05a-4e98a0c37392
  max_features = trial.suggest_uniform("max_features",0.01,1.0)
[I 2024-02-08 12:23:33,902] Trial 0 finished with value: -0.906 and parameters: {'criterion': 'entropy', 'n_estimators': 996, 'max_depth': 9, 'max_features': 0.9791782139181651}. Best is trial 0 with value: -0.906.
  max_features = trial.suggest_uniform("max_features",0.01,1.0)
[I 2024-02-08 12:23:37,305] Trial 1 finished with value: -0.8925000000000001 and parameters: {'criterion': 'entropy', 'n_estimators': 308, 'max_depth': 8, 'max_features': 0.30044753370659677}. Best is trial 0 with value: -0.906.
  max_features = trial.suggest_uniform("max_features",0.01,1.0)
[I 2024-02-08 12:23:42,067] Trial 2 finished with value: -0.8504999999999999 and parameters: {'criterion': 'entropy', 'n_estimators': 636, 'max_depth': 12, 'max_features': 0.11793269844072607}. Best is trial 0 with value: -0.906.
  max_features = tri