In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

data = load_iris()
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [2]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(random_state=42)


In [3]:
from sklearn.model_selection import GridSearchCV

# hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# GridSearchCV object
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')

# Fitting the model
grid_search.fit(X_train, y_train)

# The Best Parameters
print("Best hyperparameters:", grid_search.best_params_)


Best hyperparameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}


In [4]:
print("Best CV accuracy from GridSearch:", grid_search.best_score_)

Best CV accuracy from GridSearch: 0.9583333333333334


In [5]:
from sklearn.model_selection import RandomizedSearchCV

# hyperparameter distribution
param_dist = {
    'n_estimators': np.arange(50, 201, 50),
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# RandomizedSearchCV object
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_dist, n_iter=100, cv=5, scoring='accuracy', random_state=42)

# Fitting the model
random_search.fit(X_train, y_train)

# The Best Parameters
print("Best hyperparameters:", random_search.best_params_)


Best hyperparameters: {'n_estimators': np.int64(200), 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_depth': 10}


In [6]:
print("Best CV accuracy from RandomSearch:", random_search.best_score_)

Best CV accuracy from RandomSearch: 0.9583333333333334


In [7]:
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score

best_model = grid_search.best_estimator_ 

# Calculate cross-validation scores
cv_scores = cross_val_score(best_model, X_train, y_train, cv=5)
print("Cross-validation accuracy scores:", cv_scores)
print("Mean accuracy:", cv_scores.mean())

# Evaluations using the test set
y_pred = best_model.predict(X_test)
print("Classification report:\n", classification_report(y_test, y_pred))


Cross-validation accuracy scores: [0.95833333 1.         0.875      1.         0.95833333]
Mean accuracy: 0.9583333333333334
Classification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [None]:
import optuna
from sklearn.metrics import accuracy_score

# Using Optuna
def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 200, step=50)
    max_depth = trial.suggest_int('max_depth', 5, 20)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 10)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 4)
    
    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, 
                                   min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, random_state=42)
    
    # Evaluate the model using cross-validation
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    return accuracy

# Creating Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)


  from .autonotebook import tqdm as notebook_tqdm
[I 2025-04-06 18:11:09,866] A new study created in memory with name: no-name-581d69fc-d15a-426b-96aa-3f6c42a54f40
[I 2025-04-06 18:11:10,073] Trial 0 finished with value: 1.0 and parameters: {'n_estimators': 50, 'max_depth': 18, 'min_samples_split': 7, 'min_samples_leaf': 4}. Best is trial 0 with value: 1.0.
[I 2025-04-06 18:11:10,506] Trial 1 finished with value: 1.0 and parameters: {'n_estimators': 150, 'max_depth': 19, 'min_samples_split': 9, 'min_samples_leaf': 1}. Best is trial 0 with value: 1.0.
[I 2025-04-06 18:11:10,779] Trial 2 finished with value: 1.0 and parameters: {'n_estimators': 100, 'max_depth': 6, 'min_samples_split': 4, 'min_samples_leaf': 4}. Best is trial 0 with value: 1.0.
[I 2025-04-06 18:11:11,430] Trial 3 finished with value: 1.0 and parameters: {'n_estimators': 150, 'max_depth': 8, 'min_samples_split': 7, 'min_samples_leaf': 4}. Best is trial 0 with value: 1.0.
[I 2025-04-06 18:11:11,922] Trial 4 finished with v

Best hyperparameters: {'n_estimators': 50, 'max_depth': 18, 'min_samples_split': 7, 'min_samples_leaf': 4}


In [9]:
print("Best hyperparameters from Optuna:", study.best_params)
print("Best accuracy from Optuna:", study.best_value)

Best hyperparameters from Optuna: {'n_estimators': 50, 'max_depth': 18, 'min_samples_split': 7, 'min_samples_leaf': 4}
Best accuracy from Optuna: 1.0
