In [2]:
import optuna
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the iris dataset and pull out the feature matrix and the label vector.
data = load_iris()
X = data.data
y = data.target

# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
def funetune_using_optuna(model, n_trials=10):
    """Search tree-ensemble hyperparameters for ``model``'s class with Optuna.

    Each trial builds a fresh estimator of the same class as ``model`` using
    the suggested ``n_estimators``, ``max_depth``, ``min_samples_split`` and
    ``min_samples_leaf``, fits it on the notebook-level ``X_train`` /
    ``y_train`` and scores it by accuracy on ``X_test`` / ``y_test``.

    Args:
        model: An estimator instance. Only its class is used; the instance
            itself is never fitted or modified.
        n_trials: Number of Optuna trials to run (defaults to 10).

    Returns:
        dict: The best hyperparameters found (``study.best_params``).

    Raises:
        TypeError: If the class of ``model`` does not accept the four
            hyperparameters above plus ``random_state`` as keyword arguments.
    """
    # Candidates are built from the caller's estimator class, so the `model`
    # argument actually determines what gets tuned.
    model_type = model.__class__

    def objective(trial):
        """Fit one candidate with trial-suggested settings and return its accuracy."""
        # The dict is evaluated top to bottom, so parameters are suggested in
        # a fixed order on every trial.
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 10, 200),
            "max_depth": trial.suggest_int("max_depth", 2, 32, log=True),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 20),
        }

        # A distinct local name avoids shadowing the outer `model` argument.
        candidate = model_type(**params, random_state=42)
        candidate.fit(X_train, y_train)

        # NOTE(review): trials are scored on the held-out test split, so the
        # selected hyperparameters are tuned against the test data. Consider
        # scoring on a validation split or with cross-validation instead.
        preds = candidate.predict(X_test)
        return accuracy_score(y_test, preds)

    # The objective returns accuracy, so the study is set up to maximize it.
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=n_trials)

    print(f"Best Score  : {study.best_value}")
    print(f"Best Params : {study.best_params}")

    return study.best_params

[I 2024-11-30 17:33:33,672] A new study created in memory with name: no-name-bc41d479-ab73-49c5-8e6d-655a0b3f73d6
[I 2024-11-30 17:33:33,815] Trial 0 finished with value: 1.0 and parameters: {'n_estimators': 135, 'max_depth': 5, 'min_samples_split': 14, 'min_samples_leaf': 9}. Best is trial 0 with value: 1.0.
[I 2024-11-30 17:33:33,861] Trial 1 finished with value: 1.0 and parameters: {'n_estimators': 42, 'max_depth': 2, 'min_samples_split': 13, 'min_samples_leaf': 9}. Best is trial 0 with value: 1.0.
[I 2024-11-30 17:33:33,969] Trial 2 finished with value: 1.0 and parameters: {'n_estimators': 110, 'max_depth': 14, 'min_samples_split': 19, 'min_samples_leaf': 13}. Best is trial 0 with value: 1.0.
[I 2024-11-30 17:33:34,024] Trial 3 finished with value: 1.0 and parameters: {'n_estimators': 56, 'max_depth': 2, 'min_samples_split': 8, 'min_samples_leaf': 18}. Best is trial 0 with value: 1.0.
[I 2024-11-30 17:33:34,121] Trial 4 finished with value: 1.0 and parameters: {'n_estimators': 89, 

Best parameters: {'n_estimators': 135, 'max_depth': 5, 'min_samples_split': 14, 'min_samples_leaf': 9}
Best accuracy: 1.0


In [18]:
# Random forest built with the constructor's default settings; it is not fitted here.
model = RandomForestClassifier()

In [20]:
# Show which class the estimator is an instance of (rendered as the cell output).
type(model)

sklearn.ensemble._forest.RandomForestClassifier

In [None]:
def perform_kfold_cv(model, X, y, k=5):
    """Placeholder for k-fold cross-validation of ``model`` on ``(X, y)``.

    The defaulted ``k`` is declared after the required parameters because
    Python does not allow a parameter without a default to follow one that
    has a default.

    Args:
        model: Estimator to evaluate (unused until the body is implemented).
        X: Input samples (unused until the body is implemented).
        y: Targets for ``X`` (unused until the body is implemented).
        k: Presumably the number of folds; defaults to 5. TODO confirm once
            the body is implemented.

    Returns:
        None: The function is currently a stub.
    """
    # TODO: implement the cross-validation loop.
    pass