In [4]:
import optuna
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import os

In [3]:
os.chdir("../")
os.getcwd()

'/home/izam/coding/Customer-Churn'

In [5]:
train = pd.read_csv("./artifacts/data_transformation/train.csv")
test = pd.read_csv("./artifacts/data_transformation/test.csv")

train.shape, test.shape

((5634, 20), (1409, 20))

In [7]:
def objective(trial):
        X_train= train.drop(["Churn"], axis=1)
        y_train = train["Churn"]
        X_test = test.drop(["Churn"], axis=1)
        y_test = test["Churn"]  

        n_estimators = trial.suggest_int('n_estimators', 50, 200)
        max_depth = trial.suggest_int('max_depth', 5, 30)
        min_samples_split = trial.suggest_float('min_samples_split', 0.1, 1.0)
        min_samples_leaf = trial.suggest_float('min_samples_leaf', 0.1, 0.5)

        model = RandomForestClassifier(
                n_estimators=n_estimators,
                max_depth=max_depth,
                min_samples_split=min_samples_split,
                min_samples_leaf=min_samples_leaf,
                random_state=42
        )

        model.fit(X_train, y_train)
        prediction = model.predict(X_test)
        accuracy = accuracy_score(y_pred=prediction, y_true=y_test)
        
        return accuracy

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=1000)

best_params = study.best_params
print(f"best params  -{best_params}")


best_trial = study.best_trial
print(f"Best Trial Number: {best_trial.number}")
print(f"Best Trial Value (Accuracy): {best_trial.value}")

[I 2023-12-15 19:21:49,517] A new study created in memory with name: no-name-8242bf99-3776-4d06-9bbe-c902678782be
[I 2023-12-15 19:21:49,638] Trial 0 finished with value: 0.7530163236337828 and parameters: {'n_estimators': 111, 'max_depth': 17, 'min_samples_split': 0.7876141050279865, 'min_samples_leaf': 0.36757584111609753}. Best is trial 0 with value: 0.7530163236337828.
[I 2023-12-15 19:21:49,704] Trial 1 finished with value: 0.7530163236337828 and parameters: {'n_estimators': 60, 'max_depth': 10, 'min_samples_split': 0.6838014609613262, 'min_samples_leaf': 0.11223665078603369}. Best is trial 0 with value: 0.7530163236337828.
[I 2023-12-15 19:21:49,829] Trial 2 finished with value: 0.7530163236337828 and parameters: {'n_estimators': 118, 'max_depth': 22, 'min_samples_split': 0.8070402065381571, 'min_samples_leaf': 0.43918942284171825}. Best is trial 0 with value: 0.7530163236337828.
[I 2023-12-15 19:21:49,915] Trial 3 finished with value: 0.7530163236337828 and parameters: {'n_estim

best params  -{'n_estimators': 111, 'max_depth': 17, 'min_samples_split': 0.7876141050279865, 'min_samples_leaf': 0.36757584111609753}
Best Trial Number: 0
Best Trial Value (Accuracy): 0.7530163236337828
