# Hyperparameter tuning models - Customer Churn

In [None]:
!pip install -r requirements.txt --quiet

In [None]:
import pandas as pd

# Path to the synthetic customer-churn dataset (relative to the working
# directory the notebook is run from).
CHURN_CSV = "data/synth_customer_churn.csv"
cust_df = pd.read_csv(CHURN_CSV)

In [None]:
# Target: encode the churn-risk label as an integer code (0 = low, 2 = high).
# Labels missing from this mapping become NaN under Series.map.
risk_codes = {'Low Risk': 0, 'Medium Risk': 1, 'High Risk': 2}
y = cust_df['ChurnCategory'].map(risk_codes)

# Features: every column except the target and the customer identifier.
non_feature_cols = ['ChurnCategory', 'CustomerID']
X = cust_df.drop(columns=non_feature_cols)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for final evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Option 1 - Tune the stacked ensemble classifier

In [None]:
import json

# Read the hyperparameter grid for the stacked classifier. JSON is UTF-8 by
# specification, so pin the encoding instead of relying on the platform
# default (which differs on e.g. Windows).
with open("models/configs/stack_param_grid.json", "r", encoding="utf-8") as f:
    config = json.load(f)

In [None]:
# Copy the loaded config into the grid object handed to GridSearchCV below.
# This is a shallow copy — assumes the JSON root is an object (dict);
# TODO confirm against the config file.
param_grid = dict(config)

In [None]:
import joblib

# NOTE: joblib artifacts are pickles — only load files from a trusted source.
STACK_PIPELINE_PATH = "models/classifiers/stack_class_pipe.joblib"
stacking_pipeline = joblib.load(STACK_PIPELINE_PATH)

In [None]:
from sklearn.model_selection import GridSearchCV

# Exhaustive 5-fold search over the stacked pipeline's parameter grid.
# The target has three classes (0/1/2), so the plain 'f1' scorer (binary
# averaging) cannot score it; use macro-averaged F1, which is also what the
# randomized search in Option 2 optimises.
grid = GridSearchCV(
    stacking_pipeline,
    param_grid,
    cv=5,
    scoring='f1_macro',
    n_jobs=-1,
    verbose=2,
)


In [None]:
# Run the search on the training split. `fit` returns the search object
# itself, so the winning estimator can be read straight off the result.
best_model = grid.fit(X_train, y_train).best_estimator_

In [None]:
# Hold-out performance of the tuned stack. NOTE(review): `.score` uses the
# estimator's own default metric (presumably accuracy for a classifier), not
# the F1 scorer used during the search — confirm this is the metric wanted.
best_model.score(X_test, y_test)

In [None]:
import os
# Ensure the output directory for fitted models exists; exist_ok=True makes
# this a no-op when the directory is already there.
os.makedirs("models/fitted_models", exist_ok=True)

In [None]:
# Persist the best estimator found by the grid search.
joblib.dump(best_model, "models/fitted_models/stack_class_best_model.joblib")

## Option 2 - Load each model pipeline and tune it individually

In [None]:
import os  # imported here as well so Option 2 runs without the Option 1 cells

CLASS_PATH = "models/classifiers/"

# Collect the individual classifier pipelines: keep "*_pipeline.joblib" files
# and skip anything prefixed "best_". os.listdir returns entries in arbitrary
# order, so sort them to get a reproducible tuning order.
files = sorted(
    f for f in os.listdir(CLASS_PATH)
    if f.endswith("_pipeline.joblib") and not f.startswith("best_")
)
files

In [None]:
# Imported here as well so this cell works when the Option 1 cells (where
# these modules are otherwise imported) were skipped.
import os
import joblib

# Map each pipeline's short name — the file-name prefix before the first "_"
# (e.g. "rf_pipeline.joblib" -> "rf") — to the loaded pipeline. The short name
# is later used as the key into `param_dists`.
# NOTE: joblib artifacts are pickles — only load files from a trusted source.
pipelines = {
    file.split("_")[0]: joblib.load(os.path.join(CLASS_PATH, file))
    for file in files
}

In [None]:
from scipy.stats import randint, uniform

# Search spaces for RandomizedSearchCV, keyed by pipeline short name.
# scipy conventions: randint(low, high) draws integers in [low, high);
# uniform(loc, scale) draws floats in [loc, loc + scale].
rf_space = {
    # integers in [100, 1000)
    'classifier__n_estimators': randint(low=100, high=1000),
    'classifier__max_depth': [None, 5, 10, 20, 30],
}
svc_space = {
    # floats in [0.01, 10.01]
    'classifier__C': uniform(loc=0.01, scale=10),
    'classifier__gamma': ['scale', 'auto'],
}
gb_space = {
    # integers in [100, 1000)
    'classifier__n_estimators': randint(low=100, high=1000),
    # floats in [0.01, 0.31]
    'classifier__learning_rate': uniform(loc=0.01, scale=0.3),
}

param_dists = {'rf': rf_space, 'svc': svc_space, 'gb': gb_space}

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Settings shared by every per-model search: 25 sampled candidates, 5-fold CV,
# macro-averaged F1, all cores, fixed seed for reproducible sampling.
search_settings = {
    'n_iter': 25,
    'cv': 5,
    'scoring': 'f1_macro',
    'n_jobs': -1,
    'random_state': 42,
}

# Fitted search object per pipeline short name.
best_models = {}
for name, pipe in pipelines.items():
    print(f"Tuning {name} with RandomizedSearchCV...")
    search = RandomizedSearchCV(pipe, param_dists[name], **search_settings)
    # `fit` returns the search object itself.
    best_models[name] = search.fit(X_train, y_train)
    print(f"Best params for {name}: {search.best_params_}")
    print(f"Best score: {search.best_score_}")

### Compare and select best model

In [None]:
# Pick the model whose search achieved the highest cross-validated score.
# `best_overall` is a (name, fitted search) pair.
top_name = max(best_models, key=lambda model_name: best_models[model_name].best_score_)
best_overall = (top_name, best_models[top_name])
print(f"Best overall model: {best_overall[0]} with score {best_overall[1].best_score_}")

### Save the best individually tuned model

In [None]:
# Imported here as well so this cell works when the Option 1 cells (where
# these modules are otherwise imported) were skipped.
import os
import joblib

# The output directory is otherwise only created in Option 1; create it here
# too so saving does not fail when Option 2 is run on its own.
os.makedirs("models/fitted_models", exist_ok=True)

# Persist the refitted best estimator from the winning randomized search.
joblib.dump(
    best_overall[1].best_estimator_,
    "models/fitted_models/best_model_random_search.joblib",
)

## Ensemble model evaluation (stacked classifier from Option 1)

In [None]:
# GridSearchCV (default refit=True) has already refitted the best pipeline on
# the full training split, so reuse it instead of paying for another complete
# fit of the stacked model.
fitted_final_model = grid.best_estimator_

In [None]:
# Predict on the hold-out features; the model was trained on the integer
# codes (0/1/2) produced by the ChurnCategory mapping earlier in the notebook.
preds = fitted_final_model.predict(X_test)

In [None]:
from sklearn.metrics import confusion_matrix

# Display names, listed in the same order as the integer codes used to encode
# ChurnCategory (0, 1, 2).
classes = ['Low Risk', 'Medium Risk', 'High Risk']

# Pin the label order so the matrix is always 3x3 and its rows/columns line up
# with `classes`, even if some class is absent from both y_test and preds.
cm = confusion_matrix(y_test, preds, labels=[0, 1, 2])

In [None]:
from modelviz.confusion_matrix import plot_confusion_matrix

# Render the confusion matrix for the stacked classifier with readable
# class names on the axes.
plot_confusion_matrix(
    cm=cm,
    classes=classes,
    model_name="Stacked Classifier",
    table_fontsize=8,
)