In [7]:
import joblib
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import (
    StratifiedKFold,
    cross_validate,
    GridSearchCV)
from sklearn.metrics import confusion_matrix
warnings.filterwarnings("ignore")

## Load the data

In [23]:
X_train = np.load('artifacts/X_train.npz')['arr_0']
Y_train = np.load('artifacts/Y_train.npz')['arr_0']
X_test = np.load('artifacts/X_test.npz')['arr_0']
Y_test = np.load('artifacts/Y_test.npz')['arr_0']



## Define Multi Models


In [24]:
#models = {
#    "Logistic Regression": LogisticRegression(),
#    "Random Forest": RandomForestClassifier(),
#    "Decision Tree": DecisionTreeClassifier()   
#}

lr_param_grid = {
    'max_iter': [1000, 5000, 10000]
}

dt_param_grid = {
    'max_depth': [8, 12, 16, 20],
    'criterion': ['gini', 'entropy', "log_loss"]
}

rf_param_grid = {
    'n_estimators': [50, 100, 150, 200],
    'max_depth': [8, 12, 16, 20],
    'criterion': ['gini', 'entropy', "log_loss"]
}

param_grids= {
    'LogisticRegression': lr_param_grid,
    'DecisionTreeClassifier': dt_param_grid,
    'RandomForestClassifier': rf_param_grid
}


##  Define Multi Models

In [25]:
models = {
    "LogisticRegression": LogisticRegression(),
    "RandomForestClassifier": RandomForestClassifier(),
    "DecisionTreeClassifier": DecisionTreeClassifier()
}


## Configure K-Fold CV

In [26]:
cv = StratifiedKFold(
    n_splits= 5,
    random_state=42,
    shuffle=True
)

## Multi Model Training

In [27]:
grid_search_results = {}
for model_name, model in models.items():
    print(f"\n---Tuning {model_name}---")

    param_grid = param_grids[model_name]

    
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        scoring='f1',
        cv=cv,
        verbose=1,
        return_train_score=False
    )

    print(f"Fitting gridSearchCV for {model_name}")

    grid_search.fit(X_train, Y_train)

    grid_search_results[model_name] = grid_search
    
    print(f"{model_name} gridSearchCV completed")
    print(f"Best Parameters: {grid_search.best_params_}")
    print(f"Best CV score: {grid_search.best_score_}")


---Tuning LogisticRegression---
Fitting gridSearchCV for LogisticRegression
Fitting 5 folds for each of 3 candidates, totalling 15 fits
LogisticRegression gridSearchCV completed
Best Parameters: {'max_iter': 1000}
Best CV score: 0.7324904731803483

---Tuning RandomForestClassifier---
Fitting gridSearchCV for RandomForestClassifier
Fitting 5 folds for each of 48 candidates, totalling 240 fits
RandomForestClassifier gridSearchCV completed
Best Parameters: {'criterion': 'log_loss', 'max_depth': 20, 'n_estimators': 150}
Best CV score: 0.894502025545515

---Tuning DecisionTreeClassifier---
Fitting gridSearchCV for DecisionTreeClassifier
Fitting 5 folds for each of 12 candidates, totalling 60 fits
DecisionTreeClassifier gridSearchCV completed
Best Parameters: {'criterion': 'gini', 'max_depth': 20}
Best CV score: 0.8301192551047123


In [28]:
grid_search_results

{'LogisticRegression': GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=42, shuffle=True),
              estimator=LogisticRegression(),
              param_grid={'max_iter': [1000, 5000, 10000]}, scoring='f1',
              verbose=1),
 'RandomForestClassifier': GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=42, shuffle=True),
              estimator=RandomForestClassifier(),
              param_grid={'criterion': ['gini', 'entropy', 'log_loss'],
                          'max_depth': [8, 12, 16, 20],
                          'n_estimators': [50, 100, 150, 200]},
              scoring='f1', verbose=1),
 'DecisionTreeClassifier': GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=42, shuffle=True),
              estimator=DecisionTreeClassifier(),
              param_grid={'criterion': ['gini', 'entropy', 'log_loss'],
                          'max_depth': [8, 12, 16, 20]},
              scoring='f1', verbose=1)}