# 04 - Evaluating models

In [None]:
import numpy as np
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

# Single seed shared by NumPy's global RNG and the train/test split so that
# re-running the notebook reproduces the same partition.
SEED = 42
np.random.seed(SEED)

In [None]:
df = pd.read_csv("./data/processed/german.csv")

# map credit_risk to 0/1 (good/bad)
y = df["credit_risk"].map({1: 0, 2: 1})

# Series.map converts every value that is missing from the dict into NaN.
# Fail fast here instead of silently splitting and scoring on NaN labels.
if y.isna().any():
    unexpected_codes = df.loc[y.isna(), "credit_risk"].unique().tolist()
    raise ValueError(
        f"credit_risk contains values other than 1/2: {unexpected_codes}"
    )

# Reference links on column-wise preprocessing (presumably relevant to the
# saved pipelines loaded later in this notebook; nothing in this cell uses them):
# https://scikit-learn.org/stable/modules/generated/sklearn.compose.ColumnTransformer.html
# https://scikit-learn.org/stable/auto_examples/compose/plot_column_transformer_mixed_types.html
X = df.drop(columns=["credit_risk", "id"])

# Stratified 80/20 split seeded with SEED so the held-out set is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED, stratify=y
)

## Helper functions

In [None]:
## output best params found in hyperparameter tuning
def print_best_params_by_metric(cv_results, model_name="model", metrics=None):
    """
    Outputs best parameter set for each metric.

    Parameters:
    - cv_results: mapping laid out like the ``cv_results_`` attribute of a
      fitted search object. It must hold a ``mean_test_<metric>`` array for
      every requested metric; parameter columns are read from the keys that
      start with ``param_model__``.
    - model_name: string for labeling output
    - metrics: optional iterable of metric names to report. Defaults to the
      five scoring metrics shared by all four models.

    Candidates whose mean score is NaN are ignored when picking the best
    index. If a metric has no finite score at all, a notice is printed for
    that metric instead of a parameter set.
    """
    # using same scoring metrics across all four models
    if metrics is None:
        metrics = ["accuracy", "f1", "precision", "recall", "roc_auc"]

    prefix = "param_model__"

    print(f"For {model_name}:")
    for metric in metrics:
        scores = np.asarray(cv_results[f"mean_test_{metric}"], dtype=float)

        if scores.size == 0 or np.isnan(scores).all():
            print(f"No valid CV {metric} scores available")
            continue

        # Plain argmax treats NaN as the maximum, so a candidate with a NaN
        # mean score would be reported as the best one. nanargmax skips NaN.
        best_index = int(np.nanargmax(scores))

        # Strip only the leading prefix so the rest of the name stays intact.
        best_params = {
            key[len(prefix):]: values[best_index]
            for key, values in cv_results.items()
            if key.startswith(prefix)
        }
        best_score = scores[best_index]
        print(f"Best params for {metric}: {best_params}")
        print(f"Best CV {metric}: {best_score}")
    print("------")
    print()

In [None]:
## output test metrics for best fit model
def print_test_metrics(model, X_test, y_test, model_name="model"):
    """
    Prints test set metrics for a fitted model in a simple key-value style.

    Parameters:
    - model: fitted sklearn pipeline, classifier, or hyperparameter-search
      object. It must implement ``predict`` and either ``predict_proba`` or
      ``decision_function``. If it exposes ``best_params_`` those are printed
      first; otherwise that line is skipped.
    - X_test: feature matrix passed to the model's prediction methods
    - y_test: true labels for ``X_test`` (0/1 in this notebook, 1 = bad)
    - model_name: string for labeling output

    Prints accuracy, f1, precision, recall and roc_auc, followed by a
    separator line. Returns nothing.
    """
    # Only search objects carry best_params_; a plain pipeline or classifier
    # does not, so treat the attribute as optional.
    best_params = getattr(model, "best_params_", None)

    y_pred = model.predict(X_test)

    # ROC AUC needs a continuous score. Use the second predict_proba column
    # (the class labelled 1 under this notebook's 0/1 mapping), or the raw
    # decision values when the estimator has no predict_proba.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)[:, 1]
    else:
        y_score = model.decision_function(X_test)

    metrics = {
        "accuracy": accuracy_score(y_test, y_pred),
        "f1": f1_score(y_test, y_pred),
        "precision": precision_score(y_test, y_pred),
        "recall": recall_score(y_test, y_pred),
        "roc_auc": roc_auc_score(y_test, y_score)
    }

    if best_params is not None:
        print(f"Best parameters for {model_name}: {best_params}")
    for metric, value in metrics.items():
        print(f"Test {metric}: {value}")
    print("------")

In [None]:
## import saved models
# NOTE: joblib.load unpickles arbitrary Python objects, so only point this at
# model files produced by this project.
loaded_models = {
    model_key: joblib.load(f"saved_models/{model_key}.pkl")
    for model_key in ["logistic_regression", "knn", "random_forest", "xgboost"]
}

# Short aliases used by the result cells below.
lr_model, knn_model, rf_model, xgb_model = (
    loaded_models["logistic_regression"],
    loaded_models["knn"],
    loaded_models["random_forest"],
    loaded_models["xgboost"],
)


## Results

In [None]:
## for each model, output best parameters
# One (label, fitted search) pair per model, reported in a fixed order.
for label, fitted_search in [
    ("Logistic Regression", lr_model),
    ("kNN", knn_model),
    ("Random Forest", rf_model),
    ("XGBoost", xgb_model),
]:
    print_best_params_by_metric(fitted_search.cv_results_, label)

In [None]:
# For Logistic Regression:
# Best params for accuracy: {'C': np.float64(0.615848211066026), 'max_iter': np.int64(100), 'penalty': 'l2', 'solver': 'lbfgs', 'l1_ratio': masked}
# Best CV accuracy: 0.75125
# Best params for f1: {'C': np.float64(4.281332398719396), 'max_iter': np.int64(1000), 'penalty': 'elasticnet', 'solver': 'saga', 'l1_ratio': np.float64(0.75)}
# Best CV f1: 0.5278343535463698
# Best params for precision: {'C': np.float64(0.004832930238571752), 'max_iter': np.int64(100), 'penalty': 'l2', 'solver': 'liblinear', 'l1_ratio': masked}
# Best CV precision: 0.7171428571428571
# Best params for recall: {'C': np.float64(78.47599703514607), 'max_iter': np.int64(100), 'penalty': 'l2', 'solver': 'newton-cg', 'l1_ratio': masked}
# Best CV recall: 0.4791666666666667
# Best params for roc_auc: {'C': np.float64(0.615848211066026), 'max_iter': np.int64(100), 'penalty': 'l2', 'solver': 'sag', 'l1_ratio': masked}
# Best CV roc_auc: 0.7853050595238096
# ------

# For kNN:
# Best params for accuracy: {'n_neighbors': np.int64(10), 'weights': 'distance'}
# Best CV accuracy: 0.7425
# Best params for f1: {'n_neighbors': np.int64(3), 'weights': 'uniform'}
# Best CV f1: 0.45962787039846775
# Best params for precision: {'n_neighbors': np.int64(25), 'weights': 'distance'}
# Best CV precision: 0.8350000000000002
# Best params for recall: {'n_neighbors': np.int64(2), 'weights': 'distance'}
# Best CV recall: 0.39583333333333337
# Best params for roc_auc: {'n_neighbors': np.int64(24), 'weights': 'distance'}
# Best CV roc_auc: 0.75859375
# ------

# For Random Forest:
# Best params for accuracy: {'criterion': 'entropy', 'max_depth': 20, 'max_features': 'sqrt', 'min_samples_leaf': np.int64(1), 'min_samples_split': np.int64(5), 'n_estimators': np.int64(50)}
# Best CV accuracy: 0.7775000000000001
# Best params for f1: {'criterion': 'entropy', 'max_depth': 20, 'max_features': 'sqrt', 'min_samples_leaf': np.int64(1), 'min_samples_split': np.int64(5), 'n_estimators': np.int64(50)}
# Best CV f1: 0.5223626341081272
# Best params for precision: {'criterion': 'gini', 'max_depth': 10, 'max_features': 'log2', 'min_samples_leaf': np.int64(1), 'min_samples_split': np.int64(5), 'n_estimators': np.int64(100)}
# Best CV precision: 0.8001082251082252
# Best params for recall: {'criterion': 'entropy', 'max_depth': 20, 'max_features': 'sqrt', 'min_samples_leaf': np.int64(1), 'min_samples_split': np.int64(5), 'n_estimators': np.int64(50)}
# Best CV recall: 0.4083333333333334
# Best params for roc_auc: {'criterion': 'gini', 'max_depth': 10, 'max_features': 'log2', 'min_samples_leaf': np.int64(2), 'min_samples_split': np.int64(10), 'n_estimators': np.int64(150)}
# Best CV roc_auc: 0.7994419642857143
# ------

# For XGBoost:
# Best params for accuracy: {'colsample_bytree': np.float64(0.8), 'gamma': np.float64(0.2), 'max_depth': np.int64(7), 'min_child_weight': np.int64(3), 'n_estimators': np.int64(50), 'subsample': np.float64(0.7)}
# Best CV accuracy: 0.7700000000000001
# Best params for f1: {'colsample_bytree': np.float64(0.8), 'gamma': np.float64(0.2), 'max_depth': np.int64(7), 'min_child_weight': np.int64(3), 'n_estimators': np.int64(50), 'subsample': np.float64(0.7)}
# Best CV f1: 0.573550385273438
# Best params for precision: {'colsample_bytree': np.float64(0.7), 'gamma': np.float64(0.1), 'max_depth': np.int64(3), 'min_child_weight': np.int64(3), 'n_estimators': np.int64(50), 'subsample': np.float64(1.0)}
# Best CV precision: 0.6433135949248482
# Best params for recall: {'colsample_bytree': np.float64(0.8), 'gamma': np.float64(0.2), 'max_depth': np.int64(5), 'min_child_weight': np.int64(3), 'n_estimators': np.int64(100), 'subsample': np.float64(0.8)}
# Best CV recall: 0.5333333333333333
# Best params for roc_auc: {'colsample_bytree': np.float64(0.8), 'gamma': np.float64(0.1), 'max_depth': np.int64(3), 'min_child_weight': np.int64(1), 'n_estimators': np.int64(50), 'subsample': np.float64(0.7)}
# Best CV roc_auc: 0.7905133928571428
# ------



In [None]:
## for each best model, output test metrics
# Score every loaded model on the same held-out split, in a fixed order.
for label, fitted_search in [
    ("Logistic Regression", lr_model),
    ("kNN", knn_model),
    ("Random Forest", rf_model),
    ("XGBoost", xgb_model),
]:
    print_test_metrics(fitted_search, X_test, y_test, label)

In [None]:
# Best parameters for Logistic Regression: {'model__C': np.float64(0.615848211066026), 'model__max_iter': 100, 'model__penalty': 'l2', 'model__solver': 'lbfgs'}
# Test accuracy: 0.785
# Test f1: 0.5904761904761905
# Test precision: 0.6888888888888889
# Test recall: 0.5166666666666667
# Test roc_auc: 0.8051190476190476
# ------
# Best parameters for kNN: {'model__n_neighbors': np.int64(10), 'model__weights': 'distance'}
# Test accuracy: 0.75
# Test f1: 0.46808510638297873
# Test precision: 0.6470588235294118
# Test recall: 0.36666666666666664
# Test roc_auc: 0.7516666666666666
# ------
# Best parameters for Random Forest: {'model__criterion': 'entropy', 'model__max_depth': 20, 'model__max_features': 'sqrt', 'model__min_samples_leaf': 1, 'model__min_samples_split': 5, 'model__n_estimators': 50}
# Test accuracy: 0.76
# Test f1: 0.48936170212765956
# Test precision: 0.6764705882352942
# Test recall: 0.38333333333333336
# Test roc_auc: 0.7735714285714286
# ------
# Best parameters for XGBoost: {'model__colsample_bytree': 0.8, 'model__gamma': 0.2, 'model__max_depth': 7, 'model__min_child_weight': 3, 'model__n_estimators': 50, 'model__subsample': 0.7}
# Test accuracy: 0.755
# Test f1: 0.5333333333333333
# Test precision: 0.6222222222222222
# Test recall: 0.4666666666666667
# Test roc_auc: 0.7428571428571428
# ------