# 04 â€” Modeling & Evaluation

Trains baseline Logistic Regression and an XGBoost model, then evaluates them.

> Tip: In the full pipeline these steps are executed by `main.py`.

In [1]:
# Notebook setup
import os, sys
from pathlib import Path

# Add project root to PYTHONPATH
PROJECT_ROOT = Path.cwd().parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

print('Project root:', PROJECT_ROOT)


Project root: /mnt/data/churn_fix/churn-reduction-end-to-end


In [2]:
import json
import pandas as pd

from src.config import Config
from src.train import train_models
from src.evaluate import evaluate_model
from src.thresholding import find_optimal_threshold

cfg = Config()
processed_path = PROJECT_ROOT / cfg.PROCESSED_DIR / cfg.PROCESSED_FILENAME
df = pd.read_csv(processed_path)
print('Loaded:', processed_path, 'shape=', df.shape)


Loaded: /mnt/data/churn_fix/churn-reduction-end-to-end/data/processed/processed_dataset.csv shape= (20000, 34)


In [3]:
artifacts = train_models(df, cfg)
print('Models trained')

Models trained


In [4]:
# Evaluate baseline
baseline_metrics = evaluate_model(
    model=artifacts.baseline_pipeline,
    X_test=artifacts.X_test,
    y_test=artifacts.y_test,
    threshold=0.5
)
{k: baseline_metrics[k] for k in ['roc_auc','pr_auc','threshold']}

# Confusion matrix
baseline_metrics['confusion_matrix']

{'tn': 109, 'fp': 1020, 'fn': 78, 'tp': 2793}

In [5]:
# Evaluate XGB
xgb_metrics = evaluate_model(
    model=artifacts.xgb_pipeline,
    X_test=artifacts.X_test,
    y_test=artifacts.y_test,
    threshold=0.5
)
{k: xgb_metrics[k] for k in ['roc_auc','pr_auc','threshold']}

# Confusion matrix
xgb_metrics['confusion_matrix']

{'tn': 106, 'fp': 1023, 'fn': 83, 'tp': 2788}

In [6]:
# Business thresholding
proba_xgb = artifacts.xgb_pipeline.predict_proba(artifacts.X_test)[:, 1]
opt = find_optimal_threshold(
    y_true=artifacts.y_test,
    y_proba=proba_xgb,
    cost_fn=cfg.COST_FALSE_NEGATIVE,
    cost_fp=cfg.COST_FALSE_POSITIVE,
)
opt


{'threshold': 0.27, 'expected_cost': 5640.0, 'fp': 1128, 'fn': 0}