# 🤖 Deep Learning: ANN & CNN

This notebook covers:

- A simple ANN baseline
- A 1D CNN model
- Score binning and calibration-style plots
- Score combination (ensemble-like composite score)

> Important: because `bad` is rare (~2.35%), accuracy can be misleading. Prefer AUC, PR-AUC, recall/precision, and calibrated risk curves.

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

# Default size (width, height) applied to figures that do not set their own.
plt.rcParams.update({"figure.figsize": (12, 6)})

## Load engineered dataset

This notebook expects one of the following:
- `df_fe` already in memory (features + the `bad` target), or
- the engineered dataset loaded from disk (recommended).

In [None]:
# This cell assumes `df_fe` is already defined in the session.
# To load a previously saved copy instead, uncomment the line below
# (DATA_DIR is not set in the cells shown here and must be defined first):
# df_fe = pd.read_parquet(DATA_DIR + "df_fe.parquet")

# Preview the first rows as a quick sanity check.
df_fe.head()

## Train/test split and scaling

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Separate the integer-coded target `bad` from the remaining feature columns.
y = df_fe["bad"].astype(int).to_numpy()
X = df_fe.drop(columns=["bad"])

# Stratified 70/30 hold-out so both partitions keep the same share of positives.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=0,
    stratify=y,
)

# Learn the scaling statistics on the training rows only, then apply the same
# transformation to both partitions.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# The 1D CNN takes (samples, features, channels): append a single channel axis.
X_train_cnn = X_train[:, :, np.newaxis]
X_test_cnn = X_test[:, :, np.newaxis]

X_train.shape, X_train_cnn.shape

## 1) ANN baseline

In [None]:
import tensorflow as tf

# Fully connected baseline: one hidden layer as wide as the feature vector,
# a 6-unit hidden layer, and a sigmoid output for the probability of `bad`.
# The input shape is declared with an explicit Input object; passing
# `input_dim`/`input_shape` to a layer is deprecated and warns on Keras 3.
ann = tf.keras.models.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(units=X_train.shape[1], activation="relu"),
    tf.keras.layers.Dense(units=6, activation="relu"),
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

# Track ROC AUC and PR AUC next to accuracy: with a rare positive class,
# accuracy on its own says little about ranking quality.
ann.compile(optimizer="adam", loss="binary_crossentropy", metrics=[
    "accuracy",
    tf.keras.metrics.AUC(name="auc"),
    tf.keras.metrics.AUC(name="pr_auc", curve="PR"),
])

ann.summary()

In [None]:
# Train the ANN. The hold-out split is passed as validation data, so the
# per-epoch val_* metrics are computed on the test rows.
history_ann = ann.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=256,
)

### Evaluate ANN

In [None]:
# Ask Keras for a name -> value mapping directly. Zipping `metrics_names` with
# the list returned by evaluate() mislabels the results on Keras 3, where
# `metrics_names` reports all compiled metrics as one "compile_metrics" entry.
ann_metrics = ann.evaluate(X_test, y_test, verbose=0, return_dict=True)

# Keep `ann_eval` as a plain list of metric values for code that indexes it.
ann_eval = list(ann_metrics.values())

ann_metrics

## 2) 1D CNN

In [None]:
# 1D CNN over the feature axis: two Conv1D -> BatchNorm -> MaxPool -> Dropout
# stages, then a dense head with a sigmoid output for the probability of `bad`.
# NOTE(review): convolving over tabular features treats neighbouring columns as
# related; confirm the column order of the feature matrix is meaningful.
# The input shape is declared with an explicit Input object; passing
# `input_shape` to a layer is deprecated and warns on Keras 3.
cnn = tf.keras.models.Sequential([
    tf.keras.Input(shape=(X_train.shape[1], 1)),
    tf.keras.layers.Conv1D(filters=32, kernel_size=2, padding="same", activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool1D(pool_size=2),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Conv1D(filters=64, kernel_size=2, padding="same", activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool1D(pool_size=2),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=64, activation="relu"),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

# Explicit Adam optimizer with a learning rate of 1e-4.
opt = tf.keras.optimizers.Adam(learning_rate=1e-4)
cnn.compile(optimizer=opt, loss="binary_crossentropy", metrics=[
    "accuracy",
    tf.keras.metrics.AUC(name="auc"),
    tf.keras.metrics.AUC(name="pr_auc", curve="PR"),
])

cnn.summary()

In [None]:
# Train the CNN on the channel-expanded arrays. No batch_size is passed, so the
# Keras default applies. The hold-out split doubles as validation data.
history_cnn = cnn.fit(
    X_train_cnn,
    y_train,
    validation_data=(X_test_cnn, y_test),
    epochs=25,
)

### Evaluate CNN

In [None]:
# Ask Keras for a name -> value mapping directly. Zipping `metrics_names` with
# the list returned by evaluate() mislabels the results on Keras 3, where
# `metrics_names` reports all compiled metrics as one "compile_metrics" entry.
cnn_metrics = cnn.evaluate(X_test_cnn, y_test, verbose=0, return_dict=True)

# Keep `cnn_eval` as a plain list of metric values for code that indexes it.
cnn_eval = list(cnn_metrics.values())

cnn_metrics

## Scoring + binning plots

In [None]:
def score_bins(y_true, y_score, n_bins=10):
    """Bucket scores into equal-frequency bins and summarise each bin.

    Args:
        y_true: Observed binary outcomes, one per row.
        y_score: Predicted scores aligned with ``y_true``.
        n_bins: Number of quantile bins (10 gives deciles).

    Returns:
        A tuple ``(out, g)``. ``out`` has one row per observation with the
        columns ``y``, ``y_pred`` and ``score_cut`` (bin label ``0`` holds the
        lowest scores, ``n_bins - 1`` the highest). ``g`` has one row per bin
        with the mean ``y_pred`` and mean ``y``; its ``score_cut`` is
        renumbered so that ``1`` is the highest-scoring bin and ``n_bins`` the
        lowest.
    """
    out = pd.DataFrame({"y": y_true, "y_pred": y_score})

    # Bin on ordinal ranks rather than raw scores. Heavily tied scores (for
    # example many saturated sigmoid outputs) produce duplicate quantile edges
    # and make qcut raise; ranks broken by order of appearance are distinct,
    # and for untied scores they yield exactly the same bins.
    ranks = out["y_pred"].rank(method="first")
    out["score_cut"] = pd.qcut(ranks, q=n_bins, labels=list(range(n_bins)))

    # observed=False keeps every bin in the summary on all pandas versions.
    g = (
        out.groupby("score_cut", observed=False)[["y_pred", "y"]]
        .mean()
        .reset_index()
    )

    # Flip the numbering so that bin 1 is the riskiest (highest-score) group.
    g["score_cut"] = n_bins - g["score_cut"].astype(int)
    return out, g

# Flatten each model's hold-out predictions into a 1-D score vector.
y_pred_ann = ann.predict(X_test, verbose=0).ravel()
y_pred_cnn = cnn.predict(X_test_cnn, verbose=0).ravel()

# Row-level bin assignments plus the per-decile summary for each model.
ann_df, ann_sum = score_bins(y_test, y_pred_ann, n_bins=10)
cnn_df, cnn_sum = score_bins(y_test, y_pred_cnn, n_bins=10)

# One bar chart per model: observed bad rate in each score decile.
for model_label, decile_summary in (("ANN", ann_sum), ("CNN", cnn_sum)):
    fig = plt.figure(figsize=(12, 5))
    sns.barplot(x=decile_summary["score_cut"], y=decile_summary["y"])
    plt.title(f"{model_label}: Actual Bad Rate by Score Decile (higher= riskier)")
    plt.xlabel("Score Decile (1=highest)")
    plt.ylabel("Actual bad rate")
    plt.show()

## Combine scores (two simple composites)

In [None]:
# Put the hold-out labels and both model scores side by side, then derive two
# blended scores:
#   composite_geo  - geometric mean of the two scores (penalizes disagreement)
#   composite_mean - simple arithmetic mean of the two scores
combo = pd.DataFrame(
    {"y": y_test, "y_pred_ann": y_pred_ann, "y_pred_cnn": y_pred_cnn}
).assign(
    composite_geo=lambda d: np.sqrt(d["y_pred_ann"] * d["y_pred_cnn"]),
    composite_mean=lambda d: (d["y_pred_ann"] + d["y_pred_cnn"]) / 2,
)

# Pairwise correlations between the label, the model scores and the composites.
combo.corr(numeric_only=True)

In [None]:
# Plot the observed bad rate per score decile for each composite score.
for col in ("composite_geo", "composite_mean"):
    _, decile_summary = score_bins(
        combo["y"].values,
        combo[col].values,
        n_bins=10,
    )

    plt.figure(figsize=(12, 5))
    sns.barplot(x=decile_summary["score_cut"], y=decile_summary["y"])
    plt.title(f"Composite ({col}): Actual Bad Rate by Score Decile")
    plt.xlabel("Score Decile (1=highest)")
    plt.ylabel("Actual bad rate")
    plt.show()

## Notes on evaluation

Because the event rate is low (~2–3%), you should report:

- ROC AUC
- PR AUC
- Confusion matrix at a chosen threshold (e.g., top 5% risk)
- Lift / KS
- Calibration (predicted vs actual)

…and choose thresholds based on business cost (false negatives vs false positives).