# 02_model_training (Diabetes)


In [None]:
import json, pandas as pd
from pathlib import Path
import os
import tensorflow as tf

# Resolve the project root so the paths below work whether the notebook is
# started from the repository root or from its "notebooks" sub-directory.
project_root = Path(os.getcwd()).resolve()
if project_root.name == "notebooks":
    project_root = project_root.parent

DATA = project_root / "data" / "processed"
# Anchor the models directory to the project root as well, so checkpoints are
# written to <project>/models even when the working directory is "notebooks".
# (A cwd-relative Path('models') used to override this value.)
MODELS = project_root / "models"
MODELS.mkdir(parents=True, exist_ok=True)

# Load the pre-split datasets produced by the preprocessing step.
train = pd.read_csv(DATA/'train.csv')
val   = pd.read_csv(DATA/'val.csv')
test  = pd.read_csv(DATA/'test.csv')

# Separate the feature matrix from the 'target' label column for each split.
X_train, y_train = train.drop(columns=['target']).values, train['target'].values
X_val,   y_val   = val.drop(columns=['target']).values,   val['target'].values
X_test,  y_test  = test.drop(columns=['target']).values,  test['target'].values
n_features = X_train.shape[1]  # input width passed to the model builders

X_train.shape, X_val.shape, X_test.shape

((537, 8), (115, 8), (116, 8))

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras import regularizers
from keras.optimizers import Adam, SGD, RMSprop

def build_model_1(n):
    """Build and compile the first candidate binary classifier.

    Layers, in order: Dense(128, relu) -> BatchNormalization -> Dropout(0.3)
    -> Dense(64, relu, L2 kernel regularizer 1e-4) -> Dropout(0.3)
    -> Dense(1, sigmoid).

    Args:
        n: Number of input features (used as the input shape ``(n,)``).

    Returns:
        A compiled ``Sequential`` model using Adam (learning rate 1e-3),
        binary cross-entropy loss and the accuracy metric.
    """
    layer_stack = [
        Dense(128, activation='relu', input_shape=(n,)),
        BatchNormalization(),
        Dropout(0.3),
        Dense(64, activation='relu', kernel_regularizer=regularizers.l2(1e-4)),
        Dropout(0.3),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(layer_stack)
    model.compile(
        optimizer=Adam(learning_rate=1e-3),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

def build_model_2(n):
    """Build and compile the second candidate binary classifier.

    Layers, in order: Dense(256, relu) -> Dense(128, relu)
    -> BatchNormalization -> Dropout(0.5)
    -> Dense(64, relu, L1 kernel regularizer 1e-5) -> Dense(1, sigmoid).

    Args:
        n: Number of input features (used as the input shape ``(n,)``).

    Returns:
        A compiled ``Sequential`` model using RMSprop (learning rate 1e-3),
        binary cross-entropy loss and the accuracy metric.
    """
    layer_stack = [
        Dense(256, activation='relu', input_shape=(n,)),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(64, activation='relu', kernel_regularizer=regularizers.l1(1e-5)),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(layer_stack)
    model.compile(
        optimizer=RMSprop(learning_rate=1e-3),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

def build_model_3(n):
    """Build and compile the third candidate binary classifier.

    Layers, in order: Dense(64, relu) -> Dropout(0.2) -> Dense(64, relu)
    -> BatchNormalization -> Dense(32, relu) -> Dropout(0.3)
    -> Dense(1, sigmoid).

    Args:
        n: Number of input features (used as the input shape ``(n,)``).

    Returns:
        A compiled ``Sequential`` model using SGD (learning rate 1e-2,
        momentum 0.9, Nesterov enabled), binary cross-entropy loss and the
        accuracy metric.
    """
    layer_stack = [
        Dense(64, activation='relu', input_shape=(n,)),
        Dropout(0.2),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dense(32, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid'),
    ]
    optimizer = SGD(learning_rate=1e-2, momentum=0.9, nesterov=True)
    model = Sequential(layer_stack)
    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Candidate architectures; each one is trained and evaluated in turn.
builders = [build_model_1, build_model_2, build_model_3]


def callbacks_tpl(name):
    """Return a fresh list of Keras callbacks for the model called *name*.

    A new list is built per call because callback objects keep per-run state
    and must not be shared between separate ``fit`` calls.

    Args:
        name: Model identifier; the checkpoint is written to
            ``MODELS / f"{name}.h5"``.

    Returns:
        ``[EarlyStopping, ModelCheckpoint, ReduceLROnPlateau]``.

    NOTE(review): early stopping restores the weights with the best
    ``val_loss`` while the checkpoint keeps the epoch with the best
    ``val_accuracy``, so the file on disk can differ from the in-memory
    model after training. Confirm this is intended.
    """
    return [
        EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
        # str(): older Keras releases only accept string file paths here.
        ModelCheckpoint(str(MODELS/f"{name}.h5"), monitor='val_accuracy', save_best_only=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-5),
    ]

AttributeError: module 'numpy' has no attribute '_no_nep50_warning'

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_recall_fscore_support
import matplotlib.pyplot as plt

experiments = []  # one summary dict of test-set metrics per trained model
histories = {}    # model name -> {metric name: list of per-epoch floats}

for builder in builders:
    name = builder.__name__
    model = builder(n_features)
    cb = callbacks_tpl(name)
    h = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                  epochs=200, batch_size=32, verbose=0, callbacks=cb)
    # Cast every logged value to a built-in float: callbacks such as
    # ReduceLROnPlateau can log NumPy scalars (e.g. the learning rate), which
    # json.dump below cannot serialize.
    histories[name] = {
        metric: [float(value) for value in values]
        for metric, values in h.history.items()
    }

    # Evaluate the in-memory model on the held-out test split.
    y_prob = model.predict(X_test).ravel()
    y_pred = (y_prob >= 0.5).astype(int)  # 0.5 decision threshold on the sigmoid output
    acc = accuracy_score(y_test, y_pred)
    pr, rc, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='binary', zero_division=0)

    experiments.append({
        'name': name,
        'test_accuracy': float(acc),
        'precision': float(pr),
        'recall': float(rc),
        'f1': float(f1),
        'confusion_matrix': confusion_matrix(y_test, y_pred).tolist(),
        # zero_division=0 matches the call above and avoids undefined-metric
        # warnings when a class is never predicted.
        'report': classification_report(y_test, y_pred, output_dict=True, zero_division=0)
    })

# Persist the training histories and the per-model test metrics.
with open(MODELS/'training_history.json', 'w', encoding='utf-8') as f:
    json.dump({'histories': histories, 'experiments': experiments}, f, indent=2)

experiments

In [None]:
# Copy the checkpoint of the highest-scoring experiment to best_model.h5.
# NOTE(review): models are ranked by test-set accuracy here — confirm that
# ranking on the test split (rather than the validation split) is intended.
import shutil

best = max(experiments, key=lambda exp: exp['test_accuracy'])
best_checkpoint = MODELS / f"{best['name']}.h5"
shutil.copy(best_checkpoint, MODELS / 'best_model.h5')
best

In [None]:
# Draw loss and accuracy curves for each trained model
import matplotlib.pyplot as plt
def plot_history(hist, title):
    """Show loss and accuracy curves for one training run, side by side.

    Args:
        hist: Mapping of metric name to a sequence of per-epoch values. It
            must contain ``'loss'``. ``'val_loss'`` and the accuracy series
            (``'accuracy'``/``'val_accuracy'``, or the older ``'acc'``/
            ``'val_acc'`` keys) are plotted only when present.
        title: Prefix used for both subplot titles.

    Raises:
        KeyError: If *hist* has no ``'loss'`` entry.
    """
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))

    # Left panel: training loss, plus validation loss when it was recorded.
    ax[0].plot(hist['loss'], label='train_loss')
    if 'val_loss' in hist:
        ax[0].plot(hist['val_loss'], label='val_loss')
    ax[0].set_title(title + ' - Loss')
    ax[0].legend()

    # Right panel: accept both the current and the legacy metric key names.
    acc_series = hist.get('accuracy', hist.get('acc'))
    val_acc_series = hist.get('val_accuracy', hist.get('val_acc'))
    if acc_series is not None:
        ax[1].plot(acc_series, label='train_acc')
    if val_acc_series is not None:
        ax[1].plot(val_acc_series, label='val_acc')
    ax[1].set_title(title + ' - Accuracy')
    # Only draw a legend when something was plotted; an empty legend warns.
    if acc_series is not None or val_acc_series is not None:
        ax[1].legend()
    plt.show()

for name, hist in histories.items():
    plot_history(hist, name)

In [None]:
# Save training curves & confusion matrices
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, auc

# Locate the project root: step up one level when running from "notebooks".
project_root = Path(os.getcwd()).resolve()
project_root = project_root.parent if project_root.name == "notebooks" else project_root

# Output locations for figures and text reports; created on demand.
CURVES_DIR = project_root.joinpath("visualizations", "training_curves")
CM_DIR     = project_root.joinpath("visualizations", "confusion_matrices")
DOCS_DIR   = project_root.joinpath("docs")
for out_dir in (CURVES_DIR, CM_DIR, DOCS_DIR):
    out_dir.mkdir(parents=True, exist_ok=True)

# 2.1) Write one PNG per model with its loss curves (left) and accuracy
#      curves (right) into CURVES_DIR.
for name, hist in histories.items():
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

    loss_ax.plot(hist['loss'], label='train_loss')
    loss_ax.plot(hist['val_loss'], label='val_loss')
    loss_ax.set_title(f'{name} - Loss')
    loss_ax.legend()

    # Accept both the current ('accuracy') and legacy ('acc') metric keys.
    accuracy_curves = (
        (hist.get('accuracy', hist.get('acc')), 'train_acc'),
        (hist.get('val_accuracy', hist.get('val_acc')), 'val_acc'),
    )
    for series, label in accuracy_curves:
        if series is not None:
            acc_ax.plot(series, label=label)
    acc_ax.set_title(f'{name} - Accuracy')
    acc_ax.legend()

    fig.tight_layout()
    fig.savefig(CURVES_DIR / f"{name}_curves.png", dpi=180)
    plt.close(fig)  # release the figure so figures do not pile up in memory

print("Saved training curves to:", CURVES_DIR)

# 2.2) Save a confusion matrix and a ROC curve for each trained model.
# Predictions are recomputed from the checkpoints saved on disk (not from the
# in-memory models), and the test split is re-read from disk.

from tensorflow.keras.models import load_model
import pandas as pd
DATA = project_root / "data" / "processed"
test  = pd.read_csv(DATA / "test.csv")
X_test, y_test = test.drop(columns=['target']).values, test['target'].values

evaluated_models = []  # names of the models whose figures were written
for exp in experiments:
    name = exp['name']
    # Read from MODELS, the directory the training checkpoints were written to.
    model_path = MODELS / f"{name}.h5"
    if not model_path.exists():
        # Report the gap instead of skipping silently, so missing figures
        # can be traced back to a missing checkpoint.
        print(f"WARNING: checkpoint not found, skipping {name}: {model_path}")
        continue
    mdl = load_model(model_path)
    y_prob = mdl.predict(X_test).ravel()
    y_pred = (y_prob >= 0.5).astype(int)  # 0.5 decision threshold

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(4,3))
    sns.heatmap(cm, annot=True, fmt='d', cbar=False)
    plt.title(f'{name} - Confusion Matrix'); plt.xlabel('Predicted'); plt.ylabel('Actual')
    plt.tight_layout()
    plt.savefig(CM_DIR / f"{name}_confusion_matrix.png", dpi=180)
    plt.close()

    # ROC
    fpr, tpr, _ = roc_curve(y_test, y_prob)
    roc_auc = auc(fpr, tpr)
    plt.figure(figsize=(4,3))
    plt.plot(fpr, tpr, label=f'AUC={roc_auc:.3f}')
    plt.plot([0,1], [0,1], linestyle='--')  # chance-level diagonal
    plt.title(f'{name} - ROC'); plt.xlabel('FPR'); plt.ylabel('TPR'); plt.legend()
    plt.tight_layout()
    plt.savefig(CURVES_DIR / f"{name}_roc.png", dpi=180)
    plt.close()

    evaluated_models.append(name)

# Only claim success when at least one model was actually evaluated.
if evaluated_models:
    print("Saved confusion matrices to:", CM_DIR)
else:
    print("No confusion matrices saved: no model checkpoints were found in", MODELS)

# 2.3) Write a plain-text summary for the experiment with the highest test
#      accuracy, followed by its full classification report dict.
best = max(experiments, key=lambda exp: exp['test_accuracy'])
report_chunks = [
    f"Best model: {best['name']}\n",
    f"Accuracy: {best['test_accuracy']:.4f}\n",
    f"Precision: {best['precision']:.4f}\nRecall: {best['recall']:.4f}\nF1: {best['f1']:.4f}\n\n",
    "Full classification report (dict):\n",
    str(best['report']),
]
report_path = DOCS_DIR / "classification_report_best.txt"
with open(report_path, "w", encoding="utf-8") as out:
    out.writelines(report_chunks)
print("Saved best classification report to docs/")
