In [1]:
import sqlite3
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    precision_recall_curve, 
    auc, 
    brier_score_loss, 
    classification_report
)

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2

import matplotlib.pyplot as plt
import os

print("TensorFlow:", tf.__version__)


TensorFlow: 2.16.1


In [3]:
import os
# Show the current working directory: the "results" and "models" output folders
# written by the training helper further down are created relative to it.
os.getcwd()


'C:\\Users\\sthum\\cs230-project\\notebooks'

                               Data loading

In [5]:
# Load labeled data
# The database path is resolved relative to the notebook's working directory
# (the project's notebooks/ folder) instead of a user-specific absolute path.
DB_PATH = os.path.join(os.pardir, "data", "routes_scores.db")

# sqlite3.connect() silently creates an empty database when the file is missing,
# which would only surface later as a confusing "no such table" error.
if not os.path.isfile(DB_PATH):
    raise FileNotFoundError(f"SQLite database not found: {os.path.abspath(DB_PATH)}")

conn = sqlite3.connect(DB_PATH)
try:
    df = pd.read_sql("SELECT * FROM routes;", conn)
finally:
    # Always release the connection, even if the query fails.
    conn.close()

print("Dataset shape:", df.shape)
df.head()


Dataset shape: (20000, 4)


Unnamed: 0,route_id,counties,impact_score,impacting_delivery
0,1,"NORTH CENTRAL & SOUTHEAST SISKIYOU COUNTY,SAN ...",0.231843,0
1,2,"SANTA YNEZ MOUNTAINS EASTERN RANGE,SAN JOAQUIN...",0.228992,0
2,3,SANTA LUCIA MOUNTAINS AND LOS PADRES NATIONAL ...,0.227583,0
3,4,"HUMBOLDT,W CENTRAL S.J. VALLEY,SAN GORGONIO PA...",0.226957,0
4,5,"TRINITY,Carquinez Strait and Delta,ORANGE COUN...",0.262501,1


                Common split function

In [6]:
def make_splits(
    df,
    feature_cols,
    target_col="impacting_delivery",
    test_size=0.15,
    val_size=0.1765,
    random_state=42,
):
    """
    Return stratified train/val/test splits for the selected feature columns.

    Missing values are imputed *after* splitting, using statistics computed on
    the training partition only, so nothing from the validation or test rows
    leaks into the training features.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; it is not modified.
    feature_cols : list of str
        Columns returned as features.
    target_col : str
        Label column, also used for stratification.
    test_size : float
        Fraction of all rows held out as the test set.
    val_size : float
        Fraction of the remaining (non-test) rows held out for validation.
        The default 0.1765 combined with ``test_size=0.15`` gives roughly 70/15/15.
    random_state : int
        Seed passed to both ``train_test_split`` calls.

    Returns
    -------
    X_train, y_train, X_val, y_val, X_test, y_test
    """
    train, test = train_test_split(
        df,
        test_size=test_size,
        random_state=random_state,
        stratify=df[target_col],
    )
    train, val = train_test_split(
        train,
        test_size=val_size,
        random_state=random_state,
        stratify=train[target_col],
    )

    # Work on copies so the caller's frame is never touched by the imputation.
    train, val, test = train.copy(), val.copy(), test.copy()

    # Basic missing handling, fitted on the training partition only.
    for col in feature_cols:
        is_text = (
            pd.api.types.is_object_dtype(train[col])
            or pd.api.types.is_string_dtype(train[col])
        )
        fill_value = "Unknown" if is_text else train[col].median()
        for part in (train, val, test):
            part[col] = part[col].fillna(fill_value)

    X_train = train[feature_cols]
    y_train = train[target_col]
    X_val   = val[feature_cols]
    y_val   = val[target_col]
    X_test  = test[feature_cols]
    y_test  = test[target_col]

    print("Train:", X_train.shape, "Val:", X_val.shape, "Test:", X_test.shape)
    print("Label distribution (train):")
    print(y_train.value_counts(normalize=True))

    return X_train, y_train, X_val, y_val, X_test, y_test


                                        Preprocessing + tf.data helper

In [None]:
def build_preprocessor(numeric_features, categorical_features):
    """
    Assemble an unfitted ColumnTransformer for the given column groups.

    Columns listed in ``numeric_features`` go through a StandardScaler;
    columns listed in ``categorical_features`` go through a OneHotEncoder
    configured with ``handle_unknown="ignore"``.
    """
    scale_numeric = Pipeline(steps=[("scaler", StandardScaler())])
    encode_categorical = Pipeline(
        steps=[("encoder", OneHotEncoder(handle_unknown="ignore"))]
    )

    column_steps = [
        ("num", scale_numeric, numeric_features),
        ("cat", encode_categorical, categorical_features),
    ]
    return ColumnTransformer(transformers=column_steps)


In [8]:
def preprocess_and_to_arrays(preprocessor, X_train, X_val, X_test):
    """
    Fit ``preprocessor`` on the training features and transform all three splits.

    The preprocessor is fitted on ``X_train`` only and then applied to
    ``X_val`` and ``X_test``. If the transformed training matrix exposes
    ``toarray`` (i.e. it is sparse), all three outputs are converted to dense
    arrays. Prints the processed feature dimension and returns
    ``(train, val, test)``.
    """
    fitted_train = preprocessor.fit_transform(X_train)
    outputs = [
        fitted_train,
        preprocessor.transform(X_val),
        preprocessor.transform(X_test),
    ]

    # Sparse output from the training split implies sparse output everywhere.
    if hasattr(fitted_train, "toarray"):
        outputs = [matrix.toarray() for matrix in outputs]

    train_arr, val_arr, test_arr = outputs
    print("Processed feature dim:", train_arr.shape[1])
    return train_arr, val_arr, test_arr


In [9]:
def make_tf_datasets(X_train, y_train, X_val, y_val, X_test, y_test, batch_size=32):
    """
    Wrap the three (features, labels) splits as batched, prefetched tf.data datasets.

    Labels are read through ``.values``. Only the training dataset is shuffled,
    with a buffer of ``len(X_train)``; validation and test keep their order.
    Returns ``(train_ds, val_ds, test_ds)``.
    """
    from_slices = tf.data.Dataset.from_tensor_slices

    def _batched(dataset):
        # Shared tail of every pipeline: batch, then prefetch.
        return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    shuffled_train = from_slices((X_train, y_train.values)).shuffle(
        buffer_size=len(X_train)
    )
    validation = from_slices((X_val, y_val.values))
    held_out = from_slices((X_test, y_test.values))

    return _batched(shuffled_train), _batched(validation), _batched(held_out)


                        MLP model builder

In [11]:
def build_mlp(input_dim, l2_reg=1e-4, dropout_rate=0.3):
    """
    Build and compile a small binary-classification MLP.

    Architecture: Input(input_dim) -> Dense(64, relu) -> Dense(32, relu)
    -> Dropout(dropout_rate) -> Dense(1, sigmoid). Both hidden layers carry
    an L2 kernel penalty of ``l2_reg``.

    The model is compiled with Adam (learning rate 1e-3), binary cross-entropy
    and the metrics ``precision``, ``recall`` and ``AUPRC`` (area under the
    PR curve). The model summary is printed before the model is returned.
    """
    model = Sequential([
        # Declare the input with an explicit Input object rather than passing
        # `input_shape` to the first Dense layer, which Keras warns about.
        tf.keras.Input(shape=(input_dim,)),
        Dense(64, activation="relu", kernel_regularizer=l2(l2_reg)),
        Dense(32, activation="relu", kernel_regularizer=l2(l2_reg)),
        Dropout(dropout_rate),
        Dense(1, activation="sigmoid")
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
        loss="binary_crossentropy",
        metrics=[
            tf.keras.metrics.Precision(name="precision"),
            tf.keras.metrics.Recall(name="recall"),
            tf.keras.metrics.AUC(name="AUPRC", curve="PR")
        ]
    )

    model.summary()
    return model


                     Training + evaluation helper

In [15]:
def train_and_evaluate_mlp(
    X_train_proc, y_train,
    X_val_proc, y_val,
    X_test_proc, y_test,
    tag="with_score",
    epochs=50,
    patience=5,
    batch_size=32,
):
    """
    Train the MLP on preprocessed arrays, evaluate it on the test split and
    persist plots, metrics and the model.

    Parameters
    ----------
    X_train_proc, X_val_proc, X_test_proc : array-like
        Preprocessed feature matrices; the second dimension of
        ``X_train_proc`` sets the model's input size.
    y_train, y_val, y_test : pandas.Series
        Binary labels for each split.
    tag : str
        Suffix used in plot titles and in every output file name.
    epochs : int
        Maximum number of training epochs.
    patience : int
        Early-stopping patience on ``val_loss``; best weights are restored.
    batch_size : int
        Batch size for the tf.data pipelines.

    Side effects
    ------------
    Writes ``results/mlp_loss_{tag}.png``, ``results/pr_curve_mlp_{tag}.png``,
    ``results/mlp_metrics_{tag}.csv`` and ``models/mlp_{tag}.keras`` relative
    to the current working directory.

    Returns
    -------
    (model, metrics) where ``metrics`` is the ``model.evaluate`` dict extended
    with ``AUPRC_curve`` and ``brier_score``.
    """
    # Build datasets
    train_ds, val_ds, test_ds = make_tf_datasets(
        X_train_proc, y_train, X_val_proc, y_val, X_test_proc, y_test,
        batch_size=batch_size,
    )

    # Build model
    model = build_mlp(input_dim=X_train_proc.shape[1])

    # Callbacks
    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=patience,
        restore_best_weights=True
    )

    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=epochs,
        callbacks=[early_stop],
        verbose=1
    )

    # Plot training curves on a dedicated figure so nothing leaks from or
    # into whatever pyplot figure happens to be current.
    os.makedirs("results", exist_ok=True)
    fig, ax = plt.subplots()
    ax.plot(history.history["loss"], label="train_loss")
    ax.plot(history.history["val_loss"], label="val_loss")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.set_title(f"Loss Curves ({tag})")
    ax.legend()
    fig.savefig(f"results/mlp_loss_{tag}.png", dpi=150)
    plt.close(fig)

    # Evaluate
    results = model.evaluate(test_ds, return_dict=True)
    print(f"\nTest metrics ({tag}):", results)

    # Predict probabilities on test set
    probs = model.predict(X_test_proc).ravel()

    precision, recall, thresholds = precision_recall_curve(y_test, probs)
    auc_pr = auc(recall, precision)
    brier  = brier_score_loss(y_test, probs)

    print(f"AUPRC ({tag}):", auc_pr)
    print(f"Brier Score ({tag}):", brier)
    print("\nClassification Report:\n")
    # zero_division=0 reports the same 0.0 scores as the default but without
    # the undefined-metric warnings when a class is never predicted.
    print(classification_report(y_test, (probs >= 0.5).astype(int), zero_division=0))

    # Save PR curve
    fig, ax = plt.subplots()
    ax.plot(recall, precision)
    ax.set_title(f"Precision-Recall Curve ({tag}) AUC={auc_pr:.3f}")
    ax.set_xlabel("Recall")
    ax.set_ylabel("Precision")
    fig.savefig(f"results/pr_curve_mlp_{tag}.png", dpi=150)
    plt.close(fig)

    # Save metrics
    metrics = results.copy()
    metrics["AUPRC_curve"] = auc_pr
    metrics["brier_score"] = brier
    pd.DataFrame([metrics]).to_csv(f"results/mlp_metrics_{tag}.csv", index=False)

    # Save model
    os.makedirs("models", exist_ok=True)
    model.save(f"models/mlp_{tag}.keras")

    return model, metrics


            Experiment 1 — With impact_score (Model A)

In [16]:
# Features for Model A (with impact_score)
# NOTE(review): the recorded training log for this model reaches ~1.0 precision/recall
# on validation within the first epochs; presumably impacting_delivery is derived from
# impact_score (label leakage) — TODO confirm how the label was generated.
numeric_features_A = ["impact_score"]
# NOTE(review): "counties" holds a comma-separated list of areas per route and is
# one-hot encoded as one opaque string. The recorded output shows 14000 processed
# columns for 13999 training rows, i.e. one category per training route, so this
# feature cannot generalize to unseen routes as encoded.
categorical_features_A = ["counties"]
feature_cols_A = numeric_features_A + categorical_features_A

# Stratified train/val/test split restricted to the Model A feature columns.
X_train_A, y_train_A, X_val_A, y_val_A, X_test_A, y_test_A = make_splits(
    df, feature_cols_A, target_col="impacting_delivery"
)

# Fit the scaler/encoder on the training split only, then transform all three splits.
preprocessor_A = build_preprocessor(numeric_features_A, categorical_features_A)
X_train_proc_A, X_val_proc_A, X_test_proc_A = preprocess_and_to_arrays(
    preprocessor_A, X_train_A, X_val_A, X_test_A
)


Train: (13999, 2) Val: (3001, 2) Test: (3000, 2)
Label distribution (train):
impacting_delivery
0    0.765055
1    0.234945
Name: proportion, dtype: float64
Processed feature dim: 14000


In [17]:
# Train & evaluate MLP (with impact_score)
mlp_with_score, metrics_with_score = train_and_evaluate_mlp(
    X_train_proc_A, y_train_A,
    X_val_proc_A, y_val_A,
    X_test_proc_A, y_test_A,
    tag="with_score"
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 29ms/step - AUPRC: 0.9913 - loss: 0.1513 - precision: 0.9706 - recall: 0.9425 - val_AUPRC: 1.0000 - val_loss: 0.0371 - val_precision: 0.9958 - val_recall: 1.0000
Epoch 2/50
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 22ms/step - AUPRC: 1.0000 - loss: 0.0193 - precision: 0.9991 - recall: 0.9991 - val_AUPRC: 1.0000 - val_loss: 0.0295 - val_precision: 1.0000 - val_recall: 1.0000
Epoch 3/50
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 22ms/step - AUPRC: 1.0000 - loss: 0.0126 - precision: 1.0000 - recall: 1.0000 - val_AUPRC: 1.0000 - val_loss: 0.0268 - val_precision: 1.0000 - val_recall: 0.9943
Epoch 4/50
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 22ms/step - AUPRC: 1.0000 - loss: 0.0109 - precision: 1.0000 - recall: 1.0000 - val_AUPRC: 1.0000 - val_loss: 0.0244 - val_precision: 1.0000 - val_recall: 1.0000
Epoch 5/50
[1m438/438[0m [32m━━━━━━━━

        Experiment 2 — Without impact_score   (Model B)

In [18]:
# Features for Model B (without impact_score)
numeric_features_B = []   # none
# NOTE(review): "counties" is one-hot encoded as one opaque string per route; the
# recorded output shows 13999 processed columns for 13999 training rows, so each
# training route is its own category — TODO consider splitting the list into
# per-county indicator features.
categorical_features_B = ["counties"]
# Build a new list (as for Model A) rather than aliasing categorical_features_B,
# so later edits to one list cannot silently change the other.
feature_cols_B = numeric_features_B + categorical_features_B

# Stratified train/val/test split restricted to the Model B feature columns.
X_train_B, y_train_B, X_val_B, y_val_B, X_test_B, y_test_B = make_splits(
    df, feature_cols_B, target_col="impacting_delivery"
)

# Fit the encoder on the training split only, then transform all three splits.
preprocessor_B = build_preprocessor(numeric_features_B, categorical_features_B)
X_train_proc_B, X_val_proc_B, X_test_proc_B = preprocess_and_to_arrays(
    preprocessor_B, X_train_B, X_val_B, X_test_B
)


Train: (13999, 1) Val: (3001, 1) Test: (3000, 1)
Label distribution (train):
impacting_delivery
0    0.765055
1    0.234945
Name: proportion, dtype: float64
Processed feature dim: 13999


In [19]:
# Train & evaluate MLP (without impact_score)
mlp_no_score, metrics_no_score = train_and_evaluate_mlp(
    X_train_proc_B, y_train_B,
    X_val_proc_B, y_val_B,
    X_test_proc_B, y_test_B,
    tag="no_score"
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 43ms/step - AUPRC: 0.2351 - loss: 0.5657 - precision: 0.2500 - recall: 0.0012 - val_AUPRC: 0.2349 - val_loss: 0.5548 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 2/50
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 46ms/step - AUPRC: 0.9523 - loss: 0.4055 - precision: 1.0000 - recall: 0.0453 - val_AUPRC: 0.2349 - val_loss: 0.6038 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 3/50
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 40ms/step - AUPRC: 1.0000 - loss: 0.1899 - precision: 1.0000 - recall: 0.9729 - val_AUPRC: 0.2349 - val_loss: 0.5970 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 4/50
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 44ms/step - AUPRC: 1.0000 - loss: 0.1480 - precision: 1.0000 - recall: 0.9884 - val_AUPRC: 0.2349 - val_loss: 0.5916 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 5/

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


                   Comparison table

In [20]:
# Side-by-side summary of the curve-based AUPRC and Brier score for both models.
comparison = pd.DataFrame(
    [
        {
            "model": model_label,
            "AUPRC": model_metrics["AUPRC_curve"],
            "brier": model_metrics["brier_score"],
        }
        for model_label, model_metrics in (
            ("MLP_with_score", metrics_with_score),
            ("MLP_no_score", metrics_no_score),
        )
    ]
)

comparison


Unnamed: 0,model,AUPRC,brier
0,MLP_with_score,1.0,0.001138
1,MLP_no_score,0.6175,0.179802
