In [1]:
import os
import pickle
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

# ---------------------------------------------------------------------------
# Data configuration
# ---------------------------------------------------------------------------
# Input CSV written by the MC Dropout model run.
TRAIN_CSV = "data_rpm_with_predictions_residuals.csv"

# Column the network learns to predict.
TARGET = "net_energy_kwh"

# Input columns, in the order they are passed to the network.
FEATURES = [
    "mass",
    "rolling_resistance_coefficient",
    "drag_coefficient",
    "frontal_area",
    "slope_percent",
    "wheel_diameter",
    "reduction_ratio",
    "drivetrain_efficiency",
    "velocity",
    "distance",
    "battery_efficiency",
]

# ---------------------------------------------------------------------------
# Training hyper-parameters
# ---------------------------------------------------------------------------
RANDOM_STATE = 42  # shared seed for the split, NumPy and PyTorch
TEST_SIZE = 0.2    # fraction of rows held out from training
BATCH_SIZE = 32
EPOCHS = 150       # upper bound on the number of training epochs
LR = 1e-3          # Adam learning rate

# ---------------------------------------------------------------------------
# Runtime setup
# ---------------------------------------------------------------------------
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Seed both RNGs so repeated runs start from the same state.
np.random.seed(RANDOM_STATE)
torch.manual_seed(RANDOM_STATE)


class StandardMLP(nn.Module):
    """Fully connected regressor with layer widths in_dim -> 64 -> 128 -> 64 -> 1.

    Every hidden linear layer is followed by a LeakyReLU activation; the final
    linear layer has no activation, so the output is an unbounded scalar per row.
    """

    def __init__(self, in_dim: int):
        """Build the layer stack for inputs with ``in_dim`` features."""
        super().__init__()

        hidden_widths = (64, 128, 64)
        layers = []
        prev_width = in_dim
        for width in hidden_widths:
            layers.append(nn.Linear(prev_width, width))
            layers.append(nn.LeakyReLU())
            prev_width = width
        # Output head: a single regression value, no activation.
        layers.append(nn.Linear(prev_width, 1))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for ``x``; the last dimension has size 1."""
        prediction = self.net(x)
        return prediction


def _make_loader(features: np.ndarray, targets: np.ndarray, *, shuffle: bool) -> DataLoader:
    """Wrap NumPy arrays in a DataLoader of float32 (features, target-column) batches."""
    dataset = TensorDataset(
        torch.tensor(features, dtype=torch.float32),
        # Targets become a column so they match the model's (N, 1) output.
        torch.tensor(targets.reshape(-1, 1), dtype=torch.float32),
    )
    return DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=shuffle)


def _run_epoch(model, loader, loss_fn, optimizer=None) -> float:
    """Run one pass over ``loader`` and return the per-sample mean loss.

    The model is trained when ``optimizer`` is given; otherwise it is put in
    eval mode and run without gradient tracking. Returns 0.0 for an empty
    loader instead of dividing by zero.
    """
    training = optimizer is not None
    model.train(training)

    total_loss = 0.0
    n_samples = 0
    with torch.set_grad_enabled(training):
        for xb, yb in loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            loss = loss_fn(model(xb), yb)
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            # Weight each batch by its size so a short final batch is not
            # over-represented in the epoch average.
            batch_size = xb.size(0)
            total_loss += loss.item() * batch_size
            n_samples += batch_size

    return total_loss / max(n_samples, 1)


def main() -> None:
    """Train the standard MLP and export its predictions next to the MC Dropout ones.

    Steps:
      1. Load ``TRAIN_CSV`` and verify it has the MC Dropout prediction and
         residual columns plus every feature/target column.
      2. Split the rows, fit a ``StandardScaler`` on the training features only,
         and train ``StandardMLP`` with Adam + MSE loss.
      3. Stop early after 20 epochs without improvement on the held-out split,
         then restore the weights from the best epoch.
      4. Predict for every row (training rows included) and write the original
         columns plus ``<TARGET>_standard_predicted`` / ``_standard_residual``
         to ``comparison_results/residual_plot_data.csv``.

    Prints an error and returns without training when required columns are
    missing.
    """
    print("=" * 60)
    print("TRAINING STANDARD NEURAL NETWORK (SAVE CSV ONLY)")
    print("=" * 60)

    df = pd.read_csv(TRAIN_CSV)

    if f"{TARGET}_predicted" not in df.columns or f"{TARGET}_residual" not in df.columns:
        print("ERROR: CSV doesn't have MC Dropout predictions!")
        print(f"Expected columns: '{TARGET}_predicted' and '{TARGET}_residual'")
        print("Make sure to run your MC Dropout model first.")
        return

    # Report missing inputs the same way instead of failing with a KeyError.
    missing = [col for col in (*FEATURES, TARGET) if col not in df.columns]
    if missing:
        print(f"ERROR: CSV is missing required columns: {missing}")
        return

    # Kept at the CSV's native precision for the residual computation below.
    y_true = df[TARGET].to_numpy()

    print(f"Loaded {len(df)} rows with MC Dropout predictions")

    X = df[FEATURES].to_numpy(dtype=np.float32)
    y = df[TARGET].to_numpy(dtype=np.float32)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
    )

    # Fit the scaler on training rows only so held-out statistics do not leak.
    x_scaler = StandardScaler()
    X_train_s = x_scaler.fit_transform(X_train)
    X_test_s = x_scaler.transform(X_test)

    model = StandardMLP(in_dim=X_train_s.shape[1]).to(DEVICE)

    train_loader = _make_loader(X_train_s, y_train, shuffle=True)
    # NOTE: the held-out split doubles as the early-stopping validation set.
    val_loader = _make_loader(X_test_s, y_test, shuffle=False)

    optimizer = torch.optim.Adam(model.parameters(), lr=LR)
    loss_fn = nn.MSELoss()

    best_val_loss = float("inf")
    best_state = None
    patience = 20
    patience_counter = 0

    for epoch in range(EPOCHS):
        train_loss = _run_epoch(model, train_loader, loss_fn, optimizer)
        avg_val_loss = _run_epoch(model, val_loader, loss_fn)

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            patience_counter = 0
            # Snapshot the weights; state_dict() returns live references, so
            # clone them or later optimizer steps would overwrite the copy.
            best_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

        if (epoch + 1) % 20 == 0:
            print(
                f"Epoch {epoch+1}/{EPOCHS}: "
                f"Train Loss: {train_loss:.4f}, "
                f"Val Loss: {avg_val_loss:.4f}"
            )

    # Predict with the best-validation weights, not whatever the last epoch
    # left behind (early stopping fires `patience` epochs after the best one).
    if best_state is not None:
        model.load_state_dict(best_state)

    X_all_scaled = x_scaler.transform(X)
    model.eval()
    with torch.no_grad():
        X_tensor = torch.tensor(X_all_scaled, dtype=torch.float32).to(DEVICE)
        nn_pred = model(X_tensor).cpu().numpy().flatten()

    nn_residual = y_true - nn_pred

    results_df = df.copy()
    results_df[f"{TARGET}_standard_predicted"] = nn_pred
    results_df[f"{TARGET}_standard_residual"] = nn_residual

    os.makedirs("comparison_results", exist_ok=True)
    csv_path = os.path.join("comparison_results", "residual_plot_data.csv")
    # index=False keeps the DataFrame index out of the written file.
    results_df.to_csv(csv_path, index=False)
    print(f"\n✓ Saved plot-ready CSV to: {csv_path}")


# Entry point: run the pipeline only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


TRAINING STANDARD NEURAL NETWORK (SAVE CSV ONLY)
Loaded 50000 rows with MC Dropout predictions
Epoch 20/150: Train Loss: 1.7703, Val Loss: 2.3822
Epoch 40/150: Train Loss: 1.0480, Val Loss: 0.3990
Epoch 60/150: Train Loss: 0.9742, Val Loss: 0.3003
Epoch 80/150: Train Loss: 0.8219, Val Loss: 0.4181
Epoch 100/150: Train Loss: 0.7441, Val Loss: 0.3368
Epoch 120/150: Train Loss: 0.5531, Val Loss: 0.2754
Early stopping at epoch 121

✓ Saved plot-ready CSV to: comparison_results\residual_plot_data.csv
