In [1]:
# CELL 1: imports & basic setup

import os
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

print("Torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())


Torch version: 2.6.0+cu124
CUDA available: True


In [2]:
# CELL 2: detect Spotify folder and load CSV

base_input = "/kaggle/input"
print("Folders in /kaggle/input:")
print(os.listdir(base_input))

# Try to auto-detect a folder with "spotify" in the name
spotify_dirs = [d for d in os.listdir(base_input) if "spotify" in d.lower()]
print("\nDetected Spotify-like folders:", spotify_dirs)

if not spotify_dirs:
    raise RuntimeError(
        "No folder with 'spotify' in its name was found in /kaggle/input. "
        "Make sure you've added the Spotify Tracks dataset to this notebook."
    )

DATA_PATH = os.path.join(base_input, spotify_dirs[0])
print("\nUsing DATA_PATH:", DATA_PATH)

# Find a CSV in that folder
csv_files = [f for f in os.listdir(DATA_PATH) if f.lower().endswith(".csv")]
print("CSV files in DATA_PATH:", csv_files)

if not csv_files:
    raise RuntimeError("No CSV files found inside DATA_PATH.")

CSV_FILE = csv_files[0]
print("Using CSV_FILE:", CSV_FILE)

df = pd.read_csv(os.path.join(DATA_PATH, CSV_FILE))
print("\nRaw shape:", df.shape)
df.head()


Folders in /kaggle/input:
['-spotify-tracks-dataset']

Detected Spotify-like folders: ['-spotify-tracks-dataset']

Using DATA_PATH: /kaggle/input/-spotify-tracks-dataset
CSV files in DATA_PATH: ['dataset.csv']
Using CSV_FILE: dataset.csv

Raw shape: (114000, 21)


Unnamed: 0.1,Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,230666,False,0.676,0.461,...,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,acoustic
1,1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,149610,False,0.42,0.166,...,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4,acoustic
2,2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,210826,False,0.438,0.359,...,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4,acoustic
3,3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,201933,False,0.266,0.0596,...,-18.515,1,0.0363,0.905,7.1e-05,0.132,0.143,181.74,3,acoustic
4,4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,82,198853,False,0.618,0.443,...,-9.681,1,0.0526,0.469,0.0,0.0829,0.167,119.949,4,acoustic


In [3]:
# CELL 3: quick EDA – columns, dtypes, popularity stats

print("Columns:\n", df.columns.tolist())
print("\nDtypes:\n", df.dtypes)

if "popularity" not in df.columns:
    raise RuntimeError("No 'popularity' column found. Check the dataset columns.")

print("\nPopularity stats:")
print(df["popularity"].describe())

print("\nMissing values (top 15):")
print(df.isna().sum().sort_values(ascending=False).head(15))


Columns:
 ['Unnamed: 0', 'track_id', 'artists', 'album_name', 'track_name', 'popularity', 'duration_ms', 'explicit', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature', 'track_genre']

Dtypes:
 Unnamed: 0            int64
track_id             object
artists              object
album_name           object
track_name           object
popularity            int64
duration_ms           int64
explicit               bool
danceability        float64
energy              float64
key                   int64
loudness            float64
mode                  int64
speechiness         float64
acousticness        float64
instrumentalness    float64
liveness            float64
valence             float64
tempo               float64
time_signature        int64
track_genre          object
dtype: object

Popularity stats:
count    114000.000000
mean         33.238535
std          22.305078
min           

In [4]:
# CELL 4: basic cleaning

print("Original shape:", df.shape)

# Drop exact duplicate rows
df = df.drop_duplicates()
print("After dropping duplicates:", df.shape)

# (Optional) Drop rows with missing popularity, if any
df = df.dropna(subset=["popularity"])
print("After dropping rows with missing popularity:", df.shape)

# If there are still missing values in other columns, we'll handle below
print("\nRemaining missing values (top 15):")
print(df.isna().sum().sort_values(ascending=False).head(15))


Original shape: (114000, 21)
After dropping duplicates: (114000, 21)
After dropping rows with missing popularity: (114000, 21)

Remaining missing values (top 15):
artists         1
track_name      1
album_name      1
Unnamed: 0      0
track_id        0
popularity      0
duration_ms     0
explicit        0
danceability    0
energy          0
key             0
loudness        0
mode            0
speechiness     0
acousticness    0
dtype: int64


In [5]:
# CELL 5: feature/target setup + preprocessing

target_col = "popularity"

# Columns that are pure IDs / names (no direct numeric meaning for regression)
drop_id_cols = ["track_id", "artists", "album_name", "track_name"]

# Drop only those ID columns that actually exist
drop_id_cols = [c for c in drop_id_cols if c in df.columns]

print("Dropping ID/text columns:", drop_id_cols)

X = df.drop(columns=[target_col] + drop_id_cols)
y = df[target_col].astype(np.float32).values

print("X shape before encoding:", X.shape)
print("Example columns:", X.columns.tolist()[:15])

# One-hot encode non-numeric columns automatically
X_encoded = pd.get_dummies(X, drop_first=True)

print("X_encoded shape after get_dummies:", X_encoded.shape)

# Fill any remaining missing numeric values with column median
X_encoded = X_encoded.fillna(X_encoded.median(numeric_only=True))

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_encoded).astype(np.float32)

y = y.astype(np.float32)

print("Final X_scaled shape:", X_scaled.shape)
print("y shape:", y.shape)


Dropping ID/text columns: ['track_id', 'artists', 'album_name', 'track_name']
X shape before encoding: (114000, 16)
Example columns: ['Unnamed: 0', 'duration_ms', 'explicit', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature']
X_encoded shape after get_dummies: (114000, 128)
Final X_scaled shape: (114000, 128)
y shape: (114000,)


In [6]:
# CELL 6: train/val/test split

# First: train+val vs test
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X_scaled, y, test_size=0.15, random_state=42
)

# Then: split train_val into train and val
val_size = 0.1765  # ~15% of total (0.1765 * 0.85 ≈ 0.15)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=val_size, random_state=42
)

print("Train:", X_train.shape, y_train.shape)
print("Val  :", X_val.shape, y_val.shape)
print("Test :", X_test.shape, y_test.shape)


Train: (79797, 128) (79797,)
Val  : (17103, 128) (17103,)
Test : (17100, 128) (17100,)


In [7]:
# CELL X1: install & import xgboost

!pip install xgboost --quiet

import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np


In [8]:
# CELL X2: prepare DMatrix for train/val/test

dtrain = xgb.DMatrix(X_train, label=y_train)
dval   = xgb.DMatrix(X_val,   label=y_val)
dtest  = xgb.DMatrix(X_test,  label=y_test)


In [9]:
# HY1: small manual hyperparameter search for XGBoost

import xgboost as xgb
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

def train_and_eval(params_base, extra_params, dtrain, dval, dtest, num_boost_round=2000):
    params = params_base.copy()
    params.update(extra_params)

    evals = [(dtrain, "train"), (dval, "val")]

    model = xgb.train(
        params,
        dtrain,
        num_boost_round=num_boost_round,
        evals=evals,
        early_stopping_rounds=50,
        verbose_eval=False,  # set to 100 if you want logs
    )

    # Best val RMSE is stored in model.best_score
    val_rmse = float(model.best_score)

    # Test metrics
    y_pred_test = model.predict(dtest, iteration_range=(0, model.best_iteration + 1))
    rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
    r2_test   = r2_score(y_test, y_pred_test)

    return model, val_rmse, rmse_test, r2_test

# Base params (same style as before)
base_params = {
    "objective": "reg:squarederror",
    "eval_metric": "rmse",
    "eta": 0.05,
    "lambda": 1.0,
    "alpha": 0.0,
}

# A small list of candidate hyperparameter combos
param_candidates = [
    {"max_depth": 6, "min_child_weight": 1, "subsample": 0.8, "colsample_bytree": 0.8, "gamma": 0.0},
    {"max_depth": 8, "min_child_weight": 1, "subsample": 0.8, "colsample_bytree": 0.8, "gamma": 0.0},
    {"max_depth": 10,"min_child_weight": 1, "subsample": 0.8, "colsample_bytree": 0.8, "gamma": 0.0},
    {"max_depth": 8, "min_child_weight": 5, "subsample": 0.8, "colsample_bytree": 0.8, "gamma": 0.0},
    {"max_depth": 8, "min_child_weight": 1, "subsample": 0.7, "colsample_bytree": 0.9, "gamma": 0.0},
    {"max_depth": 8, "min_child_weight": 1, "subsample": 0.9, "colsample_bytree": 0.7, "gamma": 0.0},
    {"max_depth": 8, "min_child_weight": 1, "subsample": 0.9, "colsample_bytree": 0.9, "gamma": 0.0},
    {"max_depth": 8, "min_child_weight": 1, "subsample": 0.8, "colsample_bytree": 0.8, "gamma": 1.0},
]

results = []
best_model = None
best_val_rmse = float("inf")

for i, extra in enumerate(param_candidates):
    print(f"\n=== Candidate {i+1}/{len(param_candidates)}: {extra} ===")
    model_i, val_rmse_i, rmse_test_i, r2_test_i = train_and_eval(
        base_params, extra, dtrain, dval, dtest
    )
    print(f"Val RMSE : {val_rmse_i:.2f}")
    print(f"Test RMSE: {rmse_test_i:.2f}")
    print(f"Test R²  : {r2_test_i:.3f}")

    results.append((extra, val_rmse_i, rmse_test_i, r2_test_i))

    if val_rmse_i < best_val_rmse:
        best_val_rmse = val_rmse_i
        best_model = model_i

print("\n=== Summary of candidates ===")
for extra, val_rmse_i, rmse_test_i, r2_test_i in results:
    print(f"{extra} -> Val RMSE {val_rmse_i:.2f}, Test RMSE {rmse_test_i:.2f}, Test R² {r2_test_i:.3f}")



=== Candidate 1/8: {'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.0} ===
Val RMSE : 15.91
Test RMSE: 15.62
Test R²  : 0.505

=== Candidate 2/8: {'max_depth': 8, 'min_child_weight': 1, 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.0} ===
Val RMSE : 15.34
Test RMSE: 14.99
Test R²  : 0.544

=== Candidate 3/8: {'max_depth': 10, 'min_child_weight': 1, 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.0} ===
Val RMSE : 15.07
Test RMSE: 14.78
Test R²  : 0.557

=== Candidate 4/8: {'max_depth': 8, 'min_child_weight': 5, 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.0} ===
Val RMSE : 15.27
Test RMSE: 14.92
Test R²  : 0.549

=== Candidate 5/8: {'max_depth': 8, 'min_child_weight': 1, 'subsample': 0.7, 'colsample_bytree': 0.9, 'gamma': 0.0} ===
Val RMSE : 15.37
Test RMSE: 15.02
Test R²  : 0.543

=== Candidate 6/8: {'max_depth': 8, 'min_child_weight': 1, 'subsample': 0.9, 'colsample_bytree': 0.7, 'gamma': 0.0} ===
Val RMSE : 15.24
T

In [10]:
# HY2: save best XGBoost model from the search

best_model.save_model("spotify_xgb_best.json")
print("Saved best XGBoost model.")


Saved best XGBoost model.


In [11]:
# CELL D1: teacher (XGBoost) predictions on train/val/test

import xgboost as xgb
import numpy as np

# DMatrix without labels (labels aren't needed to predict)
dtrain_no = xgb.DMatrix(X_train)
dval_no   = xgb.DMatrix(X_val)
dtest_no  = xgb.DMatrix(X_test)

y_train_teacher = best_model.predict(dtrain_no)
y_val_teacher   = best_model.predict(dval_no)
y_test_teacher  = best_model.predict(dtest_no)

print("Teacher preds (train) shape:", y_train_teacher.shape)
print("Teacher preds (val)   shape:", y_val_teacher.shape)
print("Teacher preds (test)  shape:", y_test_teacher.shape)

print("\nTeacher train preds stats:")
print("min:", float(y_train_teacher.min()), "max:", float(y_train_teacher.max()))
print("mean:", float(y_train_teacher.mean()))


Teacher preds (train) shape: (79797,)
Teacher preds (val)   shape: (17103,)
Teacher preds (test)  shape: (17100,)

Teacher train preds stats:
min: -6.048275470733643 max: 98.70370483398438
mean: 33.24055480957031


In [12]:
# CELL SD1: larger student MLP for distillation

import torch
import torch.nn as nn

input_dim = X_train.shape[1]
print("Input dim:", input_dim)

class SpotifyStudentMLP_Big(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 768),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(768, 512),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.20),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.10),
            nn.Linear(128, 1)   # predicts teacher's popularity
        )

    def forward(self, x):
        return self.net(x).squeeze(-1)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
student_big = SpotifyStudentMLP_Big(input_dim).to(device)

print(student_big)


Input dim: 128
SpotifyStudentMLP_Big(
  (net): Sequential(
    (0): Linear(in_features=128, out_features=768, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.25, inplace=False)
    (3): Linear(in_features=768, out_features=512, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.25, inplace=False)
    (6): Linear(in_features=512, out_features=256, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.2, inplace=False)
    (9): Linear(in_features=256, out_features=128, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.1, inplace=False)
    (12): Linear(in_features=128, out_features=1, bias=True)
  )
)


In [13]:
# H1: hybrid distillation dataset (true labels + teacher labels)

import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

class SpotifyHybridDataset(Dataset):
    def __init__(self, X, y_true, y_teacher):
        self.X = torch.from_numpy(X)
        self.y_true = torch.from_numpy(y_true.astype(np.float32))
        self.y_teacher = torch.from_numpy(y_teacher.astype(np.float32))

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        return self.X[idx], self.y_true[idx], self.y_teacher[idx]


train_hybrid_ds = SpotifyHybridDataset(X_train, y_train, y_train_teacher)
val_hybrid_ds   = SpotifyHybridDataset(X_val,   y_val,   y_val_teacher)
test_hybrid_ds  = SpotifyHybridDataset(X_test,  y_test,  y_test_teacher)

train_hybrid_loader = DataLoader(train_hybrid_ds, batch_size=512, shuffle=True)
val_hybrid_loader   = DataLoader(val_hybrid_ds,   batch_size=2048, shuffle=False)
test_hybrid_loader  = DataLoader(test_hybrid_ds,  batch_size=2048, shuffle=False)

len(train_hybrid_ds), len(val_hybrid_ds), len(test_hybrid_ds)


(79797, 17103, 17100)

In [14]:
# H2: create a new hybrid student model (same architecture as big student)

import torch.nn as nn

input_dim = X_train.shape[1]
print("Input dim:", input_dim)

class SpotifyStudentMLP_Big(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 768),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(768, 512),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.20),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.10),
            nn.Linear(128, 1)
        )

    def forward(self, x):
        return self.net(x).squeeze(-1)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
student_hybrid = SpotifyStudentMLP_Big(input_dim).to(device)

print(student_hybrid)


Input dim: 128
SpotifyStudentMLP_Big(
  (net): Sequential(
    (0): Linear(in_features=128, out_features=768, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.25, inplace=False)
    (3): Linear(in_features=768, out_features=512, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.25, inplace=False)
    (6): Linear(in_features=512, out_features=256, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.2, inplace=False)
    (9): Linear(in_features=256, out_features=128, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.1, inplace=False)
    (12): Linear(in_features=128, out_features=1, bias=True)
  )
)


In [15]:
# H3: train hybrid student (true + teacher loss)

from sklearn.metrics import mean_squared_error, r2_score

def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

alpha = 0.6   # weight for TRUE labels vs teacher

criterion = nn.MSELoss(reduction="mean")
optimizer = torch.optim.AdamW(student_hybrid.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", factor=0.5, patience=3, verbose=True
)

num_epochs = 120
best_val_rmse_true = float("inf")
best_state_dict_h = None
patience = 12
epochs_no_improve = 0

for epoch in range(1, num_epochs + 1):
    # ---- Train ----
    student_hybrid.train()
    train_losses = []

    for xb, yb_true, yb_teacher in train_hybrid_loader:
        xb = xb.to(device)
        yb_true = yb_true.to(device)
        yb_teacher = yb_teacher.to(device)

        optimizer.zero_grad()
        preds = student_hybrid(xb)
        loss_true    = criterion(preds, yb_true)
        loss_teacher = criterion(preds, yb_teacher)
        loss = alpha * loss_true + (1 - alpha) * loss_teacher
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())

    train_loss = np.mean(train_losses)

    # ---- Validate ----
    student_hybrid.eval()
    with torch.no_grad():
        X_val_torch = torch.from_numpy(X_val).float().to(device)
        val_preds = student_hybrid(X_val_torch).cpu().numpy()

    # vs TRUE
    val_rmse_true = rmse(y_val, val_preds)
    val_r2_true   = r2_score(y_val, val_preds)

    # vs TEACHER
    val_rmse_teacher = rmse(y_val_teacher, val_preds)
    val_r2_teacher   = r2_score(y_val_teacher, val_preds)

    scheduler.step(val_rmse_true)

    if val_rmse_true + 0.05 < best_val_rmse_true:
        best_val_rmse_true = val_rmse_true
        best_state_dict_h = student_hybrid.state_dict()
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print(f"Early stopping at epoch {epoch}.")
            break

    if epoch == 1 or epoch % 5 == 0:
        print(
            f"Epoch {epoch:03d} | "
            f"Train Loss: {train_loss:.4f} | "
            f"Val RMSE vs TRUE: {val_rmse_true:.2f} | "
            f"Val R² vs TRUE: {val_r2_true:.3f} | "
            f"Val RMSE vs TEACHER: {val_rmse_teacher:.2f} | "
            f"Val R² vs TEACHER: {val_r2_teacher:.3f}"
        )

print("\nBest validation RMSE vs TRUE (hybrid):", best_val_rmse_true)
if best_state_dict_h is not None:
    student_hybrid.load_state_dict(best_state_dict_h)




Epoch 001 | Train Loss: 422.4624 | Val RMSE vs TRUE: 19.45 | Val R² vs TRUE: 0.245 | Val RMSE vs TEACHER: 12.67 | Val R² vs TEACHER: 0.423
Epoch 005 | Train Loss: 334.1342 | Val RMSE vs TRUE: 19.25 | Val R² vs TRUE: 0.261 | Val RMSE vs TEACHER: 12.41 | Val R² vs TEACHER: 0.447
Epoch 010 | Train Loss: 323.7138 | Val RMSE vs TRUE: 18.84 | Val R² vs TRUE: 0.292 | Val RMSE vs TEACHER: 11.60 | Val R² vs TEACHER: 0.517
Epoch 015 | Train Loss: 318.2666 | Val RMSE vs TRUE: 18.82 | Val R² vs TRUE: 0.293 | Val RMSE vs TEACHER: 11.51 | Val R² vs TEACHER: 0.524
Epoch 020 | Train Loss: 311.9448 | Val RMSE vs TRUE: 18.66 | Val R² vs TRUE: 0.306 | Val RMSE vs TEACHER: 11.27 | Val R² vs TEACHER: 0.544
Epoch 025 | Train Loss: 308.6765 | Val RMSE vs TRUE: 18.60 | Val R² vs TRUE: 0.310 | Val RMSE vs TEACHER: 11.17 | Val R² vs TEACHER: 0.552
Epoch 030 | Train Loss: 303.5456 | Val RMSE vs TRUE: 18.55 | Val R² vs TRUE: 0.314 | Val RMSE vs TEACHER: 11.05 | Val R² vs TEACHER: 0.562
Epoch 035 | Train Loss: 297

In [16]:
# H4: test evaluation for hybrid student

student_hybrid.eval()
with torch.no_grad():
    X_test_torch = torch.from_numpy(X_test).float().to(device)
    test_preds_hybrid = student_hybrid(X_test_torch).cpu().numpy()

# 1) vs TRUE
test_rmse_true_h = rmse(y_test, test_preds_hybrid)
test_r2_true_h   = r2_score(y_test, test_preds_hybrid)

# 2) vs TEACHER
test_rmse_teacher_h = rmse(y_test_teacher, test_preds_hybrid)
test_r2_teacher_h   = r2_score(y_test_teacher, test_preds_hybrid)

print("=== Hybrid Student vs TRUE labels ===")
print(f"Test RMSE: {test_rmse_true_h:.2f}")
print(f"Test R²  : {test_r2_true_h:.3f}")

print("\n=== Hybrid Student vs TEACHER (XGBoost) ===")
print(f"Test RMSE (student vs teacher preds): {test_rmse_teacher_h:.2f}")
print(f"Test R²   (student vs teacher preds): {test_r2_teacher_h:.3f}")


=== Hybrid Student vs TRUE labels ===
Test RMSE: 17.64
Test R²  : 0.369

=== Hybrid Student vs TEACHER (XGBoost) ===
Test RMSE (student vs teacher preds): 9.92
Test R²   (student vs teacher preds): 0.651


In [17]:
# NIO MODEL: choose the hybrid student MLP

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
nio_model = student_hybrid.to(device)   # <-- this is the model we'll use for NIO
nio_model.eval()

from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

with torch.no_grad():
    X_test_torch = torch.from_numpy(X_test).float().to(device)
    nio_test_preds = nio_model(X_test_torch).cpu().numpy()

nio_rmse = rmse(y_test, nio_test_preds)
nio_r2   = r2_score(y_test, nio_test_preds)

print("=== Final NIO model (hybrid MLP) ===")
print(f"Test RMSE: {nio_rmse:.2f}")
print(f"Test R²  : {nio_r2:.3f}")


=== Final NIO model (hybrid MLP) ===
Test RMSE: 17.64
Test R²  : 0.369


In [18]:
# Get feature names from the encoded feature DataFrame
feature_names = list(X_encoded.columns)
print("Number of features:", len(feature_names))
print(feature_names[:10])  # just to see a few


Number of features: 128
['Unnamed: 0', 'duration_ms', 'explicit', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness']


In [19]:
import joblib
import torch

print("Number of features:", len(feature_names))

joblib.dump(scaler, "nio_scaler.pkl")
joblib.dump(feature_names, "nio_feature_names.pkl")
torch.save(nio_model.state_dict(), "nio_model_hybrid.pth")

print("Saved: nio_scaler.pkl, nio_feature_names.pkl, nio_model_hybrid.pth")


Number of features: 128
Saved: nio_scaler.pkl, nio_feature_names.pkl, nio_model_hybrid.pth


In [20]:
# Helper to reload NIO artifacts later

import joblib
import torch.nn as nn

class SpotifyNioMLP(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 768),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(768, 512),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.20),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.10),
            nn.Linear(128, 1),
        )

    def forward(self, x):
        return self.net(x).squeeze(-1)

def load_nio_artifacts():
    scaler = joblib.load("nio_scaler.pkl")
    feature_names = joblib.load("nio_feature_names.pkl")

    input_dim = len(feature_names)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = SpotifyNioMLP(input_dim).to(device)
    state = torch.load("nio_model_hybrid.pth", map_location=device)
    model.load_state_dict(state)
    model.eval()
    return model, scaler, feature_names, device

print("NIO artifacts saving & loader are ready.")


NIO artifacts saving & loader are ready.


In [21]:
# CELL N1 – Load NIO model + scaler + feature names

import torch
import torch.nn as nn
import numpy as np
import joblib

# Define the same architecture we used for the hybrid student
class SpotifyNioMLP(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 768),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(768, 512),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.20),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.10),
            nn.Linear(128, 1),
        )

    def forward(self, x):
        return self.net(x).squeeze(-1)


def load_nio_artifacts():
    scaler = joblib.load("nio_scaler.pkl")
    feature_names = joblib.load("nio_feature_names.pkl")

    input_dim = len(feature_names)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = SpotifyNioMLP(input_dim).to(device)
    state = torch.load("nio_model_hybrid.pth", map_location=device)
    model.load_state_dict(state)
    model.eval()

    return model, scaler, feature_names, device

model_nio, scaler_nio, feature_names_nio, device = load_nio_artifacts()

print("✅ Loaded NIO model")
print("Device        :", device)
print("Num features  :", len(feature_names_nio))
print("First 10 cols :", feature_names_nio[:10])


✅ Loaded NIO model
Device        : cuda
Num features  : 128
First 10 cols : ['Unnamed: 0', 'duration_ms', 'explicit', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness']


In [22]:
# CELL N2 – Pick one test example and inspect it

import numpy as np
import torch

# Choose an index in the test set
idx = 0   # you can change this later (0, 10, 100, etc.)

# Get scaled features and true label for that test example
x0_scaled = X_test[idx:idx+1]          # shape (1, num_features)
y0_true   = float(y_test[idx])

x0_scaled_t = torch.from_numpy(x0_scaled).float().to(device)

# Model prediction (on scaled input)
model_nio.eval()
with torch.no_grad():
    y0_pred = model_nio(x0_scaled_t).item()

print(f"Test example index: {idx}")
print(f"True popularity   : {y0_true:.1f}")
print(f"Pred popularity   : {y0_pred:.2f}")

# Inverse-transform to original feature space for interpretability
x0_unscaled = scaler_nio.inverse_transform(x0_scaled)[0]
example_dict = dict(zip(feature_names_nio, x0_unscaled))

# Show some key audio features (only if they exist in the columns)
key_feats = [
    "danceability", "energy", "loudness", "speechiness",
    "acousticness", "instrumentalness", "liveness",
    "valence", "tempo", "duration_ms"
]

print("\nKey features for this track:")
for f in key_feats:
    if f in example_dict:
        print(f"  {f:15s}: {example_dict[f]:.4f}")


Test example index: 0
True popularity   : 50.0
Pred popularity   : 44.66

Key features for this track:
  danceability   : 0.3690
  energy         : 0.5980
  loudness       : -6.9840
  speechiness    : 0.0304
  acousticness   : 0.0051
  instrumentalness: -0.0000
  liveness       : 0.1760
  valence        : 0.0466
  tempo          : 148.0140
  duration_ms    : 440247.0000


In [23]:
# CELL N3 – choose which features NIO can change and prepare optimization variables

import torch
import numpy as np

# Features we allow NIO to tweak (continuous, interpretable audio features)
opt_feature_names = [
    "danceability",
    "energy",
    "loudness",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "valence",
    "tempo",
]

# Find their indices in the feature vector
opt_indices = [feature_names_nio.index(f) for f in opt_feature_names if f in feature_names_nio]

print("Optimizable features and indices:")
for f in opt_feature_names:
    if f in feature_names_nio:
        print(f"  {f:15s} -> idx {feature_names_nio.index(f)}")
    else:
        print(f"  {f:15s} -> NOT FOUND in feature_names_nio")

print("\nTotal optimizable dims:", len(opt_indices))

# Original scaled vector for this example (from CELL N2)
x0_scaled_np = x0_scaled.copy()  # shape (1, D)

# Extract just the optimizable dimensions (in *scaled* space)
x_params_init = x0_scaled_np[:, opt_indices]  # shape (1, k)

# This is what we will actually optimize
x_params = torch.tensor(
    x_params_init,
    dtype=torch.float32,
    device=device,
    requires_grad=True
)

print("x_params shape:", x_params.shape)
print("Initial optimizable feature values (scaled):")
print(x_params.detach().cpu().numpy())


Optimizable features and indices:
  danceability    -> idx 3
  energy          -> idx 4
  loudness        -> idx 6
  speechiness     -> idx 8
  acousticness    -> idx 9
  instrumentalness -> idx 10
  liveness        -> idx 11
  valence         -> idx 12
  tempo           -> idx 13

Total optimizable dims: 9
x_params shape: torch.Size([1, 9])
Initial optimizable feature values (scaled):
[[-1.139786   -0.17247687  0.2535058  -0.51311016 -0.93167    -0.5041119
  -0.19725525 -1.6488018   0.8628363 ]]


In [24]:
# CELL N4 – Optimize the 9 audio features (NIO loop)

# Hyperparameters for NIO
num_steps = 300
lr = 0.05
lambda_l2 = 0.1      # regularization strength to stay close to original (in scaled space)
clamp_min = -3.0
clamp_max =  3.0

# Keep a copy of the initial params for the regularization term
x_params_init_t = torch.tensor(
    x_params_init,
    dtype=torch.float32,
    device=device
)

optimizer_nio = torch.optim.Adam([x_params], lr=lr)

history = []

for step in range(1, num_steps + 1):
    optimizer_nio.zero_grad()

    # Build full scaled input: start from original x0, replace only the optimizable indices
    x_full = torch.from_numpy(x0_scaled_np).float().to(device)  # shape (1, D)
    x_full = x_full.clone()  # so we can modify it
    x_full[:, opt_indices] = x_params  # plug in optimized params

    # Predict popularity
    y_pred = model_nio(x_full)  # shape (1,)
    # We want to MAXIMIZE y_pred → MINIMIZE (-y_pred)
    loss_pop = -y_pred.mean()

    # Regularization: keep x_params close to original (in scaled space)
    l2_term = torch.mean((x_params - x_params_init_t) ** 2)
    loss = loss_pop + lambda_l2 * l2_term

    # Backprop through inputs
    loss.backward()
    optimizer_nio.step()

    # Clamp to keep values in a reasonable scaled range
    with torch.no_grad():
        x_params.clamp_(clamp_min, clamp_max)

    # Logging
    if step == 1 or step % 50 == 0 or step == num_steps:
        loss_val = float(loss.item())
        pop_val  = float(y_pred.item())
        l2_val   = float(l2_term.item())
        history.append((step, pop_val, loss_val, l2_val))
        print(f"Step {step:03d} | Pred popularity: {pop_val:6.2f} | "
              f"Loss: {loss_val:8.4f} | L2: {l2_val:.4f}")

print("\nNIO optimization finished.")
print("Final optimized scaled params:")
print(x_params.detach().cpu().numpy())


Step 001 | Pred popularity:  44.66 | Loss: -44.6559 | L2: 0.0000
Step 050 | Pred popularity:  82.21 | Loss: -81.8328 | L2: 3.8058
Step 100 | Pred popularity:  89.05 | Loss: -88.4729 | L2: 5.7627
Step 150 | Pred popularity:  89.06 | Loss: -88.4876 | L2: 5.7563
Step 200 | Pred popularity:  89.05 | Loss: -88.4713 | L2: 5.7634
Step 250 | Pred popularity:  89.06 | Loss: -88.4858 | L2: 5.7558
Step 300 | Pred popularity:  89.05 | Loss: -88.4760 | L2: 5.7541

NIO optimization finished.
Final optimized scaled params:
[[ 0.5770211 -3.         3.        -3.        -3.        -3.
   3.        -3.         3.       ]]


In [25]:
# CELL N5 – Decode optimized features and compare before vs after

import numpy as np
import torch

# 1) Build full scaled input with optimized params
x_opt_scaled = x0_scaled_np.copy()  # start from original
x_opt_scaled[:, opt_indices] = x_params.detach().cpu().numpy()

# 2) Inverse-transform both original and optimized to original feature space
x0_unscaled = scaler_nio.inverse_transform(x0_scaled_np)[0]
x_opt_unscaled = scaler_nio.inverse_transform(x_opt_scaled)[0]

base_dict = dict(zip(feature_names_nio, x0_unscaled))
opt_dict  = dict(zip(feature_names_nio, x_opt_unscaled))

# 3) Get predicted popularity for optimized track
x_opt_t = torch.from_numpy(x_opt_scaled).float().to(device)
model_nio.eval()
with torch.no_grad():
    y_opt_pred = model_nio(x_opt_t).item()

print("=== Popularity before vs after NIO ===")
print(f"True popularity        : {y0_true:.1f}")
print(f"Pred popularity (orig) : {y0_pred:.2f}")
print(f"Pred popularity (NIO)  : {y_opt_pred:.2f}")

# Optionally clamp to [0, 100] just for interpretability
y_opt_clamped = max(0.0, min(100.0, y_opt_pred))
print(f"Pred popularity (NIO, clamped to 0–100): {y_opt_clamped:.2f}")

# 4) Show key feature changes
key_feats = [
    "danceability", "energy", "loudness", "speechiness",
    "acousticness", "instrumentalness", "liveness",
    "valence", "tempo", "duration_ms"
]

print("\n=== Key audio features: original vs optimized ===")
print(f"{'feature':15s}  {'orig':>12s}  {'opt':>12s}  {'Δ (opt - orig)':>15s}")
for f in key_feats:
    if f in base_dict:
        orig = base_dict[f]
        opt  = opt_dict[f]
        delta = opt - orig
        print(f"{f:15s}  {orig:12.4f}  {opt:12.4f}  {delta:15.4f}")


=== Popularity before vs after NIO ===
True popularity        : 50.0
Pred popularity (orig) : 44.66
Pred popularity (NIO)  : 89.06
Pred popularity (NIO, clamped to 0–100): 89.06

=== Key audio features: original vs optimized ===
feature                  orig           opt   Δ (opt - orig)
danceability           0.3690        0.6669           0.2979
energy                 0.5980       -0.1132          -0.7112
loudness              -6.9840        6.8290          13.8130
speechiness            0.0304       -0.2325          -0.2629
acousticness           0.0051       -0.6827          -0.6878
instrumentalness       -0.0000       -0.7726          -0.7726
liveness               0.1760        0.7847           0.6087
valence                0.0466       -0.3037          -0.3503
tempo                148.0140      212.0820          64.0680
duration_ms       440247.0000   440247.0000           0.0000


In [26]:
# CELL N6 – Clamp optimized features to realistic ranges and recompute popularity

import numpy as np
import torch

# 1) Start from the optimized original-space features we already had
base_dict = dict(zip(feature_names_nio, x0_unscaled))
opt_dict  = dict(zip(feature_names_nio, x_opt_unscaled))

# Define realistic ranges for the main audio features
ranges = {
    "danceability":      (0.0, 1.0),
    "energy":            (0.0, 1.0),
    "speechiness":       (0.0, 1.0),
    "acousticness":      (0.0, 1.0),
    "instrumentalness":  (0.0, 1.0),
    "liveness":          (0.0, 1.0),
    "valence":           (0.0, 1.0),
    "loudness":         (-60.0, 0.0),   # dB range
    "tempo":            (60.0, 220.0),  # bpm
}

# 2) Build a clamped version in original feature space
opt_clamped_dict = opt_dict.copy()

for feat, (vmin, vmax) in ranges.items():
    if feat in opt_clamped_dict:
        v = opt_clamped_dict[feat]
        opt_clamped_dict[feat] = max(vmin, min(vmax, v))

# 3) Turn dict back into ordered vector, then scale and predict
x_opt_clamped_unscaled = np.array(
    [opt_clamped_dict[f] for f in feature_names_nio]
).reshape(1, -1)

x_opt_clamped_scaled = scaler_nio.transform(x_opt_clamped_unscaled)

x_opt_clamped_t = torch.from_numpy(x_opt_clamped_scaled).float().to(device)
model_nio.eval()
with torch.no_grad():
    y_opt_clamped_pred = model_nio(x_opt_clamped_t).item()

print("=== Popularity with realistic clamped features ===")
print(f"Pred popularity (orig)         : {y0_pred:.2f}")
print(f"Pred popularity (NIO raw)      : {y_opt_pred:.2f}")
print(f"Pred popularity (NIO clamped)  : {y_opt_clamped_pred:.2f}")
print(
    "Pred popularity (NIO clamped, 0–100): "
    f"{max(0.0, min(100.0, y_opt_clamped_pred)):.2f}"
)

print("\n=== Key features: original vs NIO (clamped) ===")
header = "{:15s}  {:>12s}  {:>12s}  {:>15s}".format(
    "feature", "orig", "opt_clamp", "Δ (opt - orig)"
)
print(header)

for f in key_feats:
    if f in base_dict:
        orig = base_dict[f]
        optc = opt_clamped_dict[f]
        delta = optc - orig
        line = "{:15s}  {:12.4f}  {:12.4f}  {:15.4f}".format(
            f, orig, optc, delta
        )
        print(line)


=== Popularity with realistic clamped features ===
Pred popularity (orig)         : 44.66
Pred popularity (NIO raw)      : 89.06
Pred popularity (NIO clamped)  : 59.49
Pred popularity (NIO clamped, 0–100): 59.49

=== Key features: original vs NIO (clamped) ===
feature                  orig     opt_clamp   Δ (opt - orig)
danceability           0.3690        0.6669           0.2979
energy                 0.5980        0.0000          -0.5980
loudness              -6.9840        0.0000           6.9840
speechiness            0.0304        0.0000          -0.0304
acousticness           0.0051        0.0000          -0.0051
instrumentalness       -0.0000        0.0000           0.0000
liveness               0.1760        0.7847           0.6087
valence                0.0466        0.0000          -0.0466
tempo                148.0140      212.0820          64.0680
duration_ms       440247.0000   440247.0000           0.0000




In [27]:
# CELL N7 – Helper: run NIO for a single test example

import numpy as np
import torch

# Just in case, define the same key features list here
key_feats = [
    "danceability", "energy", "loudness", "speechiness",
    "acousticness", "instrumentalness", "liveness",
    "valence", "tempo", "duration_ms"
]

# Ranges for clamping in original feature space
NIO_RANGES = {
    "danceability":      (0.0, 1.0),
    "energy":            (0.0, 1.0),
    "speechiness":       (0.0, 1.0),
    "acousticness":      (0.0, 1.0),
    "instrumentalness":  (0.0, 1.0),
    "liveness":          (0.0, 1.0),
    "valence":           (0.0, 1.0),
    "loudness":         (-60.0, 0.0),
    "tempo":            (60.0, 220.0),
}


def run_nio_for_index(
    idx,
    num_steps=300,
    lr=0.05,
    lambda_l2=0.1,
    clamp_min=-3.0,
    clamp_max=3.0,
    verbose=True,
):
    """
    Run NIO on one example from X_test / y_test.

    Returns a dict with:
      - idx, y_true, y_pred_orig, y_pred_nio_raw, y_pred_nio_clamped
      - base_dict (original features), opt_dict (raw NIO), opt_clamped_dict
    """

    # -------- 1) Base example --------
    x0_scaled = X_test[idx:idx+1]          # (1, D)
    y0_true = float(y_test[idx])

    x0_scaled_t = torch.from_numpy(x0_scaled).float().to(device)
    model_nio.eval()
    with torch.no_grad():
        y0_pred = model_nio(x0_scaled_t).item()

    # Original features in real units
    x0_unscaled = scaler_nio.inverse_transform(x0_scaled)[0]
    base_dict = dict(zip(feature_names_nio, x0_unscaled))

    # -------- 2) Set up optimizable params (scaled space) --------
    x_params_init = x0_scaled[:, opt_indices]            # (1, k)
    x_params = torch.tensor(
        x_params_init,
        dtype=torch.float32,
        device=device,
        requires_grad=True
    )
    x_params_init_t = torch.tensor(
        x_params_init,
        dtype=torch.float32,
        device=device
    )

    optimizer_nio = torch.optim.Adam([x_params], lr=lr)

    history = []

    # -------- 3) NIO loop --------
    for step in range(1, num_steps + 1):
        optimizer_nio.zero_grad()

        # full scaled input
        x_full = torch.from_numpy(x0_scaled).float().to(device)
        x_full = x_full.clone()
        x_full[:, opt_indices] = x_params

        # predicted popularity
        y_pred = model_nio(x_full)
        loss_pop = -y_pred.mean()  # maximize y_pred

        # regularization
        l2_term = torch.mean((x_params - x_params_init_t) ** 2)
        loss = loss_pop + lambda_l2 * l2_term

        loss.backward()
        optimizer_nio.step()

        with torch.no_grad():
            x_params.clamp_(clamp_min, clamp_max)

        if step == 1 or step % 50 == 0 or step == num_steps:
            history.append(
                (step, float(y_pred.item()), float(loss.item()), float(l2_term.item()))
            )

    # -------- 4) Decode NIO result (raw) --------
    x_opt_scaled = x0_scaled.copy()
    x_opt_scaled[:, opt_indices] = x_params.detach().cpu().numpy()

    x_opt_unscaled = scaler_nio.inverse_transform(x_opt_scaled)[0]
    opt_dict = dict(zip(feature_names_nio, x_opt_unscaled))

    x_opt_t = torch.from_numpy(x_opt_scaled).float().to(device)
    model_nio.eval()
    with torch.no_grad():
        y_opt_pred = model_nio(x_opt_t).item()

    # -------- 5) Clamp to realistic ranges in original space --------
    opt_clamped_dict = opt_dict.copy()
    for feat, (vmin, vmax) in NIO_RANGES.items():
        if feat in opt_clamped_dict:
            v = opt_clamped_dict[feat]
            opt_clamped_dict[feat] = max(vmin, min(vmax, v))

    x_opt_clamped_unscaled = np.array(
        [opt_clamped_dict[f] for f in feature_names_nio]
    ).reshape(1, -1)

    x_opt_clamped_scaled = scaler_nio.transform(x_opt_clamped_unscaled)
    x_opt_clamped_t = torch.from_numpy(x_opt_clamped_scaled).float().to(device)
    with torch.no_grad():
        y_opt_clamped_pred = model_nio(x_opt_clamped_t).item()

    if verbose:
        print(f"=== NIO summary for idx {idx} ===")
        print(f"True popularity             : {y0_true:.1f}")
        print(f"Pred popularity (orig)      : {y0_pred:.2f}")
        print(f"Pred popularity (NIO raw)   : {y_opt_pred:.2f}")
        print(f"Pred popularity (NIO clamp) : {y_opt_clamped_pred:.2f}")
        print("Some steps:")
        for (s, yp, ls, l2) in history:
            print(f"  step {s:03d} | pred {yp:6.2f} | loss {ls:8.4f} | L2 {l2:.4f}")

    return {
        "idx": idx,
        "y_true": y0_true,
        "y_pred_orig": y0_pred,
        "y_pred_nio_raw": y_opt_pred,
        "y_pred_nio_clamped": y_opt_clamped_pred,
        "base_dict": base_dict,
        "opt_dict": opt_dict,
        "opt_clamped_dict": opt_clamped_dict,
        "history": history,
    }

print("✅ run_nio_for_index function defined.")


✅ run_nio_for_index function defined.


In [28]:
# CELL N8 – Evaluate NIO on multiple test examples

import random
import numpy as np
import pandas as pd

# How many test songs to evaluate with NIO
num_examples = 30

random.seed(0)
indices = random.sample(range(len(X_test)), num_examples)

results = []
print(f"Running NIO on {num_examples} random test examples...\n")

for idx in indices:
    res = run_nio_for_index(idx, verbose=False)
    results.append(res)

# Build a DataFrame with key metrics
rows = []
for r in results:
    rows.append({
        "idx": r["idx"],
        "y_true": r["y_true"],
        "orig_pred": r["y_pred_orig"],
        "nio_clamped_pred": r["y_pred_nio_clamped"],
        "delta_pred": r["y_pred_nio_clamped"] - r["y_pred_orig"],
    })

df_nio_test = pd.DataFrame(rows)

print("=== First 10 NIO results ===")
print(df_nio_test.head(10))

# Summary statistics
mean_orig = df_nio_test["orig_pred"].mean()
mean_nio  = df_nio_test["nio_clamped_pred"].mean()
mean_delta = df_nio_test["delta_pred"].mean()
median_delta = df_nio_test["delta_pred"].median()

improved = (df_nio_test["delta_pred"] > 0).mean() * 100
improved_2 = (df_nio_test["delta_pred"] > 2).mean() * 100
worse = (df_nio_test["delta_pred"] < 0).mean() * 100

print("\n=== NIO overall impact (on these examples) ===")
print(f"Mean pred (orig)      : {mean_orig:.2f}")
print(f"Mean pred (NIO clamp) : {mean_nio:.2f}")
print(f"Mean Δ (NIO - orig)   : {mean_delta:.2f}")
print(f"Median Δ              : {median_delta:.2f}")
print(f"% tracks improved     : {improved:.1f}%")
print(f"% tracks improved >2  : {improved_2:.1f}%")
print(f"% tracks got worse    : {worse:.1f}%")


Running NIO on 30 random test examples...





=== First 10 NIO results ===
     idx  y_true  orig_pred  nio_clamped_pred  delta_pred
0  12623    49.0  26.382135         58.651276   32.269140
1  13781    10.0   9.454279         74.568123   65.113844
2   1326     0.0   6.718862        112.266083  105.547221
3   8484    46.0  44.036263         61.589630   17.553368
4  16753    55.0  58.657852         81.372902   22.715050
5  15922    49.0  42.636627        103.211586   60.574959
6  13268    12.0  14.291389         32.043793   17.752403
7   9938    75.0  31.266548         95.321579   64.055031
8  15617    52.0  25.982153        139.561310  113.579157
9  11732    49.0  51.322922         80.300209   28.977287

=== NIO overall impact (on these examples) ===
Mean pred (orig)      : 33.13
Mean pred (NIO clamp) : 86.65
Mean Δ (NIO - orig)   : 53.52
Median Δ              : 34.90
% tracks improved     : 100.0%
% tracks improved >2  : 100.0%
% tracks got worse    : 0.0%


