# AesthEval — Train models

This notebook trains Early / Late / Hybrid fusion models
on `all_features.csv` (produced by the previous step) and saves every model and preprocessing artifact
into the configured models directory for later comparison.


## PARAMETERS


In [None]:
# --- Data and output locations ---
# Path to the dataset (features + targets).
DATA_PATH = "./all_features.csv"
# Directory where models and artifacts are saved.
MODELS_DIR = "./aesth_models"

# --- Reproducibility / quick runs ---
# Global random seed.
RANDOM_STATE = 420
# Set to an integer N to use N random samples; None uses the full dataset.
SAMPLE_SIZE = None

# --- Model sizes ---
# RF estimators for the early-fusion baseline.
EARLY_RF_N_ESTIMATORS = 300
# RF estimators for the per-stem base models.
BASE_RF_N_ESTIMATORS = 200
# GBR estimators for the hybrid model.
HYBRID_GBR_N_ESTIMATORS = 400

# --- Persistence ---
# Whether to save preprocessing artifacts.
SAVE_PREPROCESSORS = True

print('Parameters set. DATA_PATH=', DATA_PATH)

Parameters set. DATA_PATH= ./all_features.csv


## Imports and create model directory


In [None]:

import os, json, time, datetime, gc
from pathlib import Path
import numpy as np, pandas as pd

from sklearn.model_selection import train_test_split, KFold, cross_val_predict
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.decomposition import PCA
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, r2_score

import joblib # to save the models

print('Libraries imported.')

Models directory: aesth_models


### create directory


In [None]:
# Turn the configured location into a Path and make sure the directory exists
# (missing parents are created; an already existing directory is not an error).
_models_root = Path(MODELS_DIR)
_models_root.mkdir(parents=True, exist_ok=True)
MODELS_DIR = _models_root
print('Models directory:', MODELS_DIR)

## Utility functions for saving artifacts and logging experiments


### Timestamp


In [None]:
import hashlib, datetime

def timestamp():
    """Return the current local time as a compact ``YYYYMMDD_HHMMSS`` string."""
    now = datetime.datetime.now()
    return f"{now:%Y%m%d_%H%M%S}"


### Hash an object


In [None]:
def hash_obj(obj):
    """Return a short, order-independent fingerprint of ``obj``.

    ``obj`` is serialised to JSON with sorted keys (values JSON cannot encode
    are converted with ``str``), and the first 8 hex characters of the MD5
    digest of that text are returned.
    """
    canonical = json.dumps(obj, sort_keys=True, default=str)
    digest = hashlib.md5(canonical.encode()).hexdigest()
    return digest[:8]

### Save an artifact and its metadata


In [None]:
def save_artifact(obj, name_prefix, models_dir=MODELS_DIR, metadata=None):
    """Persist ``obj`` with joblib and write a JSON sidecar describing it.

    Both files share the base name ``{name_prefix}_{timestamp}_{hash}``, where
    the hash is computed from ``metadata`` only (not from ``obj``).

    Parameters
    ----------
    obj : any object joblib can serialise
        The model / preprocessor to store.
    name_prefix : str
        Leading part of the file names.
    models_dir : str or pathlib.Path
        Target directory; it is created if it does not exist yet.
    metadata : dict, optional
        Extra information stored in the JSON sidecar. The caller's dict is
        not modified.

    Returns
    -------
    tuple of (str, str)
        Paths of the ``.joblib`` file and of the ``.json`` sidecar.

    Notes
    -----
    The timestamp has one-second resolution, so two calls within the same
    second using the same prefix and metadata produce the same file names and
    the later call overwrites the earlier files.
    """
    # Accept plain strings as well as Path objects, and do not assume the
    # directory has already been created.
    models_dir = Path(models_dir)
    models_dir.mkdir(parents=True, exist_ok=True)

    metadata = {} if metadata is None else dict(metadata)
    ts = timestamp()
    base_name = f"{name_prefix}_{ts}_{hash_obj(metadata)}"
    joblib_path = models_dir / f"{base_name}.joblib"
    json_path = models_dir / f"{base_name}.json"

    joblib.dump(obj, joblib_path)

    metadata_to_save = dict(metadata)
    metadata_to_save.update({
        "artifact": str(joblib_path.name),
        "saved_at": ts
    })
    with open(json_path, "w") as f:
        json.dump(metadata_to_save, f, indent=2, default=str)

    print(f"Saved: {joblib_path.name}, metadata: {json_path.name}")
    return str(joblib_path), str(json_path)

### Log an experiment


In [None]:
EXPERIMENTS_CSV = MODELS_DIR / "experiments_log.csv"


def log_experiment(row_dict, experiments_csv=EXPERIMENTS_CSV):
    """Append one experiment record to the CSV log, creating it if needed.

    Parameters
    ----------
    row_dict : dict
        One record; keys are column names. The ``'name'`` entry (if any) is
        echoed in the confirmation message.
    experiments_csv : str or pathlib.Path
        Location of the log file.

    Notes
    -----
    Values are written under the columns they belong to even when
    ``row_dict`` lists its keys in a different order than the existing header.
    If the record introduces columns the log does not have yet, the whole file
    is rewritten with the extended header (older rows get empty cells).
    """
    experiments_csv = Path(experiments_csv)
    df_row = pd.DataFrame([row_dict])

    # A missing or zero-byte file has no header yet, so write one.
    has_content = experiments_csv.exists() and experiments_csv.stat().st_size > 0
    if not has_content:
        df_row.to_csv(experiments_csv, index=False)
    else:
        logged_cols = list(pd.read_csv(experiments_csv, nrows=0).columns)
        unseen_cols = [c for c in df_row.columns if c not in logged_cols]
        if unseen_cols:
            # Schema grew: re-read the history as raw strings so existing
            # cells are written back unchanged, then rewrite with all columns.
            history = pd.read_csv(experiments_csv, dtype=str, keep_default_na=False)
            merged = pd.concat([history, df_row], ignore_index=True)
            merged.to_csv(experiments_csv, index=False)
        else:
            # Same schema: order the values like the header before appending.
            aligned = df_row.reindex(columns=logged_cols)
            aligned.to_csv(experiments_csv, mode='a', header=False, index=False)

    print("Logged experiment:", row_dict.get('name'))


## Load Dataset


In [None]:
# Read the full feature table; later cells may replace `df` with a random subset.
print('Loading:', DATA_PATH)
df = pd.read_csv(DATA_PATH)
_n_rows_raw, _n_cols_raw = df.shape
print('Original shape:', (_n_rows_raw, _n_cols_raw))

### Optional: use fewer samples for a quick check of the code


In [None]:
# Optionally shrink the dataset to SAMPLE_SIZE random rows (seeded).
if SAMPLE_SIZE is not None:
    SAMPLE_SIZE = int(SAMPLE_SIZE)
    if SAMPLE_SIZE <= 0:
        raise ValueError('SAMPLE_SIZE must be None or a positive integer')
    _n_available = len(df)
    if SAMPLE_SIZE <= _n_available:
        _subset = df.sample(n=SAMPLE_SIZE, random_state=RANDOM_STATE)
        df = _subset.reset_index(drop=True)
        print('Sampled shape:', df.shape)
    else:
        # Asking for more rows than exist keeps the full frame untouched.
        print('Requested SAMPLE_SIZE > nrows; using full dataframe instead.')

display(df.head())


Loading: ./all_features.csv
Original shape: (500, 280)


Unnamed: 0,song_id,harmonic_mfcc_mean_0,harmonic_mfcc_mean_1,harmonic_mfcc_mean_2,harmonic_mfcc_mean_3,harmonic_mfcc_mean_4,harmonic_mfcc_mean_5,harmonic_mfcc_mean_6,harmonic_mfcc_mean_7,harmonic_mfcc_mean_8,...,memorability_n_annotators,clarity,clarity_std,clarity_n_annotators,naturalness,naturalness_std,naturalness_n_annotators,gender,num_annotators,harmonic_tempo
0,0,-234.011032,93.20713,-19.473122,15.425479,-7.123735,3.265974,-15.967982,-3.900991,-7.250444,...,4,4.0,0.353553,4,4.375,0.414578,4,male,4,
1,1,-340.562622,125.753433,60.19212,12.968719,8.342021,5.732525,-2.957875,2.134922,-7.303535,...,4,2.0,0.707107,4,2.0,0.707107,4,male,4,
2,2,-296.270172,96.759354,-10.450113,19.952477,-0.722513,-1.394691,-11.906327,-3.497026,-13.923201,...,4,2.875,0.892679,4,3.25,0.829156,4,male,4,
3,3,-270.38028,95.47731,-25.079927,45.097095,14.71188,14.881142,6.295753,16.971743,4.258063,...,4,2.125,0.73951,4,2.25,0.433013,4,male,4,
4,4,-107.451653,17.363043,-24.690931,52.119267,5.604693,9.739891,2.110556,5.655409,6.965531,...,4,1.625,0.81968,4,1.75,0.559017,4,female,4,


## Define columns


In [None]:
targets = ['coherence','musicality','memorability','clarity','naturalness']

# Fail fast on the first target column that is absent from the dataset.
for t in targets:
    if t in df.columns:
        continue
    raise ValueError(f"Target column {t} not found in dataset")

# Group the feature columns by the stem prefix they carry.
harmonic_cols, percussive_cols, original_cols = (
    [c for c in df.columns if c.startswith(prefix)]
    for prefix in ('harmonic_', 'percussive_', 'original_')
)

# Everything that is neither a stem feature nor a target counts as metadata.
_claimed = set(harmonic_cols + percussive_cols + original_cols + targets)
meta_cols = [c for c in df.columns if c not in _claimed]

print('Counts -> harmonic, percussive, original:', len(harmonic_cols), len(percussive_cols), len(original_cols))
print('Metadata cols detected:', meta_cols)


Counts -> harmonic, percussive, original: 88 87 87
Metadata cols detected: ['song_id', 'coherence_std', 'coherence_n_annotators', 'musicality_std', 'musicality_n_annotators', 'memorability_std', 'memorability_n_annotators', 'clarity_std', 'clarity_n_annotators', 'naturalness_std', 'naturalness_n_annotators', 'gender', 'num_annotators']


## Train/Test Split


In [None]:
# Metadata columns whose name contains "id" (case-insensitive) are dropped.
drop_cols = [name for name in meta_cols if 'id' in name.lower()]

y = df[targets].copy()
X = df.drop(columns=drop_cols + targets).copy()

# One-hot encode object-typed columns with fewer than 30 distinct values;
# drop_first=True leaves out one level per column.
cat_cols = [
    name for name in X.columns
    if X[name].dtype == 'object' and X[name].nunique() < 30
]
if cat_cols:
    X = pd.get_dummies(X, columns=cat_cols, drop_first=True)
    print('One-hot encoded metadata columns:', cat_cols)

# Hold out 15% of the rows for testing (seeded split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=RANDOM_STATE
)
print('Train/Test shapes:', X_train.shape, X_test.shape)

One-hot encoded metadata columns: ['gender']
Train/Test shapes: (425, 274) (75, 274)


## Evaluation function


In [None]:
def eval_preds(y_true, y_pred, names=targets):
    """Compute per-target RMSE, MAE and R^2.

    Parameters
    ----------
    y_true, y_pred : array-like of shape (n_samples, n_targets)
        Ground truth and predictions. DataFrames, NumPy arrays and nested
        lists are all accepted; a 1-D input is treated as a single target
        column.
    names : sequence of str
        Target names; ``names[i]`` labels column ``i`` of both inputs.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``names`` with columns ``target``, ``rmse``,
        ``mae`` and ``r2``.
    """
    # Work on plain arrays so positional column access is valid for any
    # array-like input, not only (DataFrame, ndarray) pairs.
    true_arr = np.asarray(y_true)
    pred_arr = np.asarray(y_pred)
    if true_arr.ndim == 1:
        true_arr = true_arr.reshape(-1, 1)
    if pred_arr.ndim == 1:
        pred_arr = pred_arr.reshape(-1, 1)

    rows = []
    for i, t in enumerate(names):
        truth_i, pred_i = true_arr[:, i], pred_arr[:, i]
        rmse = root_mean_squared_error(truth_i, pred_i)
        mae = mean_absolute_error(truth_i, pred_i)
        r2 = r2_score(truth_i, pred_i)
        rows.append({'target': t, 'rmse': float(rmse), 'mae': float(mae), 'r2': float(r2)})
    return pd.DataFrame(rows)

## Global preprocessing pipeline

variance threshold -> median impute -> robust scale


In [None]:
# Stem feature columns that survived the column selection done for X.
_stem_feature_pool = harmonic_cols + percussive_cols + original_cols
numeric_features = [c for c in _stem_feature_pool if c in X.columns]

# variance threshold -> median impute -> robust scale
global_preproc = Pipeline(steps=[
    ('var', VarianceThreshold(1e-5)),
    ('impute', SimpleImputer(strategy='median')),
    ('scale', RobustScaler()),
])

if not numeric_features:
    print('No numeric features detected matching harmonic/percussive/original prefixes.')
else:
    # Fit on the training split only.
    global_preproc.fit(X_train[numeric_features])
    if SAVE_PREPROCESSORS:
        save_artifact(
            global_preproc,
            'preproc_global',
            metadata={'type':'global_preproc','features_count':len(numeric_features)},
        )


Saved: preproc_global_20251125_085822_31e5c799.joblib, metadata: preproc_global_20251125_085822_31e5c799.json


## Early Fusion: Train RF on all features


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor

if len(numeric_features) > 0:
    # One random forest per target on the concatenated stem features.
    # NOTE: Pipeline does not clone its steps, so fitting early_pipe refits the
    # shared global_preproc object in place (on the same training data).
    early_pipe = Pipeline([
        ('pre', global_preproc),
        ('model', MultiOutputRegressor(RandomForestRegressor(n_estimators=EARLY_RF_N_ESTIMATORS, n_jobs=-1, random_state=RANDOM_STATE)))
    ])
    early_pipe.fit(X_train[numeric_features], y_train)

    y_pred_early = early_pipe.predict(X_test[numeric_features])
    df_eval_early = eval_preds(y_test, y_pred_early)

    print('Early fusion evaluation:')
    display(df_eval_early)

    if SAVE_PREPROCESSORS:
        save_artifact(early_pipe, 'early_rf', metadata={'fusion':'early','model':'RandomForest','n_features':len(numeric_features)})
    else:
        # MODELS_DIR is a Path here, so join with "/" ("+" with a str raises
        # TypeError). Path(...) also covers a MODELS_DIR that is still a str.
        joblib.dump(early_pipe, Path(MODELS_DIR) / 'early_rf.joblib')
else:
    print('Skipping early fusion: no numeric features available.')


Early fusion evaluation:


Unnamed: 0,target,rmse,mae,r2
0,coherence,0.850769,0.70472,0.376646
1,musicality,0.800818,0.641452,0.500424
2,memorability,0.788027,0.67339,0.408859
3,clarity,0.837665,0.678682,0.416418
4,naturalness,0.786678,0.668941,0.47178


Saved: early_rf_20251125_085841_6a9f1200.joblib, metadata: early_rf_20251125_085841_6a9f1200.json


## Late Fusion: per-stem models + meta-learner


In [None]:
def make_group_preproc(cols):
    """Build the per-stem preprocessing pipeline and fit it on ``X_train[cols]``.

    The steps are: variance threshold -> median imputation -> robust scaling.
    Returns the fitted pipeline.
    """
    steps = [
        ('var', VarianceThreshold(1e-5)),
        ('impute', SimpleImputer(strategy='median')),
        ('scale', RobustScaler()),
    ]
    return Pipeline(steps).fit(X_train[cols])

h_cols = [c for c in harmonic_cols if c in X.columns]
p_cols = [c for c in percussive_cols if c in X.columns]
o_cols = [c for c in original_cols if c in X.columns]

pre_h = make_group_preproc(h_cols) if h_cols else None
pre_p = make_group_preproc(p_cols) if p_cols else None
pre_o = make_group_preproc(o_cols) if o_cols else None


def _new_base_rf():
    """Return an unfitted per-stem base model (one random forest per target)."""
    return MultiOutputRegressor(RandomForestRegressor(n_estimators=BASE_RF_N_ESTIMATORS, n_jobs=-1, random_state=RANDOM_STATE))


base_h = _new_base_rf() if h_cols else None
base_p = _new_base_rf() if p_cols else None
base_o = _new_base_rf() if o_cols else None

# (tag, columns, fitted preprocessor, base model) for every stem that has features.
_stems = [
    (tag, cols, pre, base)
    for tag, cols, pre, base in (
        ('harmonic', h_cols, pre_h, base_h),
        ('percussive', p_cols, pre_p, base_p),
        ('original', o_cols, pre_o, base_o),
    )
    if cols
]

# Fit each base model on the full training split and persist it with its preprocessor.
for _tag, _cols, _pre, _base in _stems:
    _base.fit(_pre.transform(X_train[_cols]), y_train)
    if SAVE_PREPROCESSORS:
        save_artifact(_pre, f'pre_{_tag}', metadata={'cols': len(_cols)})
        save_artifact(_base, f'base_{_tag}_rf', metadata={'cols': len(_cols), 'model': 'RandomForest'})

# Build meta features.
# Training rows use out-of-fold predictions: every row is predicted by a copy
# of (preprocessor + base model) that never saw it. In-sample random-forest
# predictions are much more accurate than what the meta-learner receives at
# test time, so training on them would mis-weight the stems.
# Test rows use the base models fitted on the full training split.
META_CV_FOLDS = 5
_n_folds = min(META_CV_FOLDS, len(X_train))
# The integer seed makes every stem use the same fold assignment.
_oof_cv = KFold(n_splits=_n_folds, shuffle=True, random_state=RANDOM_STATE) if _n_folds >= 2 else None

meta_tr = []
meta_te = []
for _tag, _cols, _pre, _base in _stems:
    if _oof_cv is not None:
        # cross_val_predict fits clones, so the fitted _pre / _base are untouched.
        _stem_pipe = Pipeline([('pre', _pre), ('model', _base)])
        meta_tr.append(cross_val_predict(_stem_pipe, X_train[_cols], y_train, cv=_oof_cv))
    else:
        # Too few training rows to split: fall back to in-sample predictions.
        meta_tr.append(_base.predict(_pre.transform(X_train[_cols])))
    meta_te.append(_base.predict(_pre.transform(X_test[_cols])))

if meta_tr:
    meta_tr = np.hstack(meta_tr)
    meta_te = np.hstack(meta_te)
    meta_learner = MultiOutputRegressor(Ridge())
    meta_learner.fit(meta_tr, y_train)
    y_pred_late = meta_learner.predict(meta_te)
    df_eval_late = eval_preds(y_test, y_pred_late)
    print('Late fusion evaluation:')
    display(df_eval_late)
    save_artifact(
        meta_learner,
        'late_meta_ridge',
        metadata={
            'fusion':'late','base':'rf','meta':'ridge',
            'meta_features': f'oof_{_n_folds}fold' if _oof_cv is not None else 'in_sample',
        },
    )
else:
    print('Skipping late fusion: no base models trained.')


Saved: pre_harmonic_20251125_085846_a4823244.joblib, metadata: pre_harmonic_20251125_085846_a4823244.json
Saved: base_harmonic_rf_20251125_085846_b1c5bb48.joblib, metadata: base_harmonic_rf_20251125_085846_b1c5bb48.json
Saved: pre_percussive_20251125_085853_55f0afb5.joblib, metadata: pre_percussive_20251125_085853_55f0afb5.json
Saved: base_percussive_rf_20251125_085853_b491d409.joblib, metadata: base_percussive_rf_20251125_085853_b491d409.json
Saved: pre_original_20251125_085859_55f0afb5.joblib, metadata: pre_original_20251125_085859_55f0afb5.json
Saved: base_original_rf_20251125_085859_b491d409.joblib, metadata: base_original_rf_20251125_085859_b491d409.json
Late fusion evaluation:


Unnamed: 0,target,rmse,mae,r2
0,coherence,0.823509,0.669306,0.415952
1,musicality,0.763946,0.602148,0.545368
2,memorability,0.764474,0.648934,0.443667
3,clarity,0.836851,0.659154,0.417551
4,naturalness,0.762419,0.64287,0.503855


Saved: late_meta_ridge_20251125_085906_cb501d1b.joblib, metadata: late_meta_ridge_20251125_085906_cb501d1b.json


## Hybrid fusion

PCA per-group -> concat -> GBR


In [None]:
def _reduce_stem(pre, cols, artifact_name):
    """Fit a PCA on one preprocessed training stem and project train and test.

    n_components=0.95 with svd_solver='full' keeps as many components as are
    needed to explain 95% of the variance. Returns (pca, train, test).
    """
    pca = PCA(n_components=0.95, svd_solver='full', random_state=RANDOM_STATE)
    reduced_tr = pca.fit_transform(pre.transform(X_train[cols]))
    reduced_te = pca.transform(pre.transform(X_test[cols]))
    if SAVE_PREPROCESSORS:
        save_artifact(pca, artifact_name, metadata={'cols':len(cols)})
    return pca, reduced_tr, reduced_te


_absent = (None, None, None)
pca_h, H_tr, H_te = _reduce_stem(pre_h, h_cols, 'pca_harmonic') if h_cols else _absent
pca_p, P_tr, P_te = _reduce_stem(pre_p, p_cols, 'pca_percussive') if p_cols else _absent
pca_o, O_tr, O_te = _reduce_stem(pre_o, o_cols, 'pca_original') if o_cols else _absent

# Keep only the stems that produced a representation.
parts_tr = [block for block in (H_tr, P_tr, O_tr) if block is not None]
parts_te = [block for block in (H_te, P_te, O_te) if block is not None]

if not parts_tr:
    print('Skipping hybrid fusion: no group representations available.')
else:
    # Concatenate the reduced stems and fit one gradient-boosting model per target.
    X_hybrid_tr = np.hstack(parts_tr)
    X_hybrid_te = np.hstack(parts_te)
    hybrid_model = MultiOutputRegressor(
        GradientBoostingRegressor(n_estimators=HYBRID_GBR_N_ESTIMATORS, random_state=RANDOM_STATE)
    )
    hybrid_model.fit(X_hybrid_tr, y_train)
    y_pred_hybrid = hybrid_model.predict(X_hybrid_te)
    df_eval_hybrid = eval_preds(y_test, y_pred_hybrid)
    print('Hybrid fusion evaluation:')
    display(df_eval_hybrid)
    save_artifact(hybrid_model, 'hybrid_gbr', metadata={'fusion':'hybrid','reduced_dim':X_hybrid_tr.shape[1]})


Saved: pca_harmonic_20251125_085906_a4823244.joblib, metadata: pca_harmonic_20251125_085906_a4823244.json
Saved: pca_percussive_20251125_085906_55f0afb5.joblib, metadata: pca_percussive_20251125_085906_55f0afb5.json
Saved: pca_original_20251125_085906_55f0afb5.joblib, metadata: pca_original_20251125_085906_55f0afb5.json
Hybrid fusion evaluation:


Unnamed: 0,target,rmse,mae,r2
0,coherence,0.792816,0.640812,0.458677
1,musicality,0.772365,0.62415,0.535293
2,memorability,0.767793,0.604732,0.438827
3,clarity,0.841932,0.664599,0.410458
4,naturalness,0.807427,0.654685,0.443549


Saved: hybrid_gbr_20251125_090010_a73d1efc.joblib, metadata: hybrid_gbr_20251125_090010_a73d1efc.json


## Train and save a separate model per target


In [None]:
# Per-target RMSE on the test split, keyed by target name.
separate_rmse = {}

# Same guard as the early-fusion cell: without stem features there is nothing to fit.
if len(numeric_features) > 0:
    for t in targets:
        print('Training per-target RF for:', t)
        # NOTE: the pipeline reuses the shared global_preproc object; fit()
        # refits it in place on the same training features each time.
        model = Pipeline([
            ('pre', global_preproc),
            ('rf', RandomForestRegressor(n_estimators=EARLY_RF_N_ESTIMATORS, n_jobs=-1, random_state=RANDOM_STATE))
        ])
        model.fit(X_train[numeric_features], y_train[t])
        save_artifact(model, f'separate_rf_{t}', metadata={'target': t, 'model': 'RandomForest'})
        pred = model.predict(X_test[numeric_features])
        rmse = root_mean_squared_error(y_test[t], pred)
        separate_rmse[t] = float(rmse)
        print(f'  {t} RMSE: {rmse:.4f}')
else:
    print('Skipping per-target models: no numeric features available.')


Training per-target RF for: coherence
Saved: separate_rf_coherence_20251125_090013_1ae6786d.joblib, metadata: separate_rf_coherence_20251125_090013_1ae6786d.json
  coherence RMSE: 0.8508
Training per-target RF for: musicality
Saved: separate_rf_musicality_20251125_090018_534ce428.joblib, metadata: separate_rf_musicality_20251125_090018_534ce428.json
  musicality RMSE: 0.8008
Training per-target RF for: memorability
Saved: separate_rf_memorability_20251125_090022_6cc2f731.joblib, metadata: separate_rf_memorability_20251125_090022_6cc2f731.json
  memorability RMSE: 0.7880
Training per-target RF for: clarity
Saved: separate_rf_clarity_20251125_090027_bcccad7f.joblib, metadata: separate_rf_clarity_20251125_090027_bcccad7f.json
  clarity RMSE: 0.8377
Training per-target RF for: naturalness
Saved: separate_rf_naturalness_20251125_090031_aa93c9f3.joblib, metadata: separate_rf_naturalness_20251125_090031_aa93c9f3.json
  naturalness RMSE: 0.7867


## Save manifest and evaluation summary


In [None]:
# Manifest: every file currently in the models directory, sorted by name so
# the output is deterministic. The manifest itself and sub-directories are
# left out, so re-running this cell does not make the manifest list itself.
_models_root = Path(MODELS_DIR)
manifest_path = _models_root / 'manifest.json'
artifacts = [
    {'name': entry.name, 'path': str(entry)}
    for entry in sorted(_models_root.iterdir(), key=lambda p: p.name)
    if entry.is_file() and entry.name != manifest_path.name
]
with open(manifest_path, 'w') as f:
    json.dump(artifacts, f, indent=2)

# Collect eval summaries if present in notebook variables (best-effort).
# A fusion stage that was skipped leaves no df_eval_* variable behind.
eval_frames = {}
for _label, _var_name in (('early', 'df_eval_early'), ('late', 'df_eval_late'), ('hybrid', 'df_eval_hybrid')):
    if _var_name in globals():
        eval_frames[_label] = globals()[_var_name].set_index('target')['rmse']

if eval_frames:
    # One row per target, one RMSE column per fusion strategy.
    eval_summary = (
        pd.concat(eval_frames, axis=1)
        .reset_index()
        .rename(columns={'index':'target'})
    )
    eval_summary.to_csv(_models_root / 'eval_summary_rmse.csv', index=False)
    display(eval_summary)
else:
    print('No eval summaries to write.')


Unnamed: 0,target,early,late,hybrid
0,coherence,0.850769,0.823509,0.792816
1,musicality,0.800818,0.763946,0.772365
2,memorability,0.788027,0.764474,0.767793
3,clarity,0.837665,0.836851,0.841932
4,naturalness,0.786678,0.762419,0.807427
