In [1]:
from pathlib import Path
from artifactremoval.modelarch import *

In [2]:
# Report the TensorFlow version, the CUDA/cuDNN versions recorded in its
# build info, and the GPU devices TensorFlow can see from this kernel.
build_info = tf.sysconfig.get_build_info()

print("TF version:", tf.__version__)
for label, info_key in (
    ("Built with CUDA:", "cuda_version"),
    ("Built with cuDNN:", "cudnn_version"),
):
    # .get() prints None instead of raising when the key is absent
    print(label, build_info.get(info_key))
print("GPUs detected by TF:", tf.config.list_physical_devices('GPU'))

TF version: 2.10.0
Built with CUDA: 64_112
Built with cuDNN: 64_8
GPUs detected by TF: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [None]:
# Loading and normalizing the data

# --- Project paths (resolved relative to the parent of the working dir) ---
base_dir = Path.cwd().parent
data_root = base_dir / "data"
input_dir = data_root / "ratings" / "aggregate_data"
hyperparam_dir = data_root / "hyperparam_tuning"
model_dir = data_root / "trained_models"

# --- Load the newest "spectral_train_" pickle found in input_dir ---
train_data = load_most_recent_pickle(input_dir, prefix="spectral_train_")

# --- Filter out entries with no consensus_rating ---
filtered = [
    entry for entry in train_data
    if entry.get("consensus_rating") is not None
]
print(f"Kept {len(filtered)}/{len(train_data)} spectra with valid labels")


def _stack_field(entries, key):
    """Stack the `key` value of every entry into one array (new leading axis)."""
    return np.stack([entry[key] for entry in entries])


# --- One array per input channel, plus the label vector ---
raw_spec = _stack_field(filtered, 'raw_spectrum')
water_spec = _stack_field(filtered, 'water_siref')
fit1_spec = _stack_field(filtered, 'midas_fit')
fit2_spec = _stack_field(filtered, 'nnfit')
y = np.array([entry['consensus_rating'] for entry in filtered])

# 1) spectra: apply the shared z-score helper to each spectral array
raw_z = zscore_per_spectrum(raw_spec)
fit1_z = zscore_per_spectrum(fit1_spec)
fit2_z = zscore_per_spectrum(fit2_spec)

# 2) water: log-compress + min–max
eps = 1e-6  # keeps log10 finite at zero and the min–max denominator non-zero
wlog = np.log10(np.abs(water_spec) + eps)

# Reduce along axis 1 and keep that axis so the results broadcast against wlog.
per_row = dict(axis=1, keepdims=True)
wmin = wlog.min(**per_row)
wmax = wlog.max(**per_row) + eps
water_norm = np.divide(wlog - wmin, wmax - wmin)

# 1-A  confirm shapes match
# Each label is paired with its array in a single mapping so the printed row
# cannot drift out of sync with the data it describes (the previous parallel
# lists had fit1/fit2 swapped, so each fit's stats appeared under the other's
# label).
normalized_channels = {
    "raw": raw_z,
    "water": water_norm,
    "fit1": fit1_z,
    "fit2": fit2_z,
}
for name, arr in normalized_channels.items():
    print(f"{name:>4}: {arr.shape},  min={arr.min():.2f}, max={arr.max():.2f}")

# All four stacked input arrays must share one shape, and there must be
# exactly one label per stacked spectrum.
assert len({a.shape for a in [raw_spec, water_spec, fit1_spec, fit2_spec]}) == 1, \
    "raw/water/fit1/fit2 arrays do not all have the same shape"
assert y.shape[0] == raw_spec.shape[0], \
    "number of labels does not match number of spectra"

# Build the tuner object pointing at the "bayes_narrowed" project inside
# hyperparam_dir. No search is launched in this cell, so the best
# hyperparameters read below depend on whatever trials the tuner finds there
# (presumably reloaded from a previous run — TODO confirm).
tuner_kwargs = dict(
    objective="val_accuracy",
    max_trials=40,          # total new BO trials (incl. the seeded 10)
    num_initial_points=0,   # skip random warm-up
    directory=str(hyperparam_dir),
    project_name="bayes_narrowed",
)
bayes_tuner = MyBayesTuner(build_model, **tuner_kwargs)

# Top-ranked hyperparameter set according to the tuner.
best_trial = bayes_tuner.get_best_hyperparameters(num_trials=1)[0]

# Copy only the values consumed downstream into a plain dict.
hp_names = (
    "learning_rate",
    "dropout_rate1",
    "dropout_rate2",
    "dense_units",
    "batch_size",
)
best_hps = {hp_name: best_trial.get(hp_name) for hp_name in hp_names}


In [None]:
# Experiment name -> list of input channels fed to the model.
EXPERIMENTS = {
    "baseline_raw": ["raw"],
    "raw+water": ["raw", "water"],
    "raw+fits": ["raw", "fit1", "fit2"],
    "all_four": ["raw", "water", "fit1", "fit2"],
    "fits+water": ["water", "fit1", "fit2"],
    "fit1_only": ["fit1"],
    "fit2_only": ["fit2"],
    "fits_combined": ["fit1", "fit2"],
}

# ---  choose whether to use tuned hyperparams or defaults ---------------
# NOTE(review): the original comment describes the default as "1e-4/128", yet
# batch_size=32 is passed below and best_hps also carries a batch_size —
# confirm which value run_experiment actually uses.
use_tuned = True     # ← flip to False to run with the defaults
name_suffix = "_tuned" if use_tuned else ""
hps_for_run = best_hps if use_tuned else None

# Arguments that are identical for every experiment.
# NOTE(review): the un-normalised *_spec arrays are passed here; raw_z,
# water_norm, fit1_z and fit2_z computed in the previous cell are not used.
# Confirm that run_experiment normalises its inputs internally.
shared_kwargs = dict(
    model_dir=model_dir,
    raw_arr=raw_spec,
    water_arr=water_spec,
    fit1_arr=fit1_spec,
    fit2_arr=fit2_spec,
    y=y,
    k=5,
    seed=42,
    epochs=40,
    batch_size=32,
    tuned_hps=hps_for_run,
)

# ---  run all experiments in a loop ------------------------------------
# Results are keyed by the bare experiment name (without the "_tuned" suffix).
all_fold_results = {}

for exp_name, channels in EXPERIMENTS.items():
    print(f"\n▶️  Running experiment '{exp_name}'  (channels: {channels})")
    df = run_experiment(
        name=exp_name + name_suffix,
        channels=channels,
        **shared_kwargs,
    )
    all_fold_results[exp_name] = df

# Concatenate the per-experiment results; the dict keys become the outer
# index level, which is named "experiment" (the inner level is named "row").
stacked = pd.concat(all_fold_results)
stacked = stacked.rename_axis(index=["experiment", "row"])

# Discard the inner "row" level, then turn "experiment" into a regular column.
combined = stacked.reset_index(level="row", drop=True).reset_index()

print("\n=== Summary of all folds ===")
print(combined)