In [None]:
# Step 2. Apply Saumya Artifact Filter
# Step 3. Apply My Artifact Filters
# Step 4. Graph Side By Side

In [None]:
import json
import numpy as np
import pandas as pd
import tensorflow as tf
import gc
from pathlib import Path
from sklearn.preprocessing import LabelEncoder
from artifactremoval.modelarch import *

# ── 1) Paths & load config ─────────────────────────────────────────────────
# Every location is derived from the parent of the current working directory,
# so results depend on where the kernel was started.
base_dir     = Path.cwd().parent
input_dir    = base_dir / "data" / "ratings" / "aggregate_data"
ensemble_dir = base_dir / "results" / "ensembles"
config_path  = ensemble_dir / "config.json"

# JSON is UTF-8 by specification; pin the encoding so the platform's locale
# default (e.g. cp1252 on Windows) is never used to decode the file.
# `config` is iterated later as {experiment key -> settings dict}; the
# prediction loop reads "model_path", "channels", "batch_size" and "threshold".
with open(config_path, "r", encoding="utf-8") as f:
    config = json.load(f)

# ── 2) Load and filter test data ───────────────────────────────────────────
# Keep only the entries that carry a "unique_id" key; `ids` lists those
# identifiers in the same order as `filtered`.
test_data = load_most_recent_pickle(input_dir, prefix="spectral_test_")
filtered  = list(filter(lambda entry: "unique_id" in entry, test_data))
ids       = list(map(lambda entry: entry["unique_id"], filtered))

# ── 3) Preprocess spectra once ─────────────────────────────────────────────
# The label encoder is fitted on the non-missing consensus ratings of the
# training split and then handed to preprocess() for the test entries.
train_data   = load_most_recent_pickle(input_dir, prefix="spectral_train_")
train_labels = [e["consensus_rating"] for e in train_data if e.get("consensus_rating") is not None]
le           = LabelEncoder().fit(train_labels)
# The sixth return value of preprocess() is intentionally ignored here.
raw_te, wat_te, f1_te, f2_te, y_te, _ = preprocess(filtered, label_encoder=le)

# `ids` was collected from `filtered`, whereas the sample count below comes
# from preprocess(). If preprocess() returns a different number of samples,
# the prediction columns can no longer be attached to `df`; fail here with a
# clear message instead of a pandas length error after a model has been loaded.
# NOTE(review): equal lengths do not prove identical ordering — confirm that
# preprocess() preserves the order of its input.
if len(y_te) != len(ids):
    raise ValueError(
        f"preprocess() returned {len(y_te)} samples but {len(ids)} unique_ids "
        "were collected; predictions cannot be aligned with unique_id."
    )

# Row positions used to select every preprocessed sample when building tensors.
idxs = np.arange(len(y_te))
# Result table: one row per test entry; prediction columns are appended later.
df = pd.DataFrame({"unique_id": ids})

# ── 4) Sequentially load each ensemble, predict, then unload ───────────────
# One model is resident at a time. For every experiment key in `config` three
# columns are written to `df`: "<key>_score", "<key>_pred", "<key>_uncertainty".
for exp_key, info in config.items():
    # Load model
    model = tf.keras.models.load_model(info["model_path"])
    try:
        # Build input tensor from the channels this experiment was configured with
        X_test = build_tensor(idxs, info["channels"], raw_te, wat_te, f1_te, f2_te)
        # Predict probabilities; ravel() flattens the model output to 1-D
        probs = model.predict(X_test, batch_size=info["batch_size"]).ravel()
        # Threshold at Youden’s J (value stored under "threshold" in the config)
        preds = (probs >= info["threshold"]).astype(int)
        # Store results
        df[f"{exp_key}_score"]       = probs
        df[f"{exp_key}_pred"]        = preds
        df[f"{exp_key}_uncertainty"] = np.nan  # placeholder: compute if needed
    finally:
        # Unload model to free memory. Running this in `finally` guarantees the
        # model is released even when tensor building or prediction raises, so a
        # failed run does not leave it resident in the kernel.
        del model
        tf.keras.backend.clear_session()
        gc.collect()


In [None]:

# ── 5) Save to CSV ─────────────────────────────────────────────────────────
# Write the prediction table (without the DataFrame index) and report the path.
out_csv = base_dir.joinpath("results", "top3_ensemble_predictions.csv")
df.to_csv(out_csv, index=False)
print(f"Saved predictions to: {out_csv}")


In [None]:
import pandas as pd
from pathlib import Path
import ast

# 1) Resolve input and output locations relative to the parent of the working directory
base_dir    = Path.cwd().parent
ratings_csv = base_dir.joinpath("data", "ratings", "aggregate_data", "aggregated_spectral_ratings.csv")
preds_csv   = base_dir.joinpath("results", "top3_ensemble_predictions.csv")
output_csv  = base_dir.joinpath("results", "combined_ratings_predictions.csv")

# 2) Read the ratings table first, then the predictions table
df_ratings, df_preds = (pd.read_csv(csv_path) for csv_path in (ratings_csv, preds_csv))

# 3) Inner join on unique_id: only ids present in both tables are kept
df_combined = df_ratings.merge(df_preds, on="unique_id", how="inner")

# 4) Save the combined table and inspect the first rows
df_combined.to_csv(output_csv, index=False)
print(df_combined.head(10))

