# In [None]:
# Audio Model Evaluation (Excluding 'Other')
import os
import gc
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.metrics import balanced_accuracy_score, f1_score, confusion_matrix

# ── Configuration ──
# Version number interpolated into the model checkpoint path below.
MODEL_VERSION = 36
# Batch size handed to model.predict during evaluation.
BATCH_SIZE = 256
# Root of the per-participant preprocessed pickles (one sub-folder per participant).
DATA_PREP_DIR = Path("../../Data/Experiment_Data/5. PreprocessDataset_Window_Audio")
# Pickled label encoder/binarizer used to map model outputs back to class names.
LABEL_BIN_PATH = Path("../../LabelBinarizer/with_shower/Multimodal/Label_binarizer_5_classes.pkl")
# Keras checkpoint of the audio model under evaluation.
TF_MODEL_PATH = Path(
    f"../../Models/tensorflow_model/Audio/Audio_new_ver{MODEL_VERSION}/Right/16000/Audio_Scratch.h5"
)
# Directory whose sub-folder names are used as the participant ids.
RAW_DATA_DIR = Path("../../Data/Experiment_Data/2. PreprocessDataset")
# Label values in the order used for the confusion-matrix rows/columns.
CLASS_NAMES = [
    'Shower',
    'Tooth_brushing',
    'Washing_hands',
    'Wiping',
    'Vacuum_Cleaner',
]
# Human-readable tick labels; must stay index-aligned with CLASS_NAMES.
CLASS_DISPLAY = [
    "Shower",
    "Tooth Brushing",
    "Washing Hands",
    "Wiping",
    "Vacuum Cleaner",
]

# Helper Functions
def load_data(test_pid: str):
    """Load one participant's preprocessed audio and drop the 'Other' samples.

    Reads ``DATA_PREP_DIR/<test_pid>/<test_pid>_preprocessing.pkl`` and uses
    its ``'Audio'`` and ``'Activity'`` entries.

    Args:
        test_pid: Participant id; used both as the sub-folder name and as the
            pickle file prefix.

    Returns:
        A ``(audio, labels)`` tuple: the audio samples cast to ``float32`` and
        the matching activity labels, both restricted to samples whose label
        is not ``'Other'``.

    Raises:
        FileNotFoundError: If the participant's pickle does not exist.
        KeyError: If the pickle lacks the ``'Audio'`` or ``'Activity'`` entry.
    """
    pkl_path = DATA_PREP_DIR / test_pid / f"{test_pid}_preprocessing.pkl"
    # NOTE: pickle.load can execute arbitrary code; only point this at
    # trusted, locally generated preprocessing output.
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)

    # Coerce to ndarray so boolean-mask indexing also works when the pickle
    # stored the samples as a plain list instead of an array.
    audio = np.asarray(data['Audio'])
    y_full = np.asarray(data['Activity'])

    keep = y_full != 'Other'
    return audio[keep].astype('float32'), y_full[keep]


def evaluate_model(model, lb, pid_list):
    """Evaluate the model per participant and pool the predictions.

    For every participant id the data is loaded with ``load_data``, class
    probabilities are predicted, and balanced accuracy and weighted F1 (both
    in percent, rounded to two decimals) are computed.

    Args:
        model: Object exposing ``predict(X, batch_size=..., verbose=...)``
            that returns one row of class scores per sample.
        lb: Fitted label encoder/binarizer exposing ``classes_``. Column ``k``
            of the model output is assumed to correspond to
            ``lb.classes_[k]`` -- TODO confirm against the training code.
        pid_list: Iterable of participant ids to evaluate.

    Returns:
        ``(ba_dict, all_true, all_pred)`` where ``ba_dict`` maps each
        evaluated participant id to
        ``{'BalancedAccuracy': ..., 'WeightedF1': ...}`` and the two lists
        hold the pooled true and predicted labels of all participants.
        Participants with no samples left after filtering are skipped and do
        not appear in ``ba_dict``.
    """
    # Index classes_ directly instead of calling lb.inverse_transform on the
    # arg-max indices: LabelBinarizer.inverse_transform expects the 2-D score
    # matrix (it takes the arg-max itself) and fails on a 1-D index vector,
    # whereas classes_ lookup works for LabelBinarizer and LabelEncoder alike.
    class_labels = np.asarray(lb.classes_)

    ba_dict = {}
    all_true, all_pred = [], []

    for pid in pid_list:
        X, y_true = load_data(pid)

        # Nothing to score (e.g. the participant only has 'Other' samples);
        # predicting / scoring on empty input would raise.
        if len(y_true) == 0:
            print(f"Skipping {pid}: no samples left after removing 'Other'.")
            continue

        y_prob = model.predict(X, batch_size=BATCH_SIZE, verbose=0)
        y_pred = class_labels[np.argmax(y_prob, axis=1)]

        ba = balanced_accuracy_score(y_true, y_pred) * 100
        f1 = f1_score(y_true, y_pred, average='weighted') * 100
        ba_dict[pid] = {
            'BalancedAccuracy': round(ba, 2),
            'WeightedF1': round(f1, 2),
        }

        all_true.extend(y_true)
        all_pred.extend(y_pred)

        # Release the per-participant arrays before loading the next one to
        # keep peak memory down.
        del X, y_prob
        gc.collect()

    return ba_dict, all_true, all_pred

def plot_confusion(y_true, y_pred):
    """Plot a row-normalised (percent) confusion matrix over ``CLASS_NAMES``.

    Each row (true class) is scaled so its cells sum to 100; a class with no
    true samples yields an all-zero row. Cell values are printed with two
    decimals, and the figure is shown with ``plt.show()``.

    Args:
        y_true: Sequence of true labels.
        y_pred: Sequence of predicted labels, aligned with ``y_true``.
    """
    cm = confusion_matrix(y_true, y_pred, labels=CLASS_NAMES)

    # Divide only where the row total is non-zero; rows without any true
    # samples stay at 0 instead of producing NaN plus a RuntimeWarning.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_pct = np.divide(
        cm.astype(float),
        row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals > 0,
    ) * 100

    fig, ax = plt.subplots(figsize=(8, 6))
    im = ax.imshow(cm_pct, cmap='Blues', vmin=0, vmax=100)

    tick_positions = range(len(CLASS_NAMES))
    ax.set_xticks(tick_positions)
    ax.set_yticks(tick_positions)
    ax.set_xticklabels(CLASS_DISPLAY, rotation=45, ha='right')
    ax.set_yticklabels(CLASS_DISPLAY)

    # White text on dark cells, black on light ones.
    thresh = cm_pct.max() / 2
    for (row, col), pct in np.ndenumerate(cm_pct):
        color = 'white' if pct > thresh else 'black'
        ax.text(col, row, f"{pct:.2f}", ha='center', va='center', color=color, fontsize=10)

    # NOTE(review): the title says "MultiModal" although this script evaluates
    # the audio model -- confirm whether the title should be updated.
    ax.set_title('MultiModal Confusion Matrix')
    plt.colorbar(im, ax=ax)
    plt.tight_layout()
    plt.show()

# Main
if __name__ == '__main__':
    # Load label binarizer and model.
    # NOTE: pickle.load can execute arbitrary code; LABEL_BIN_PATH must point
    # at a trusted, locally produced file.
    with open(LABEL_BIN_PATH, 'rb') as f:
        lb = pickle.load(f)
    model = tf.keras.models.load_model(str(TF_MODEL_PATH))

    # Determine participants from the sub-folder names of RAW_DATA_DIR.
    # Sorted because iterdir() yields entries in arbitrary order, and hidden
    # folders (e.g. ".ipynb_checkpoints") are skipped since they are not
    # participants and have no preprocessed pickle.
    participants = sorted(
        d.name
        for d in RAW_DATA_DIR.iterdir()
        if d.is_dir() and not d.name.startswith('.')
    )
    print("Participants:", participants)

    # Evaluate
    ba_results, y_true_all, y_pred_all = evaluate_model(model, lb, participants)
    print(ba_results)

    # Plot confusion matrix
    plot_confusion(y_true_all, y_pred_all)
