# In [None]:
# Audio-Only Model Evaluation Pipeline
import os
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.metrics import balanced_accuracy_score, f1_score, confusion_matrix

# Configuration
# Every path below is built from the current working directory at import time,
# so the script must be launched from the directory these '../' segments are
# relative to.
PROJECT_ROOT = Path.cwd()
# Input data: one sub-directory per participant, each expected to contain
# '<pid>/<pid>_preprocessing_1hour.pkl'.
DATA_DIR      = PROJECT_ROOT / '../Data/Experiment_Data/2. PreprocessDataset'
# Saved Keras audio model (.h5) that is loaded once and reused for every participant.
TF_MODEL_PATH = PROJECT_ROOT / '../Models/tensorflow_model/Audio/Audio_new_ver36/Right/16000/Audio_Scratch.h5'
# Output: per-participant text files with balanced accuracy and F1 score.
OUTPUT_ACC    = PROJECT_ROOT / '../Result/Experiment_Result/Model_Accuracy'
# Output: per-participant CSV files with class scores, true and predicted labels.
OUTPUT_PRED   = PROJECT_ROOT / '../Result/Experiment_Result/Model_Preds/Audio'
# Output: per-participant confusion-matrix PNG images.
OUTPUT_CM     = PROJECT_ROOT / '../Result/Experiment_Result/Confusion_Matrix'
# Pickled label binarizer; its `classes_` attribute supplies the class names.
LB_PATH       = PROJECT_ROOT / '../LabelBinarizer/Multimodal/Label_binarizer_6_classes.pkl'

# Load label binarizer and classes
def load_lb():
    """Load and return the pickled label binarizer stored at ``LB_PATH``.

    Returns:
        The unpickled object. The rest of this script only relies on its
        ``classes_`` attribute.

    Raises:
        FileNotFoundError: If ``LB_PATH`` does not exist.
    """
    # NOTE: unpickling can execute arbitrary code; only point LB_PATH at a
    # file produced by a trusted training run.
    # The context manager guarantees the handle is closed even if loading fails.
    with open(LB_PATH, 'rb') as fh:
        return pickle.load(fh)

# Prediction function for TF-Keras audio model
def predict_audio_kfold(tf_model, pid: str, lb, batch_size: int = 256) -> pd.DataFrame:
    """Run the audio model on one participant's data and tabulate the results.

    Args:
        tf_model: Object exposing ``predict(x, batch_size=...)``; here the
            loaded Keras model.
        pid: Participant identifier. It names both the sub-directory of
            ``DATA_DIR`` and the pickle file inside it.
        lb: Label binarizer whose ``classes_`` provide the score column names.
        batch_size: Batch size forwarded to ``tf_model.predict``.

    Returns:
        A DataFrame with one score column per class, followed by ``y_true``
        (the ``'Activity'`` entry of the pickle) and ``y_pred`` (name of the
        highest-scoring class column in each row).

    Raises:
        FileNotFoundError: If the participant's pickle file does not exist.
    """
    # Load data
    pkl_path = DATA_DIR / pid / f'{pid}_preprocessing_1hour.pkl'
    # NOTE: unpickling can execute arbitrary code; only use trusted data files.
    # The context manager closes the handle even if unpickling raises.
    with open(pkl_path, 'rb') as fh:
        data = pickle.load(fh)
    # Cast to float32 and append a trailing axis of length 1
    # (presumably the channel axis the model expects; confirm against the model).
    audio = data['Audio'].astype(np.float32)[..., None]
    y_true = np.array(data['Activity'])

    # Predict
    preds = tf_model.predict(audio, batch_size=batch_size)
    df = pd.DataFrame(preds, columns=lb.classes_)
    # Pick the winning class while the frame still holds only score columns,
    # so no temporary copy via drop() is needed.
    y_pred = df.idxmax(axis=1)
    df['y_true'] = y_true
    df['y_pred'] = y_pred
    return df

# Evaluation & saving
def confusion_percentages(cm):
    """Convert a confusion matrix of counts into row-wise percentages.

    Each row is divided by its own total and scaled to 0-100. A row whose
    total is zero (a class with no true samples) is returned as all zeros
    instead of NaN, so downstream ``max()`` and text annotations stay valid.

    Args:
        cm: 2-D array-like of counts, rows being the true classes.

    Returns:
        A float ndarray of the same shape holding percentages.
    """
    counts = np.asarray(cm, dtype=float)
    row_totals = counts.sum(axis=1, keepdims=True)
    pct = np.zeros_like(counts)
    # `where` skips rows with zero total, leaving the pre-filled zeros in place.
    np.divide(counts, row_totals, out=pct, where=row_totals > 0)
    return pct * 100


if __name__ == '__main__':
    lb = load_lb()
    classes = lb.classes_
    for out_root in (OUTPUT_ACC, OUTPUT_PRED, OUTPUT_CM):
        out_root.mkdir(parents=True, exist_ok=True)

    # Load model once; it is reused for every participant.
    tf_model = tf.keras.models.load_model(str(TF_MODEL_PATH))

    # Loop participants: every sub-directory of DATA_DIR is treated as one.
    for pid in sorted(d.name for d in DATA_DIR.iterdir() if d.is_dir()):
        print(f"Evaluating audio model for {pid}")

        try:
            df_res = predict_audio_kfold(tf_model, pid, lb)
        except FileNotFoundError as err:
            # A stray directory (e.g. a notebook checkpoint folder) or a
            # participant without a preprocessed file should not abort the
            # whole evaluation run.
            print(f"Skipping {pid}: {err}")
            continue

        # Metrics
        ba = balanced_accuracy_score(df_res['y_true'], df_res['y_pred'])
        f1 = f1_score(df_res['y_true'], df_res['y_pred'], average='weighted')

        # Save predictions and metrics
        out_dir = OUTPUT_PRED / pid
        out_dir.mkdir(parents=True, exist_ok=True)
        df_res.to_csv(out_dir / f'{pid}_audio_tf.csv', index=False)
        with open(OUTPUT_ACC / f'{pid}_audio_tf.txt', 'w') as f:
            f.write(f'Balanced Accuracy: {ba:.4f}\nF1 Score: {f1:.4f}')

        # Confusion matrix, normalised per true class (rows sum to 100 %).
        cm = confusion_matrix(df_res['y_true'], df_res['y_pred'], labels=classes)
        cm_pct = confusion_percentages(cm)
        fig, ax = plt.subplots(figsize=(8, 6))
        ax.imshow(cm_pct, cmap='Greens', vmin=0, vmax=100)
        ticks = range(len(classes))
        ax.set_xticks(ticks)
        ax.set_xticklabels(classes, rotation=45)
        ax.set_yticks(ticks)
        ax.set_yticklabels(classes)
        # Use white text on dark cells and black text on light cells.
        thresh = cm_pct.max() / 2
        for i in ticks:
            for j in ticks:
                col = 'white' if cm_pct[i, j] > thresh else 'black'
                ax.text(j, i, f'{cm_pct[i,j]:.1f}', ha='center', va='center', color=col)
        plt.tight_layout()
        cm_dir = OUTPUT_CM / pid / 'Audio'
        cm_dir.mkdir(exist_ok=True, parents=True)
        fig.savefig(cm_dir / f'{pid}_audio_cm.png')
        # Close the figure so memory does not grow with the number of participants.
        plt.close(fig)
        print(f"{pid}: BA={ba:.3f}, F1={f1:.3f}")
