In [None]:
import os
import pickle
import gc
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import balanced_accuracy_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
from tqdm import tqdm

# Configuration
SEED        = 4        # seed passed to the TensorFlow and NumPy RNGs below
MODEL_VER   = 5        # version suffix shared by the model, binarizer and output files
SR          = 16000    # presumably the audio sample rate in Hz; not referenced in this section
PARTICIPANT = "16"     # participant ID; used as a sub-folder / file-name prefix

# Input locations (relative to the notebook's working directory)
BASE_DATA   = Path("../../Data/Pilot_Data/3. PreprocessDataset_for_Audio")
TFLITE_PATH = Path(f"../../Models/tflite_model/Audio/Audio_ver{MODEL_VER}.tflite")
LABEL_BIN   = Path(f"../../LabelBinarizer/Audio/Audio_Label_binarizer_ver{MODEL_VER}.pkl")

# Output locations
PRED_DIR    = Path(f"../../Model_Preds/Audio/{PARTICIPANT}")
ACC_FILE    = Path(f"../../Model_Accuracy/Audio/{PARTICIPANT}_accuracy.txt")
CM_DIR      = Path(f"../../Confusion_Matrix/{PARTICIPANT}")

# Row/column order used for the confusion matrix
CLASS_ORDER = ['Tooth_brushing', 'Washing_hands', 'Wiping', 'Vacuum_Cleaner', 'Other']

# Reproducibility
# NOTE(review): TF_CPP_MIN_LOG_LEVEL is set after `import tensorflow` has
# already run, so it may not silence messages emitted during the import itself
# -- confirm whether it should move above the imports.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Ensure output directories exist
PRED_DIR.mkdir(parents=True, exist_ok=True)
CM_DIR.mkdir(parents=True, exist_ok=True)
# The accuracy report is a single file, so create its parent folder; without
# this, opening ACC_FILE for writing fails when the folder does not exist yet.
ACC_FILE.parent.mkdir(parents=True, exist_ok=True)

def make_interpreter(model_path: Path) -> tf.lite.Interpreter:
    """Build a TFLite interpreter for *model_path* and allocate its tensors."""
    tflite_runtime = tf.lite.Interpreter(model_path=str(model_path))
    # Tensors must be allocated before any set_tensor()/invoke() call.
    tflite_runtime.allocate_tensors()
    return tflite_runtime

# One shared interpreter for the whole notebook; infer_participant() reads
# `interpreter`, `audio_idx` and `output_idx` as module-level globals.
interpreter = make_interpreter(TFLITE_PATH)

# Tensor metadata, and the tensor indices of the first input / first output.
input_details, output_details = (
    interpreter.get_input_details(),
    interpreter.get_output_details(),
)
audio_idx, output_idx = input_details[0]["index"], output_details[0]["index"]

def infer_participant(pid: str, lb: LabelBinarizer) -> pd.DataFrame:
    """
    Score every audio frame of one participant with the shared TFLite model.

    Reads ``BASE_DATA/<pid>/<pid>_preprocessing_for_audio.pkl``, takes its
    "Audio" entry (cast to float32) and "Activity" entry, and pushes the
    frames one at a time through the module-level ``interpreter``.

    Returns a DataFrame with one score column per entry of ``lb.classes_``
    (in that order), followed by "y_true" (the "Activity" values) and
    "y_pred" (the name of the highest-scoring class column for each frame).
    """
    source = BASE_DATA / pid / f"{pid}_preprocessing_for_audio.pkl"
    with open(source, "rb") as fh:
        payload = pickle.load(fh)
    frames = payload["Audio"].astype(np.float32)
    labels = np.array(payload["Activity"], dtype=object)

    def _score(frame):
        # Prepend a batch axis and append a channel axis before feeding the model.
        interpreter.set_tensor(audio_idx, frame[None, ..., None])
        interpreter.invoke()
        # Batch size is 1, so keep only the first row of the output tensor.
        return interpreter.get_tensor(output_idx)[0]

    scores = [_score(frame) for frame in tqdm(frames, desc=f"Infer {pid}")]

    result = pd.DataFrame(scores, columns=lb.classes_)
    # Pick the winning class while the frame holds nothing but score columns.
    predicted = result.idxmax(axis=1)
    result["y_true"] = labels
    result["y_pred"] = predicted
    return result

# Restore the label binarizer saved for this model version.
with LABEL_BIN.open("rb") as f:
    lb = pickle.load(f)

# Show which encoded row each class name maps to.
print(
    "Label mapping:",
    {name: encoded for name, encoded in zip(lb.classes_, lb.transform(lb.classes_))},
)

# Per-frame predictions for the configured participant, persisted as CSV.
df_preds = infer_participant(PARTICIPANT, lb)
csv_path = PRED_DIR / f"{PARTICIPANT}_ver{MODEL_VER}.csv"
df_preds.to_csv(csv_path, index=False)

# Headline metrics over all frames.
ba = balanced_accuracy_score(df_preds["y_true"], df_preds["y_pred"])
f1 = f1_score(df_preds["y_true"], df_preds["y_pred"], average="weighted")
print(f"Balanced Accuracy: {ba:.4f}")
print(f"F1 Score:           {f1:.4f}")

# Keep a plain-text copy of the metrics next to the other accuracy reports.
ACC_FILE.write_text(f"Balanced Accuracy: {ba:.4f}\nF1 Score: {f1:.4f}\n")

# Confusion matrix with rows = true class, columns = predicted class, both in
# CLASS_ORDER, expressed as a percentage of each true-class row.
cm = confusion_matrix(df_preds["y_true"], df_preds["y_pred"], labels=CLASS_ORDER)
row_totals = cm.sum(axis=1, keepdims=True)
# A class that never occurs in y_true has an all-zero row. Divide only where
# the row total is non-zero and leave those rows at 0%, instead of computing
# 0/0 (RuntimeWarning + NaN) and patching the NaNs afterwards.
cm_pct = np.divide(
    100 * cm.astype(float),
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals != 0,
)

fig, ax = plt.subplots(figsize=(8, 6))
im = ax.imshow(cm_pct, cmap="Greens", vmin=0, vmax=100)

tick_positions = range(len(CLASS_ORDER))
ax.set_xticks(tick_positions)
ax.set_xticklabels(CLASS_ORDER, rotation=45, ha="right")
ax.set_yticks(tick_positions)
ax.set_yticklabels(CLASS_ORDER)

# Annotate every cell; switch to white text on the darker (> 50%) cells so the
# label stays readable against the colormap.
for (i, j), pct in np.ndenumerate(cm_pct):
    color = "white" if pct > 50 else "black"
    ax.text(j, i, f"{pct:.1f}%", ha="center", va="center", color=color)

ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix (%)")
fig.colorbar(im, ax=ax)
plt.tight_layout()
# Save before show(): some backends discard the figure once it has been shown.
plt.savefig(CM_DIR / f"{PARTICIPANT}_confusion.png")
plt.show()
