# In [None]:
import os
import random
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils import class_weight
from sklearn.metrics import balanced_accuracy_score, f1_score
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model
from tqdm import tqdm

# Configuration
# -------------
# Reproducibility and bookkeeping.
SEED = 4                # shared seed for the Python, NumPy and TensorFlow RNGs
MODEL_VERSION = 36      # embedded in the model and result directory names

# Participant IDs per split; each ID is used as a data sub-folder name.
TRAIN_IDS = [10, 100, 101, 102, 103]
VALID_IDS = [104]
TEST_IDS = [3, 4]

# Mel-band edges; every (lower, upper) pair selects an "LEH{lower}_UEH{upper}"
# data folder and yields one trained model.
LOWER_EDGES = [100, 500]
UPPER_EDGES = [6000, 8000, 10000]

# Training hyper-parameters.
BATCH_SIZE = 32
EPOCHS = 30

# Path components of the data and model folders.
SR = 16000              # presumably the audio sampling rate in Hz - confirm
HAND = "Right"

# Activity labels the classifier distinguishes.
CLASS_NAMES = [
    'Tooth_brushing',
    'Washing_hands',
    'Shower',
    'Wiping',
    'Vacuum_Cleaner',
]
NUM_CLASSES = len(CLASS_NAMES)

# Filesystem locations (relative to the working directory).
BASE_DATA = Path("../../Data/Train_Data/4. AudioExamples")
BASE_MODEL = Path(f"../../Models/tensorflow_model/Audio/Audio_ver{MODEL_VERSION}")
BASE_RESULTS = Path(f"../../Result/Train_Result/Model_Preds/Audio/Audio_ver{MODEL_VERSION}")
REF_MODEL_PATH = Path("../../Models/Reference_Model/example_model.hdf5")

# Runtime setup
# Silence TensorFlow's native (C++) log output and expose only CUDA device 0.
# NOTE(review): tensorflow has already been imported by this point; confirm
# that both variables still take effect when set after the import.
os.environ.update({
    "TF_CPP_MIN_LOG_LEVEL": "3",
    "CUDA_VISIBLE_DEVICES": "0",
})

# Seed the Python, NumPy and TensorFlow random number generators alike.
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Ensure the output directories exist before anything is written to them.
BASE_MODEL.mkdir(parents=True, exist_ok=True)
BASE_RESULTS.mkdir(parents=True, exist_ok=True)

# Data Loading

def load_audio_examples(pid_list, lower, upper):
    """
    Load and concatenate VGGish log-mel examples for given participants
    and mel-band edges.

    Files are read from
    ``BASE_DATA / "LEH{lower}_UEH{upper}" / <pid> / HAND / <SR>``.
    Each file stem must look like ``<prefix>---<activity>---<suffix>``; the
    middle part becomes the label of every example stored in that file.

    Parameters
    ----------
    pid_list : iterable
        Participant IDs. Participants whose folder does not exist are skipped.
    lower, upper :
        Mel-band edges selecting the ``LEH{lower}_UEH{upper}`` data folder.

    Returns
    -------
    X : np.ndarray
        All loaded examples concatenated along axis 0.
    y : np.ndarray
        Activity label for each row of ``X``.

    Raises
    ------
    ValueError
        If a file name does not match the expected pattern, or if no
        examples were found for any of the requested participants.
    """
    config_dir = BASE_DATA / f"LEH{lower}_UEH{upper}"
    X, y = [], []
    for pid in pid_list:
        folder = config_dir / str(pid) / HAND / str(SR)
        if not folder.is_dir():
            continue
        # iterdir() yields entries in filesystem-dependent order; sorting keeps
        # the example order (and thus the seeded shuffle downstream) the same
        # across machines. Sub-directories are ignored.
        files = sorted(entry for entry in folder.iterdir() if entry.is_file())
        for pkl_file in tqdm(files, desc=f"PID {pid}"):
            parts = pkl_file.stem.split("---")
            if len(parts) != 3:
                raise ValueError(
                    f"Unexpected file name {pkl_file}: expected "
                    "'<prefix>---<activity>---<suffix>'"
                )
            activity = parts[1]
            # NOTE: unpickling can execute arbitrary code - only load files
            # from a trusted source.
            with open(pkl_file, "rb") as f:
                examples = pickle.load(f)
            X.append(examples)
            y.extend([activity] * len(examples))
    if not X:
        # np.concatenate([]) would fail with an unhelpful message.
        raise ValueError(
            f"No audio examples found for participants {pid_list} "
            f"under {config_dir}"
        )
    return np.concatenate(X, axis=0), np.array(y)

# Model Creation
def warm_start_model(num_classes):
    """
    Build a classifier warm-started from the reference model.

    The model stored at ``REF_MODEL_PATH`` is loaded, its final layer is
    dropped, and a new softmax ``Dense`` layer named ``"output"`` with
    `num_classes` units is attached to the output of the second-to-last
    layer. The result is compiled with Adam (learning rate 1e-3),
    categorical cross-entropy loss and an accuracy metric.

    Returns the compiled Keras model.
    """
    reference = tf.keras.models.load_model(REF_MODEL_PATH)

    # Re-use everything up to (and including) the penultimate layer.
    penultimate_output = reference.layers[-2].output
    new_head = Dense(num_classes, activation="softmax", name="output")
    classifier = Model(reference.input, new_head(penultimate_output))

    adam = optimizers.Adam(1e-3)
    classifier.compile(
        optimizer=adam,
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return classifier

# Training Loop
def train_and_save(lower, upper):
    """
    Train one warm-started model for a mel-band configuration and save it.

    Loads the training and validation examples for ``(lower, upper)``,
    shuffles the training set, one-hot encodes the labels, fits the model
    with balanced class weights, and writes it to
    ``BASE_MODEL / HAND / <SR> / "LEH{lower}_UEH{upper}_model.h5"``.

    Parameters
    ----------
    lower, upper :
        Mel-band edges identifying the data folder and the saved model name.
    """
    print(f"\n=== Training LEH={lower}, UEH={upper} ===")

    # Load datasets
    X_train, y_train = load_audio_examples(TRAIN_IDS, lower, upper)
    X_val, y_val = load_audio_examples(VALID_IDS, lower, upper)

    # Shuffle training set (examples are loaded grouped by participant/file)
    idx = np.random.permutation(len(X_train))
    X_train, y_train = X_train[idx], y_train[idx]

    # One-hot encode. LabelBinarizer sorts its classes, so output column i
    # corresponds to lb.classes_[i], NOT to CLASS_NAMES[i].
    lb = LabelBinarizer().fit(CLASS_NAMES)
    Y_train = lb.transform(y_train)
    Y_val = lb.transform(y_val)

    # Compute class weights in the encoder's column order so that the
    # index -> weight mapping given to Keras matches the one-hot columns.
    weights = class_weight.compute_class_weight(
        "balanced", classes=lb.classes_, y=y_train
    )
    cw_dict = dict(enumerate(weights))

    # Prepare model
    model = warm_start_model(len(lb.classes_))

    # Callbacks: lower the learning rate when val_loss plateaus, and stop
    # once it has not improved for 5 epochs.
    reduce_lr = ReduceLROnPlateau("val_loss", factor=0.1, patience=3, min_lr=1e-6, verbose=1)
    early_stp = EarlyStopping("val_loss", patience=5, verbose=1)

    # Fit
    model.fit(
        X_train, Y_train,
        validation_data=(X_val, Y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        class_weight=cw_dict,
        callbacks=[reduce_lr, early_stp],
        verbose=2,
    )

    # Save model
    out_dir = BASE_MODEL / HAND / str(SR)
    out_dir.mkdir(parents=True, exist_ok=True)
    model.save(out_dir / f"LEH{lower}_UEH{upper}_model.h5")

# Train and save one model per (lower edge, upper edge) combination.
# Lower edges vary slowest, upper edges fastest.
edge_pairs = [(lo, hi) for lo in LOWER_EDGES for hi in UPPER_EDGES]
for low, high in edge_pairs:
    train_and_save(low, high)
