# In [None]:
# Audio Training Pipeline
import os
import sys
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import pickle as pkl
from pathlib import Path
import gc
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils import class_weight
from sklearn.metrics import balanced_accuracy_score, f1_score
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Input, Conv2D, MaxPooling2D, Flatten, Dense
)
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras import optimizers

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Extend the import search path with the HCAR directory located three levels
# above the current working directory.
PROJECT_ROOT = Path.cwd()
sys.path.append(str(PROJECT_ROOT.joinpath("..", "..", "..", "HCAR")))

# GPU configuration: quiet TensorFlow's native logging, expose only device 0,
# and let TensorFlow grow GPU memory on demand.
# NOTE(review): both variables are assigned after `import tensorflow` has
# already run in this file — confirm they still take effect; setting them
# before the import is the commonly recommended order.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '3',
    'CUDA_VISIBLE_DEVICES': '0',
})
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:  # no-op when no GPU is visible
    tf.config.experimental.set_memory_growth(gpu, True)

# Seeds for reproducibility (Python, NumPy and TensorFlow share one value).
SEED = 20
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Training settings
BATCH_SIZE, EPOCHS = 64, 30
HAND = 'Right'
SUB_SR, IMU_SR = 16000, 50
# The IMU constants below are not referenced in the visible part of this file.
WINDOW_LEN_IMU = IMU_SR * 2          # 2 seconds of IMU samples
HOP_LEN_IMU = int(IMU_SR * 0.2)      # 0.2 s stride

# Model input shapes
IMU_INPUT_SHAPE = (100, 9)
AUDIO_INPUT_SHAPE = (96, 64, 1)

# Model version (embedded in every output path below)
MODEL_VERSION = 1

# Participant IDs assigned to each split
TRAIN_PIDS = [100]
VALID_PIDS = [101]
TEST_PIDS = [102, 103]

# Paths (all relative to the working directory)
BASE_PATH = Path("../../")
DATA_PATH = BASE_PATH / "Data/Train_Data/3_MMExamples"
MODEL_SAVE_PATH = BASE_PATH / f"Models/tensorflow_model/Audio/Audio_ver{MODEL_VERSION}/{HAND}"
# NOTE(review): BASE_PATH is already "../../", so the two result paths below
# resolve four directory levels up — confirm that is the intended location.
ACC_SAVE_PATH = BASE_PATH / f"../../Results/Train_Result/Model_Accuracy/Audio/Audio_ver{MODEL_VERSION}/{HAND}"
PRED_SAVE_PATH = BASE_PATH / f"../../Results/Train_Result/Model_Preds/Audio/Audio_ver{MODEL_VERSION}/{HAND}"
NORM_PATH = BASE_PATH / f"Normalization_params/normalization_params_{HAND}_ver{MODEL_VERSION}.pkl"

REF_MODEL_PATH = BASE_PATH / "Models/Reference_Model"

# Create the output directories up front so later writes cannot fail on a
# missing folder.
for _out_dir in (MODEL_SAVE_PATH, ACC_SAVE_PATH, PRED_SAVE_PATH):
    _out_dir.mkdir(parents=True, exist_ok=True)

# Utility functions
def load_audio_examples(pid_set):
    """Load every pickled audio example file for the given participants.

    Files are read from ``DATA_PATH/<pid>/<HAND>/<SUB_SR>/``. Each file stem
    must have the form ``<pid>---<activity>---<trial>``; the pid and activity
    parts become the label of every example stored in that file.

    Args:
        pid_set: Iterable of participant IDs (anything usable in an f-string).

    Returns:
        Tuple ``(X, y)``: ``X`` is the concatenation along axis 0 of the
        unpickled contents of all files, and ``y`` is a DataFrame with columns
        ``pid`` and ``activity`` holding one row per example in ``X``.

    Raises:
        ValueError: If a file stem does not split into exactly three
            ``---``-separated parts, or if no example file was found.
        FileNotFoundError: If a participant folder does not exist.
    """
    X, y = [], []
    for pid in pid_set:
        folder = DATA_PATH / f'{pid}' / HAND / str(SUB_SR)
        # iterdir() yields entries in arbitrary order; sorting keeps the
        # example order stable across runs and machines.
        for file in sorted(folder.iterdir()):
            pid_str, activity, _trial = file.stem.split('---')
            # NOTE: unpickling can execute arbitrary code — only point
            # DATA_PATH at files produced by this project.
            with open(file, 'rb') as fh:
                data = pkl.load(fh)
            X.append(data)
            y.extend([[pid_str, activity]] * len(data))
    if not X:
        raise ValueError(
            f'No audio example files found for participants {list(pid_set)} '
            f'under {DATA_PATH}'
        )
    X = np.concatenate(X, axis=0)
    y = pd.DataFrame(y, columns=['pid', 'activity'])
    return X, y

# Model definition
def create_audio_model(input_shape=(96,64,1), num_classes=6):
    """Build and compile the small convolutional audio classifier.

    Architecture: one 3x3 convolution (64 filters, ReLU, 'same' padding),
    default max-pooling, flatten, a 256-unit ReLU dense layer and a softmax
    output with ``num_classes`` units.

    Args:
        input_shape: Shape of a single input example, without the batch axis.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A Keras ``Model`` compiled with Adam (learning rate 1e-3),
        categorical cross-entropy loss and the accuracy metric.
    """
    audio_in = Input(shape=input_shape, name='Audio_input')

    # Feature extractor
    conv = Conv2D(64, (3,3), activation='relu', padding='same')
    features = conv(audio_in)
    pool = MaxPooling2D()
    features = pool(features)
    flatten = Flatten()
    features = flatten(features)

    # Classification head
    hidden = Dense(256, activation='relu')
    features = hidden(features)
    head = Dense(num_classes, activation='softmax')
    probabilities = head(features)

    net = Model(audio_in, probabilities)
    compile_kwargs = {
        'optimizer': optimizers.Adam(1e-3),
        'loss': 'categorical_crossentropy',
        'metrics': ['accuracy'],
    }
    net.compile(**compile_kwargs)
    return net

def _resolve_true_labels(y_labels, lb):
    """Return one true activity label per example from any supported label format."""
    if isinstance(y_labels, pd.DataFrame):
        if 'activity' in y_labels.columns:
            return y_labels['activity'].to_numpy()
        # Fall back to the positional layout [pid, activity].
        return y_labels.iloc[:, 1].to_numpy()
    arr = np.asarray(y_labels)
    if arr.ndim == 2 and (np.issubdtype(arr.dtype, np.number) or arr.dtype == bool):
        # A numeric matrix is the output of lb.transform(); column 1 of it is
        # just one class indicator, so decode it back to class names instead.
        return lb.inverse_transform(arr)
    if arr.ndim == 2:
        # Rows of [pid, activity], as produced by the example loader.
        return arr[:, 1]
    return arr


def evaluate_and_save(model: Model, X_audio, y_labels, lb: LabelBinarizer):
    """Predict on ``X_audio`` and score the predictions against the true labels.

    Despite its name this function writes nothing to disk; the caller is
    responsible for persisting the returned values.

    Args:
        model: Trained Keras model. Its output columns are labelled with
            ``lb.classes_``, so it must have one output unit per class.
        X_audio: Input array passed to ``model.predict``.
        y_labels: Ground truth, one entry per row of ``X_audio``. Accepted
            forms: a numeric 2-D array produced by ``lb.transform`` (decoded
            with ``lb.inverse_transform``); a non-numeric 2-D array whose
            column 1 is the activity name; a DataFrame with an ``activity``
            column; or a 1-D sequence of activity names.
        lb: The fitted ``LabelBinarizer`` used to encode the training labels.

    Returns:
        Tuple ``(df, ba, f1)``: ``df`` holds one probability column per class
        followed by ``file_name`` (the true activity; name kept for existing
        consumers of the CSV), ``y_pred`` and ``y_true``; ``ba`` is the
        balanced accuracy and ``f1`` the weighted F1 score.
    """
    preds = model.predict(X_audio, batch_size=BATCH_SIZE)
    df = pd.DataFrame(preds, columns=lb.classes_)
    # Take the arg-max while the frame still contains only probability columns.
    y_pred = df.idxmax(axis=1)

    df['file_name'] = _resolve_true_labels(y_labels, lb)
    df['y_pred'] = y_pred
    df['y_true'] = df['file_name']

    ba = balanced_accuracy_score(df['y_true'], df['y_pred'])
    f1 = f1_score(df['y_true'], df['y_pred'], average='weighted')
    return df, ba, f1

# Main Execution
def main():
    """Train the audio model from scratch and evaluate it on the test participants.

    Steps: load the train/validation/test examples, one-hot encode the
    activity labels, compute balanced class weights, fit the CNN with
    learning-rate reduction and early stopping on ``val_loss``, save the
    model as HDF5, then write the per-example predictions (CSV) and the
    balanced accuracy / weighted F1 (text file) to the result folders.
    """
    # Data loading
    X_train, y_train_df = load_audio_examples(TRAIN_PIDS)
    X_val, y_val_df = load_audio_examples(VALID_PIDS)
    X_test, y_test_df = load_audio_examples(TEST_PIDS)

    # Label encoding (fitted on the training activities only)
    lb = LabelBinarizer()
    y_train = lb.fit_transform(y_train_df['activity'])
    y_val = lb.transform(y_val_df['activity'])

    # Class weights, keyed by class index in lb.classes_ order
    train_weights = class_weight.compute_class_weight(
        'balanced', classes=lb.classes_, y=y_train_df['activity']
    )
    class_weights = dict(enumerate(train_weights))

    # Prepare model: size the softmax layer from the classes actually present
    # instead of relying on the factory's default of 6.
    # NOTE(review): LabelBinarizer emits a single column when only two classes
    # exist, which would not match a 2-unit softmax — this pipeline presumes
    # three or more activities; confirm.
    audio_model = create_audio_model(
        input_shape=AUDIO_INPUT_SHAPE,
        num_classes=len(lb.classes_),
    )

    # Callbacks
    callbacks = [
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, min_lr=1e-6),
        EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    ]

    # Training
    # NOTE(review): training uses a batch size of 32 while the module-level
    # BATCH_SIZE (64) is used for prediction — kept as-is to preserve results;
    # confirm the difference is intended.
    train_batch_size = 32
    audio_model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=EPOCHS,
        batch_size=train_batch_size,
        class_weight=class_weights,
        callbacks=callbacks
    )

    # Save model (location unchanged; make sure the nested folder exists)
    model_file = MODEL_SAVE_PATH / HAND / str(SUB_SR) / 'audio_model.h5'
    model_file.parent.mkdir(parents=True, exist_ok=True)
    audio_model.save(str(model_file))

    # Evaluate: pass the [pid, activity] label rows, because evaluate_and_save
    # reads column 1 of its label argument as the true activity name. Passing
    # the one-hot matrix here would score against a 0/1 indicator column.
    results_df, ba, f1 = evaluate_and_save(
        audio_model, X_test, y_test_df.to_numpy(), lb
    )
    results_df.to_csv(PRED_SAVE_PATH / f'{HAND}.csv', index=False)

    with open(ACC_SAVE_PATH / 'Audio_Scratch.txt', 'w') as f:
        f.write(f'Balanced Accuracy: {ba}\nF1 Score: {f1}\n')

    # Cleanup: drop the large arrays and the model before returning
    del audio_model, X_train, y_train, X_val, y_val, X_test
    gc.collect()

if __name__ == '__main__':
    main()