In [6]:
import os
import random
import math
from pathlib import Path
import numpy as np
import pickle as pkl
import tensorflow as tf
from tensorflow.keras.models import load_model, clone_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Configuration
DATA_ROOT = Path("../Data/Experiment_Data/3_PreprocessDataset_Oversample")
BASE_MODEL_PATH = Path("../Models/tensorflow_model/MultiModal/MultiModal_ver1/Right/MM_Scratch.h5")
OTHER_DATA_ROOT = Path("../Data/Train_Data/3_MMExamples")
OTHER_SUBJECTS = ["101", "102"]
LABEL_BINARIZER_PATH = Path("../LabelBinarizer/Label_binarizer_6_classes.pkl")
NORM_PARAMS_PATH    = Path("../Normalization_params/Normalization_params_pickle/normalization_params_Right_ver1.pkl")
SAVE_TFLITE_DIR     = Path("../Models/Finetuned_Model/Finetune_model_6class_ver5/tflite")

TRAIN_SECONDS = [10, 30] + list(range(60, 481, 60))
NOISE_SEC, VAL_SEC = 3, 10
EPOCHS = 50
BATCH_SIZE = 32
SEED = 4
CLASSES = ["Shower", "Tooth_brushing", "Washing_hands", "Vacuum_Cleaner", "Wiping", "Other"]

# Utility Functions
def compute_num_frames(duration: float, window: float = 2.0, hop: float = 0.2) -> int:
    """Compute number of overlapping frames for a segment."""
    if duration < window:
        return 0
    return 1 + int(math.floor((duration - window) / hop))

# Load "Other" class data once
other_imu_list = []
other_audio_list = []
for subj in OTHER_SUBJECTS:
    subj_dir = OTHER_DATA_ROOT / subj / "Right" / "16000"
    for pkl_file in sorted(subj_dir.glob(f"{subj}---Other---*.pkl")):
        with open(pkl_file, 'rb') as f:
            data = pkl.load(f)
        other_imu_list.append(data['IMU'])
        other_audio_list.append(data['audio'])
other_imu_all = np.concatenate(other_imu_list, axis=0)
other_audio_all = np.concatenate(other_audio_list, axis=0)
print(f"Loaded Other IMU {other_imu_all.shape}, Audio {other_audio_all.shape}")

# Main fine-tuning loop
# Set seeds
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Load base model, label binarizer, normalization params
base_model = load_model(BASE_MODEL_PATH)
with open(LABEL_BINARIZER_PATH, 'rb') as f:
    lb = pkl.load(f)
with open(NORM_PARAMS_PATH, 'rb') as f:
    norm = pkl.load(f)
pm, pn = norm['max'], norm['min']
mean, std = norm['mean'], norm['std']

# Precompute frame counts
noise_frames = compute_num_frames(NOISE_SEC)
val_frames = compute_num_frames(VAL_SEC)

for train_sec in TRAIN_SECONDS:
    train_frames = compute_num_frames(train_sec)

    for pid_dir in sorted(DATA_ROOT.iterdir()):
        if not pid_dir.is_dir():
            continue
        pid = pid_dir.name
        print(f"\n>>> Participant: {pid}, window: {train_sec}s")

        # Load participant data
        with open(pid_dir / f"{pid}_preprocessing.pkl", 'rb') as f:
            pdata = pkl.load(f)
        imu_data = pdata['IMU']  # shape (N, T, F)
        audio_data = pdata['Audio']  # shape (N, n_mel, m)
        labels = pdata['Activity']

        # Split indices for classes except 'Other'
        train_idx_pc = []
        val_idx_pc = []
        for cls in CLASSES[:-1]:  # all except Other
            idxs = np.where(labels == cls)[0]
            diffs = np.diff(idxs)
            breaks = np.where(diffs != 1)[0]
            starts = np.concatenate(([0], breaks+1))
            ends = np.concatenate((breaks, [len(idxs)-1]))
            selected = []
            for s, e in zip(starts, ends):
                segment = idxs[s:e+1]
                if segment.size > noise_frames:
                    selected.append(segment[noise_frames:])
            if selected:
                allseg = np.concatenate(selected)
            else:
                allseg = np.array([], dtype=int)
            limited = allseg[:train_frames + val_frames]
            train_idx_pc.append(limited[:train_frames])
            val_idx_pc.append(limited[train_frames:])
        train_idx_pc = np.sort(np.concatenate(train_idx_pc)) if train_idx_pc else np.array([], dtype=int)
        val_idx_pc = np.sort(np.concatenate(val_idx_pc)) if val_idx_pc else np.array([], dtype=int)

        # Randomly sample 'Other' from external pool
        total_needed = train_frames + val_frames
        if other_imu_all.shape[0] < total_needed:
            raise ValueError("Not enough Other samples")
        ot_choices = np.random.choice(other_imu_all.shape[0], total_needed, replace=False)
        train_idx_ot = ot_choices[:train_frames]
        val_idx_ot = ot_choices[train_frames:]




        # Assemble training and validation sets
        X_imu_tr = np.concatenate([imu_data[train_idx_pc], other_imu_all[train_idx_ot]], axis=0)
        X_audio_tr = np.concatenate([audio_data[train_idx_pc], other_audio_all[train_idx_ot]], axis=0)
        y_tr = np.concatenate([labels[train_idx_pc], np.array(['Other']*train_frames)], axis=0)

        X_imu_val = np.concatenate([imu_data[val_idx_pc], other_imu_all[val_idx_ot]], axis=0)
        X_audio_val = np.concatenate([audio_data[val_idx_pc], other_audio_all[val_idx_ot]], axis=0)
        y_val = np.concatenate([labels[val_idx_pc], np.array(['Other']*val_frames)], axis=0)

        # Shuffle training set
        perm = np.random.permutation(len(y_tr))
        X_imu_tr, X_audio_tr, y_tr = X_imu_tr[perm], X_audio_tr[perm], y_tr[perm]

        # One-hot encoding labels
        y_tr_enc = lb.transform(y_tr)
        y_val_enc = lb.transform(y_val)

        # Normalize IMU data
        def norm_imu(x):
            scaled = 1 + (x - pm.reshape(1,1,-1)) * 2 / (pm.reshape(1,1,-1) - pn.reshape(1,1,-1))
            return ((scaled - mean.reshape(1,1,-1)) / std.reshape(1,1,-1)).astype('float32')

        X_imu_tr = norm_imu(X_imu_tr)
        X_imu_val = norm_imu(X_imu_val)
        X_audio_tr = X_audio_tr.astype('float32')
        X_audio_val = X_audio_val.astype('float32')

        # Fine-tune model
        model = clone_model(base_model)
        model.set_weights(base_model.get_weights())
        for layer in model.layers:
            layer.trainable = True
        model.compile(optimizer=Adam(1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

        # Early-stopping
        es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
        history = model.fit(
            [X_imu_tr, X_audio_tr], y_tr_enc,
            validation_data=([X_imu_val, X_audio_val], y_val_enc),
            epochs=EPOCHS, batch_size=BATCH_SIZE,
            callbacks=[es], verbose=2
        )

        # Convert to TFLite
        converter = tf.lite.TFLiteConverter.from_keras_model(model)
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        tflite_model = converter.convert()

        save_dir = SAVE_TFLITE_DIR / pid
        save_dir.mkdir(parents=True, exist_ok=True)
        (save_dir / f"Finetune_other_{train_sec}s.tflite").write_bytes(tflite_model)

        print(f"Done {pid} | window={train_sec}s -- saved to {save_dir}")


Loaded Other IMU (160650, 100, 9), Audio (160650, 96, 64)

>>> Participant: 201, window: 10s


ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 2, the array at index 0 has size 1 and the array at index 1 has size 9

In [7]:
print(imu_data[train_idx_pc].shape)
print(other_imu_all[train_idx_ot].shape)

(205, 100, 1)
(41, 100, 9)


(137593, 100, 1)