In [3]:
import os
import sys
from pathlib import Path
import pickle as pkl
import numpy as np
from numpy.lib.stride_tricks import as_strided
from tqdm import tqdm
from utils import vggish_input, params
import pandas as pd

# Add project directory for HCAR utils
def add_time_zero_row(df, time_col='Time', unixtime_col='UnixTime'):
    """
    Ensure a row at time zero in the DataFrame.
    """
    df = df.copy()
    df[time_col]     = pd.to_numeric(df[time_col],     errors='coerce')
    df[unixtime_col] = pd.to_numeric(df[unixtime_col], errors='coerce')

    if (df[time_col] == 0).any():
        return df

    first = df.iloc[0].copy()
    new = first.copy()
    new[time_col]     = 0
    new[unixtime_col] = first[unixtime_col] - int(first[time_col] * 1000)
    return pd.concat([pd.DataFrame([new]), df], ignore_index=True)

def frame(data, window_length, hop_length):
    """
    Frame 2D array into overlapping windows.
    """
    if data.shape[0] < window_length:
        pad_n = window_length - data.shape[0]
        data = np.vstack([data, np.zeros((pad_n, data.shape[1]))])
    n_frames = 1 + (data.shape[0] - window_length) // hop_length
    shape    = (n_frames, window_length, data.shape[1])
    strides  = (hop_length * data.strides[0],) + data.strides
    return as_strided(data, shape=shape, strides=strides)

def rebuild_waveform(df):
    """
    Flatten multi-column audio data to a waveform and return start time.
    """
    cols = [c for c in df.columns if c.startswith('AudioData')]
    arr  = df[cols].to_numpy(dtype=np.int16)
    times= df['UnixTime_s'].to_numpy()
    return arr.flatten(), times[0]

def generate_mel_chunks(waveform, sr=16000, lower_edge_hertz=10,
                         upper_edge_hertz=8000, chunk_secs=10):
    """
    Generate mel spectrogram chunks from a long waveform.
    """
    chunk_samples = int(chunk_secs * sr)
    mel_chunks = []
    for start in tqdm(range(0, len(waveform), chunk_samples)):
        chunk = waveform[start:start + chunk_samples]
        if chunk.size == 0:
            break
        mel = vggish_input.wavform_to_concat_examples(
            chunk,
            lower_edge_hertz=lower_edge_hertz,
            upper_edge_hertz=upper_edge_hertz,
            sr=sr
        )
        mel_chunks.append(mel)
    return np.concatenate(mel_chunks, axis=0)

if __name__ == '__main__':
    DATA_PATH = Path("../../Data/Experiment_Data/1_RawDataset")
    SAVE_PATH = Path("../../Data/Experiment_Data/2_PreprocessDataset")

    participants = [d.name for d in DATA_PATH.iterdir() if d.is_dir()]
    class_list = [
        'Tooth_brushing', 'Washing_hands', 'Shower',
        'Wiping', 'Vacuum_Cleaner', 'Other'
    ]

    for participant in participants:
        if participant == '204':
            continue
        folder = DATA_PATH / participant

        # Load CSVs
        anno_df = pd.read_csv(
            next(folder.glob('*annotation.csv')), engine='python', on_bad_lines='skip'
        )
        pred_df = pd.read_csv(
            next(folder.glob('*Predicted_Activity.csv')), engine='python', on_bad_lines='skip'
        )

        # Harmonize labels
        anno_df['Activity'] = anno_df['Activity'].str.replace(
            'Toothbrushing', 'Tooth_brushing', regex=False)
        pred_df['Predict']   = pred_df['Predict'].str.replace(' ', '_', regex=False)

        # Insert time-zero
        anno_df = add_time_zero_row(anno_df)
        pred_df = add_time_zero_row(pred_df)

        # Align annotation times
        delta = pred_df.loc[pred_df.Time==0, 'UnixTime'].iloc[0] - \
                anno_df.loc[anno_df.Time==0, 'UnixTime'].iloc[0]
        anno_df['UnixTime'] += delta

        # Truncate and add Session Stop
        end_unix = pred_df['UnixTime'].max()
        anno_df = anno_df[anno_df['UnixTime'] <= end_unix]
        last = pred_df.loc[pred_df.UnixTime.idxmax()].to_dict()
        last.update({'Event':'Session Stop','Activity':'','Confirm':''})
        anno_df = pd.concat([anno_df, pd.DataFrame([last])], ignore_index=True)

        # Fix unmatched starts, drop bad ends
        anno_df['Confirm'] = anno_df['Confirm'].fillna('')
        df = anno_df.copy()
        next_act = df['Activity'].shift(-1)
        next_evt = df['Event'].shift(-1)
        df = df[~((df.Event=='Start') & ~((next_act==df.Activity)&(next_evt=='End')))]
        drop = []
        for idx,row in df[(df.Event=='End')&(df.Confirm=='no')].iterrows():
            starts = df[(df.Activity==row.Activity)&(df.Event=='Start')&(df.index<idx)]
            drop += ([starts.index.max(), idx] if not starts.empty else [idx])
        df = df.drop(drop).sort_values(['UnixTime','Time']).reset_index(drop=True)
        anno_df = df

        # Convert to seconds and save CSV
        anno_df['UnixTime_s'] = anno_df['UnixTime'] / 1000.0
        out = SAVE_PATH / participant
        out.mkdir(parents=True, exist_ok=True)
        anno_df.to_csv(out / f"{participant}_Annotation_processed.csv", index=False)
        pred_df.to_csv(out / f"{participant}_Predicted_Activity_processed.csv", index=False)

    # Process sensor/audio
    for participant in participants:
        if participant == '204':
            continue
        folder = DATA_PATH / participant
        sensor_df = pd.read_csv(
            next(folder.glob('*SensorData.csv')), engine='python', on_bad_lines='skip'
        )
        audio_df = pd.read_csv(
            next(folder.glob('*AudioData.csv')), engine='python', on_bad_lines='skip'
        )
        anno_clean = pd.read_csv(
            SAVE_PATH/participant/f"{participant}_Annotation_processed.csv"
        )

        # Remove noisy rows
        exp = sensor_df.columns[:17]
        extra = sensor_df.columns[17:]
        bad = sensor_df[exp].isnull().any(1) | sensor_df[extra].notnull().any(1)
        sensor_df = sensor_df[~bad].reset_index(drop=True)

        # Add time-zero rows
        sensor_df = add_time_zero_row(sensor_df)
        audio_df  = add_time_zero_row(audio_df)

        # Sync times
        base_zero = sensor_df.loc[sensor_df.Time==0,'UnixTime'].iloc[0]
        audio_df['UnixTime'] += base_zero - audio_df.loc[audio_df.Time==0,'UnixTime'].iloc[0]
        anno_clean['UnixTime'] += base_zero - anno_clean.loc[anno_clean.Time==0,'UnixTime'].iloc[0]

        # Truncate session
        stop_ts = anno_clean.loc[anno_clean.Event=='Session Stop','UnixTime'].iloc[0]
        sensor_df = sensor_df[sensor_df.UnixTime<=stop_ts].reset_index(drop=True)
        audio_df  = audio_df[audio_df.UnixTime<=stop_ts].reset_index(drop=True)

        # Convert to seconds
        for df in (sensor_df, audio_df, anno_clean):
            df['UnixTime_s'] = df['UnixTime']/1000.0

        # Frame IMU (drop timestamp)
        imu_arr = sensor_df[
            ['AccX','AccY','AccZ','GyroX','GyroY','GyroZ','RotVecX','RotVecY','RotVecZ']
        ].to_numpy()
        iwlen  = int(2.0 * 50)
        iwstep = int(0.2 * 50)
        imu_frames = frame(imu_arr, iwlen, iwstep)
        print("IMU frames shape:", imu_frames.shape)

        # Build intervals
        intervals = []
        stack = {}
        for _,row in anno_clean.query("Event!='Session Start' and Event!='Session Stop'").iterrows():
            if row.Event=='Start':
                stack[row.Activity] = row.UnixTime_s
            else:
                if row.Activity in stack:
                    intervals.append((stack.pop(row.Activity), row.UnixTime_s, row.Activity))
        print("Intervals:", intervals)

        # Audio waveform and mel
        waveform, audio_start = rebuild_waveform(audio_df)
        print("Audio start:", audio_start)
        audio_examples = generate_mel_chunks(
            waveform, sr=16000, lower_edge_hertz=10,
            upper_edge_hertz=8000, chunk_secs=3600
        )
        print("Mel shape:", audio_examples.shape)

        # Timestamp array for mel frames
        hop = params.STFT_HOP_LENGTH_SECONDS
        win = params.STFT_WINDOW_LENGTH_SECONDS
        n_mels = audio_examples.shape[0]
        audio_ts= audio_start + np.arange(n_mels)*hop + win

        # Mel frames per example
        ex_len = int(params.EXAMPLE_WINDOW_SECONDS / hop)

        w_a, w_i, w_l = [], [], []
        for frames in imu_frames:
            # use separate time if available; using first col assumed time
            s_t, e_t = frames[0,0], frames[-1,0]
            s_idx = np.searchsorted(audio_ts, s_t)
            e_idx = s_idx + ex_len
            if e_idx > n_mels: continue
            seg = audio_examples[s_idx:e_idx]
            if seg.shape[0] < ex_len:
                seg = np.vstack([seg, np.zeros((ex_len-seg.shape[0], seg.shape[1]))])
            ov = {act:0.0 for act in class_list}
            for st,et,act in intervals:
                ov[act] += max(0, min(e_t,et)-max(s_t,st))
            covered = sum(ov.values())
            ov['Other'] += max(0, (e_t-s_t)-covered)
            lbl = max(ov, key=ov.get)
            w_a.append(seg)
            w_i.append(frames)
            w_l.append(lbl)

        X_audio = np.stack(w_a)
        X_imu   = np.stack([f[:, :9] for f in w_i])
        Y_lab   = np.array(w_l)
        print("Final shapes → audio:", X_audio.shape,
              "imu:", X_imu.shape, "labels:", Y_lab.shape)

        # Save to pickle
        out = SAVE_PATH / participant
        out.mkdir(parents=True, exist_ok=True)
        with open(out / f"{participant}_preprocessing.pkl", 'wb') as f:
            pkl.dump({'IMU':X_imu, 'Audio':X_audio, 'Activity':Y_lab}, f, protocol=4)

IMU frames shape: (137563, 100, 9)
Intervals: [(1746692221.826, 1746692293.555, 'Washing_hands'), (1746692321.585, 1746692541.304, 'Tooth_brushing'), (1746692985.85, 1746693055.83, 'Wiping'), (1746693078.087, 1746693143.191, 'Wiping'), (1746693203.624, 1746693327.819, 'Vacuum_Cleaner'), (1746693368.225, 1746693508.475, 'Vacuum_Cleaner'), (1746693557.003, 1746693635.213, 'Wiping'), (1746693792.988, 1746693863.497, 'Wiping'), (1746693880.754, 1746693979.834, 'Wiping'), (1746694039.7, 1746694048.231, 'Vacuum_Cleaner'), (1746696428.025, 1746696723.775, 'Vacuum_Cleaner'), (1746696882.776, 1746696921.721, 'Vacuum_Cleaner'), (1746697210.253, 1746697271.303, 'Wiping'), (1746697284.612, 1746697342.278, 'Wiping'), (1746697369.542, 1746697436.419, 'Washing_hands'), (1746699176.146, 1746699244.343, 'Wiping'), (1746699337.727, 1746699403.651, 'Wiping'), (1746699554.077, 1746699765.299, 'Tooth_brushing'), (1746699806.641, 1746699870.379, 'Washing_hands'), (1746703461.296, 1746703660.634, 'Tooth_brus

100%|██████████| 9/9 [03:34<00:00, 23.79s/it]


Mel shape: (994498, 64)
Final shapes → audio: (137563, 96, 64) imu: (137563, 100, 9) labels: (137563,)
