The dataset originally comes from https://archive.ics.uci.edu/dataset/780/har70; however, a few errors were fixed by the authors, and the up-to-date dataset can be found here: https://github.com/ntnu-ai-lab/harth-ml-experiments/tree/main/harth

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np

# Integer activity codes from the `label` column mapped to readable names.
# Codes that are not listed here (e.g. 2) have no entry, so mapping them
# through this table yields NaN.
ANNOTATIONS: dict[int, str] = {
    1: 'walking',
    3: 'shuffling',
    4: 'stairs (ascending)',
    5: 'stairs (descending)',
    6: 'standing',
    7: 'sitting',
    8: 'lying',
}


def _majority_label(labels: pd.Series) -> float:
    """Return the most frequent value in *labels*, or NaN if there is none."""
    modes = labels.mode()
    # mode() returns tied values sorted ascending, so the smallest code wins
    # a tie; an empty (or all-NaN) window has no mode at all.
    return modes.iloc[0] if len(modes) else np.nan


def _sensor_frame(df: pd.DataFrame, prefix: str) -> pd.DataFrame:
    """Return the ``<prefix>_x/y/z`` columns as float32 ``acc_x/acc_y/acc_z``."""
    frame = df[[f'{prefix}_x', f'{prefix}_y', f'{prefix}_z']].astype(np.float32)
    frame.columns = ['acc_x', 'acc_y', 'acc_z']
    frame['acc_x'] = -frame['acc_x']  # Invert x-axis to match the expected orientation
    frame['acc_y'] = -frame['acc_y']  # Invert y-axis to match the expected orientation
    return frame


def prepare_raw(path: Path) -> dict[str, pd.DataFrame]:
    """Load one raw recording CSV and split it into labels and sensor signals.

    Args:
        path: CSV file with a ``timestamp`` column, a ``label`` column and the
            accelerometer columns ``thigh_x/y/z`` and ``back_x/y/z``.

    Returns:
        A dict with three frames, all indexed by ``datetime``:

        * ``'ground_truth'`` - one row per second holding the activity name
          (categorical column ``ground_truth``). Seconds without samples and
          labels missing from ``ANNOTATIONS`` are dropped.
        * ``'thigh'`` - float32 ``acc_x``, ``acc_y``, ``acc_z`` with x and y
          negated.
        * ``'trunk'`` - same layout, built from the ``back_*`` columns.
    """
    df = pd.read_csv(path, engine='pyarrow', index_col='timestamp')
    if not isinstance(df.index, pd.DatetimeIndex):
        # Grouper(freq=...) below requires a datetime-like index; parse it
        # explicitly in case the reader left the timestamps as strings.
        df.index = pd.to_datetime(df.index)
    df.index.name = 'datetime'

    # Labels are nominal codes, so a per-second median is not meaningful: it
    # can pick a minority label or even a code that never occurred in the
    # window. Use the most frequent label (majority vote) instead.
    ground_truth = df['label'].rename('ground_truth')
    ground_truth = (
        ground_truth.groupby(pd.Grouper(freq='1s'))
        .agg(_majority_label)
        .dropna()
        .astype(int)
    )
    ground_truth = ground_truth.map(ANNOTATIONS).astype('category').dropna().to_frame()

    thigh = _sensor_frame(df, 'thigh')
    back = _sensor_frame(df, 'back')

    return {'ground_truth': ground_truth, 'thigh': thigh, 'trunk': back}


# Convert every CSV found under `origin/` into parquet files under `data/`,
# one sub-folder per table returned by prepare_raw.
folder = Path('origin')
# Materialised and sorted so the files are processed in a reproducible order
# and `files` can be iterated again later.
files = sorted(folder.rglob('*.csv'))

output = Path('data')
output.mkdir(exist_ok=True, parents=True)

for table in ('ground_truth', 'thigh', 'trunk'):
    (output / table).mkdir(parents=True, exist_ok=True)

for file in files:
    # Named `file_id` rather than `id` so the builtin `id()` is not shadowed
    # for the rest of the module/notebook.
    file_id = file.stem
    data = prepare_raw(file)

    for name, df in data.items():
        # Output file keeps the stem of the source CSV.
        df.to_parquet(output / name / f'{file_id}.parquet')