The original data can be downloaded here: https://zenodo.org/records/12704412

In [None]:
from pathlib import Path
import pandas as pd


def prepare_raw(path: Path) -> pd.DataFrame:
    """Load one raw accelerometer CSV and return its negated axes.

    The file is read with the pyarrow CSV engine. Column headers are stripped
    of surrounding whitespace *before* any column is looked up, so padded
    headers (including a padded ``utc``) are accepted.

    Args:
        path: CSV file providing a timezone-aware ``utc`` column plus the
            columns ``x``, ``y`` and ``z``.

    Returns:
        A frame indexed by ``datetime`` (the ``utc`` values converted to
        Europe/Berlin) with the columns ``acc_x``, ``acc_y`` and ``acc_z``,
        each holding the negated input value.
    """
    raw = pd.read_csv(path, engine='pyarrow')

    # Normalise the headers first so every later lookup tolerates padding.
    raw.columns = [name.strip() for name in raw.columns]
    raw['datetime'] = raw['utc'].dt.tz_convert('Europe/Berlin')

    acc = (
        raw[['datetime', 'x', 'y', 'z']]
        .set_index('datetime')
        .rename(columns={'x': 'acc_x', 'y': 'acc_y', 'z': 'acc_z'})
    )

    # All three axes are sign-flipped.
    # NOTE(review): presumably compensates for sensor orientation - confirm.
    return -acc


folder = Path('origin')

# Lazily walk origin/data for every raw*.csv, at any depth.
# Named `raw_files` (not `input`) so the builtin stays usable in this kernel.
raw_files = (folder / 'data').rglob('raw*.csv')
output = Path('data/thigh')
output.mkdir(exist_ok=True, parents=True)

for file in raw_files:
    # The stem of the containing directory names the output file.
    # Named `record_id` (not `id`) to avoid shadowing the builtin.
    record_id = file.parent.stem
    df = prepare_raw(file)

    df.to_parquet(
        output / f'{record_id}.parquet',
    )

In [None]:
from pathlib import Path

import pandas as pd

folder = Path('origin')

# Laboratory labels come from a single CSV: `time`/`activity` are renamed to
# `datetime`/`ground_truth`, `datetime` becomes the index, and every row is
# tagged with the 'laboratory' condition.
ground_truth = (
    pd.read_csv(folder / 'data/ground_truth_lab.csv', engine='pyarrow')
    .rename(columns={'time': 'datetime', 'activity': 'ground_truth'})
    .set_index('datetime')
    .assign(condition='laboratory')
)

# Destination directory for the per-file ground-truth parquet files.
output = Path('data/ground_truth')
output.mkdir(exist_ok=True, parents=True)

# Lazy iterator over every annotations*seconds.csv below origin/data.
input = (folder / 'data').rglob('annotations*seconds.csv')


def prepare_free_living_ground_truth(path: Path) -> pd.DataFrame:
    """Load one free-living annotation CSV as a UTC-indexed label frame.

    The ``time`` column must parse as timezone-aware. Its timezone is dropped,
    the remaining wall-clock values are re-interpreted as Europe/Berlin local
    time, and the result is converted to UTC.
    NOTE(review): this looks like a correction for local timestamps that were
    stored with a UTC marker - confirm against the data description.

    Args:
        path: CSV file with at least the columns ``time`` and ``activity``.

    Returns:
        A frame indexed by ``datetime`` (UTC) in which ``activity`` is renamed
        to ``ground_truth`` and a constant ``condition`` column holds
        ``'free-living'``.
    """
    annotations = pd.read_csv(path, engine='pyarrow')

    # Strip the parsed timezone, then treat the bare wall time as Berlin time.
    wall_clock = annotations['time'].dt.tz_convert(None)
    berlin_time = wall_clock.dt.tz_localize('Europe/Berlin')
    annotations['time'] = berlin_time.dt.tz_convert('UTC')

    labelled = annotations.rename(
        columns={'time': 'datetime', 'activity': 'ground_truth'}
    ).set_index('datetime')
    labelled['condition'] = 'free-living'

    return labelled


for file in input:
    # The stem of the containing directory selects the matching laboratory
    # rows and names the output file.
    # Named `record_id` (not `id`) to avoid shadowing the builtin.
    record_id = file.parent.stem
    df = prepare_free_living_ground_truth(file)
    gt = ground_truth.loc[ground_truth['id'] == record_id]

    # Stack free-living and laboratory labels, then order them by timestamp.
    df = pd.concat([df, gt]).sort_index()

    df.to_parquet(
        output / f'{record_id}.parquet',
    )