# Downsample ch1..ch16 from 125 Hz to 25 Hz

This notebook looks for `*_labeled.csv` files in `INPUT_DIR`, downsamples `ch1..ch16` by a factor of 5 using an anti-aliasing polyphase filter, aligns all other columns to the new length by row selection, sets `Sampling Rate` to 25, resets `Sample Index`, and writes `<name>_ds25.csv` (with the `_labeled` suffix dropped) next to each input file.


In [1]:
# Parameters

# Directory that is scanned for `*_labeled.csv` input files.
INPUT_DIR = "/home/jupyter-yin10/EEG_HAR/PLHI-HAR_EEG-2025_new"

# Rational resampling ratio UP/DOWN passed to the polyphase resampler (125 Hz -> 25 Hz).
UP, DOWN = 1, 5

# Sampling rate of the downsampled output, in Hz.
TARGET_FS = 25

# Names of the EEG channel columns to resample: ch1..ch16.
EEG_COLS = ["ch" + str(n) for n in range(1, 16 + 1)]


In [3]:
from pathlib import Path
import numpy as np
import pandas as pd
from scipy.signal import resample_poly

# Collect the labeled CSV files directly inside INPUT_DIR (non-recursive), sorted by path.
inp = Path(INPUT_DIR)
files = sorted(
    entry
    for entry in inp.iterdir()
    if entry.is_file()
    and entry.suffix.lower() == ".csv"
    and entry.name.endswith("_labeled.csv")
)
print(f"Found {len(files)} labeled CSV files in {inp}")

def compute_alignment_indices(n_in: int, n_out: int):
    """Pick `n_out` row positions spread evenly over an input of `n_in` rows.

    Positions are taken from an evenly spaced grid between 0 and `n_in - 1`
    (both ends included), rounded to the nearest integer and clamped so that
    they are never negative and, for a non-empty input, never exceed the last
    valid row.

    Parameters
    ----------
    n_in : int
        Number of rows in the original table.
    n_out : int
        Number of rows wanted after downsampling.

    Returns
    -------
    numpy.ndarray
        Integer array of length `n_out` with row positions into the input.
    """
    grid = np.linspace(0, n_in - 1, n_out)
    positions = np.rint(grid).astype(int)
    # With an empty input the only safe position is 0, hence the floor on the upper bound.
    last_valid = max(n_in - 1, 0)
    return np.clip(positions, 0, last_valid)

def resample_series(x, up=1, down=5):
    """Resample a 1-D signal by the rational factor `up / down`.

    The input is converted to a flat float64 array and handed to
    `scipy.signal.resample_poly`, which performs polyphase filtering.

    Parameters
    ----------
    x : array-like
        Signal samples; any shape is flattened to one dimension.
    up : int, default 1
        Upsampling factor.
    down : int, default 5
        Downsampling factor.

    Returns
    -------
    numpy.ndarray
        The resampled signal as a 1-D float64 array.
    """
    samples = np.ravel(np.asarray(x, dtype=np.float64))
    return resample_poly(samples, up, down)

# Downsample every labeled CSV found above and record a per-file status.
# Each file is handled independently: a failure is logged and recorded in
# `results`, and processing continues with the next file.
results = []
for i, f in enumerate(files, start=1):
    try:
        df = pd.read_csv(f)

        # All EEG channels must be present, otherwise the file is skipped.
        missing = [c for c in EEG_COLS if c not in df.columns]
        if missing:
            print(f"[{i:02d}] SKIP {f.name}: missing EEG columns {missing}")
            results.append((f.name, 'skipped_missing_cols'))
            continue

        n_in = len(df)
        if n_in == 0:
            print(f"[{i:02d}] SKIP {f.name}: empty file")
            results.append((f.name, 'skipped_empty'))
            continue

        # Resample each EEG channel with the polyphase filter.
        resampled = {}
        for ch in EEG_COLS:
            resampled[ch] = resample_series(df[ch].values, up=UP, down=DOWN)

        # All channels come from the same frame, so their resampled lengths
        # must agree; anything else is treated as a reason to skip the file.
        lengths = {k: len(v) for k, v in resampled.items()}
        unique_lengths = set(lengths.values())
        if len(unique_lengths) != 1:
            print(f"[{i:02d}] SKIP {f.name}: inconsistent resampled lengths {lengths}")
            results.append((f.name, 'skipped_inconsistent_lengths'))
            continue
        n_out = unique_lengths.pop()

        # Non-EEG columns (labels, metadata, ...) are not filtered; they are
        # brought to the new length by picking n_out of the original rows.
        # NOTE(review): the indices are spread end-to-end over the input, which
        # can differ by a few input samples from the k*DOWN/UP positions of the
        # resampled EEG — confirm this is acceptable for label alignment.
        idx = compute_alignment_indices(n_in, n_out)
        out_df = df.iloc[idx].copy()

        # Overwrite the EEG columns with the filtered, downsampled signals.
        # Assigning plain arrays is positional, so the retained original row
        # labels of out_df do not matter here.
        for ch, y in resampled.items():
            out_df[ch] = y.astype(float)

        # Record the new sampling rate if the file carries that column, so the
        # output no longer advertises the pre-downsampling rate.
        if 'Sampling Rate' in out_df.columns:
            out_df['Sampling Rate'] = TARGET_FS

        # Renumber samples 0..n_out-1 if the file carries a sample counter.
        if 'Sample Index' in out_df.columns:
            out_df['Sample Index'] = np.arange(n_out)

        # Build the output name by dropping only the trailing '_labeled' from
        # the stem, leaving any other occurrence of that text in the name intact.
        stem = f.stem
        if stem.endswith('_labeled'):
            stem = stem[:-len('_labeled')]
        out_path = f.with_name(stem + '_ds25.csv')
        out_df.to_csv(out_path, index=False)
        print(f"[{i:02d}] OK {f.name}: rows_in={n_in} rows_out={n_out} -> {out_path.name}")
        results.append((f.name, 'ok'))
    except Exception as e:
        # Best effort by design: report the failure and keep going.
        print(f"[{i:02d}] ERROR {f.name}: {e}")
        results.append((f.name, f'error: {e}'))

# Summary table (one row per input file) shown as the cell output.
pd.DataFrame(results, columns=['file','status'])


Found 54 labeled CSV files in /home/jupyter-yin10/EEG_HAR/PLHI-HAR_EEG-2025_new
[01] OK OpenBCISession_s1-chair squats_stacked_labeled.csv: rows_in=57013 rows_out=11403 -> OpenBCISession_s1-chair squats_stacked_ds25.csv
[02] OK OpenBCISession_s1-light stationary cycling_stacked_labeled.csv: rows_in=57177 rows_out=11436 -> OpenBCISession_s1-light stationary cycling_stacked_ds25.csv
[03] OK OpenBCISession_s1-marching in place_stacked_labeled.csv: rows_in=51235 rows_out=10247 -> OpenBCISession_s1-marching in place_stacked_ds25.csv
[04] OK OpenBCISession_s1-seated boxing hooks_stacked_labeled.csv: rows_in=58126 rows_out=11626 -> OpenBCISession_s1-seated boxing hooks_stacked_ds25.csv
[05] OK OpenBCISession_s1-seated leg extensions_stacked_labeled.csv: rows_in=77573 rows_out=15515 -> OpenBCISession_s1-seated leg extensions_stacked_ds25.csv
[06] OK OpenBCISession_s1-seated medicine ball twists_stacked_labeled.csv: rows_in=104155 rows_out=20831 -> OpenBCISession_s1-seated medicine ball twists_

[54] OK OpenBCISession_s6-wallpushups_stacked_labeled.csv: rows_in=59320 rows_out=11864 -> OpenBCISession_s6-wallpushups_stacked_ds25.csv


Unnamed: 0,file,status
0,OpenBCISession_s1-chair squats_stacked_labeled...,ok
1,OpenBCISession_s1-light stationary cycling_sta...,ok
2,OpenBCISession_s1-marching in place_stacked_la...,ok
3,OpenBCISession_s1-seated boxing hooks_stacked_...,ok
4,OpenBCISession_s1-seated leg extensions_stacke...,ok
5,OpenBCISession_s1-seated medicine ball twists_...,ok
6,OpenBCISession_s1-seated side bends_stacked_la...,ok
7,OpenBCISession_s1-side-stepping_stacked_labele...,ok
8,OpenBCISession_s1-standing heel to toe walk_st...,ok
9,OpenBCISession_s1-wall push-ups_stacked_labele...,ok
