# In [None]:
import os, argparse, json
import numpy as np
import pandas as pd
from glob import glob
from numpy.fft import rfft, rfftfreq


def _seconds_to_samples(seconds, fs):
    """Convert a duration in seconds to a whole number of samples.

    The product is floored, except when it lies within floating-point
    rounding distance of an integer (e.g. ``0.29 * 100`` evaluates to
    ``28.999999999999996``), in which case that integer is returned.
    """
    n = seconds * fs
    nearest = round(n)
    if abs(n - nearest) <= 1e-9 * max(1.0, abs(n)):
        return int(nearest)
    return int(n)


def windowize(sig, fs, win_sec=1.0, stride=0.5):
    """Yield fixed-length sliding windows over a 1-D signal.

    Args:
        sig: Sliceable sequence of samples (anything supporting ``len`` and
            slice indexing).
        fs: Sampling rate in samples per second.
        win_sec: Window length in seconds.
        stride: Hop between consecutive window starts, in seconds.

    Yields:
        ``(start, window)`` pairs, where ``start`` is the sample index of the
        first element and ``window`` is ``sig[start:start + w]``. Only full
        windows are produced; a signal shorter than one window yields nothing.

    Raises:
        ValueError: If the window or the stride amounts to less than one
            sample. Raised on first iteration, because this is a generator.
    """
    n = len(sig)
    w = _seconds_to_samples(win_sec, fs)
    s = _seconds_to_samples(stride, fs)
    if w <= 0:
        raise ValueError(
            f"window of {win_sec} s at fs={fs} is shorter than one sample"
        )
    if s <= 0:
        raise ValueError(
            f"stride of {stride} s at fs={fs} is shorter than one sample"
        )
    for start in range(0, n - w + 1, s):
        yield start, sig[start:start + w]


def feats_from_window(x, fs):
    """Compute time- and frequency-domain features for one signal window.

    The window is mean-removed first, so every feature describes the AC part
    of the signal.

    Args:
        x: 1-D NumPy array of samples (must be non-empty).
        fs: Sampling rate in samples per second.

    Returns:
        A list of eight Python floats, in this order:
        ``[rms, peak, kurt, skew, E50, E500, E1000, spec_entropy]``.

        * ``kurt`` / ``skew`` are the 4th / 3rd standardized moments
          (``kurt`` is not excess kurtosis, i.e. 3 is not subtracted).
        * ``E50``, ``E500``, ``E1000`` are sums of rFFT *magnitudes* (not
          squared) over the open band ``(f0 - 50, f0 + 50)`` Hz, with the
          lower edge clipped at 0 Hz.
        * ``spec_entropy`` is the Shannon entropy (natural log) of the
          normalised magnitude spectrum.
    """
    x = x - np.mean(x)
    rms = float(np.sqrt(np.mean(x**2)))
    peak = float(np.max(np.abs(x)))

    # Standardise once; the epsilon keeps a flat window from dividing by zero.
    z = x / (np.std(x) + 1e-9)
    kurt = float(np.mean(z**4))
    skew = float(np.mean(z**3))

    # FFT features
    X = np.abs(rfft(x))
    f = rfftfreq(len(x), 1/fs)

    def band_energy(f0, bw=50):
        # Strict inequalities: the band edges themselves (and DC, when the
        # lower edge clips to 0) are excluded.
        m = (f > max(0, f0-bw)) & (f < (f0+bw))
        return float(np.sum(X[m]))

    f1 = band_energy(50)
    f2 = band_energy(500)
    f3 = band_energy(1000)

    # A constant window has an all-zero spectrum; normalising it would be
    # 0/0 and poison the feature with NaN. Treat it as an empty distribution.
    # The equality test (rather than ``> 0``) lets NaN input propagate.
    total = np.sum(X)
    if total == 0:
        p = np.zeros_like(X)
    else:
        p = X / total
    p = p + 1e-12  # avoid log(0) on empty bins
    spec_entropy = float(-np.sum(p * np.log(p)))

    return [rms, peak, kurt, skew, f1, f2, f3, spec_entropy]


def process_file(path_npz, win_sec=1.0, stride=0.5):
    """Extract windowed features from every phase stored in one ``.npz`` file.

    The archive must contain the keys ``fs`` (sampling rate) and ``ia``,
    ``ib``, ``ic`` (presumably the three phase signals; confirm against the
    code that writes these files).

    Args:
        path_npz: Path to the ``.npz`` archive.
        win_sec: Window length in seconds, forwarded to ``windowize``.
        stride: Hop between windows in seconds, forwarded to ``windowize``.

    Returns:
        A list of ``(start, phase, feats)`` tuples: the window's starting
        sample index, the phase key (``'ia'``, ``'ib'`` or ``'ic'``) and the
        feature list from ``feats_from_window``. All ``ia`` rows come first,
        then ``ib``, then ``ic``.
    """
    # np.load keeps the archive's file handle open until closed; the context
    # manager releases it once the arrays have been read into memory.
    with np.load(path_npz) as d:
        fs = float(d['fs'])
        phases = [(name, d[name]) for name in ('ia', 'ib', 'ic')]

    rows = []
    for name, sig in phases:
        for start, win in windowize(sig, fs, win_sec=win_sec, stride=stride):
            rows.append((start, name, feats_from_window(win, fs)))
    return rows


def main():
    """Command-line entry point: build ``features.csv`` from raw recordings.

    Every ``*.npz`` file in the ``--in`` directory is processed in sorted
    order. Each needs a sibling ``.json`` file with the same stem holding a
    ``label`` key, which is attached to every feature row from that file.
    The result is written to ``<--out>/features.csv``. If no input files are
    found, a header-only CSV is written.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--in', dest='ind', type=str, default='data/raw')
    ap.add_argument('--out', type=str, default='data/features')
    ap.add_argument('--win', type=float, default=1.0)
    ap.add_argument('--stride', type=float, default=0.5)
    args = ap.parse_args()

    os.makedirs(args.out, exist_ok=True)
    columns = ['rms', 'peak', 'kurt', 'skew', 'E50', 'E500', 'E1000', 'spec_ent']
    X, y = [], []
    for npz_path in sorted(glob(os.path.join(args.ind, '*.npz'))):
        # Swap only the extension; str.replace would also rewrite any
        # '.npz' that appears earlier in the path.
        json_path = os.path.splitext(npz_path)[0] + '.json'
        with open(json_path, encoding='utf-8') as f:
            meta = json.load(f)
        rows = process_file(npz_path, win_sec=args.win, stride=args.stride)
        for _start, _phase, feats in rows:
            X.append(feats)
            y.append(meta['label'])

    # Force a 2-D shape so zero rows still line up with the column list.
    X = np.asarray(X, dtype=float).reshape(-1, len(columns))
    df = pd.DataFrame(X, columns=columns)
    df['label'] = y
    out_path = os.path.join(args.out, 'features.csv')
    df.to_csv(out_path, index=False)
    print('Saved', out_path, 'shape=', df.shape)

# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()