Import libs

In [1]:
import os
import glob
import numpy as np
import pandas as pd

Constants

In [None]:
# Input / output locations used by this notebook (all relative paths).
# SEGMENTS_BASEDIR is the root folder that is scanned for one subdirectory per stroke type.
SEGMENTS_BASEDIR = "../processed/swings_segments"
# NOTE(review): METADATA_PATH is not referenced by any other cell visible in this notebook.
METADATA_PATH = "../processed/swings_metadata.csv"
# FEATURES_OUT_PATH is the CSV file the final per-swing feature table is written to.
FEATURES_OUT_PATH = "../processed/swing_features.csv"

# Echo the configuration so the inputs and output of this run show up in the cell output.
print("Looking under:", SEGMENTS_BASEDIR)
print("Expecting subdirs: backhand, forehand, serve")
print("Features to:", FEATURES_OUT_PATH)

Looking under: ../processed/swings_segments
Expecting subdirs: backhand, forehand, serve
Will write features to: ../processed/swing_features.csv


Helper Functions

In [None]:
def extract_features_from_segment_csv(
    file_path: str,
    label_from_dir: str,
    fallback_sample_period_ms: float = 10.0,
) -> dict:
    """
    Aggregate a single segmented swing CSV into one flat row of features.

    file_path: path to a single swing file like
               processed/swings_segments/backhand/backhand_backhands_watch-right_idx01.csv
    label_from_dir: "backhand" | "forehand" | "serve". Stored unchanged as
               "stroke_label"; the file's own "label" column (if any) is not consulted.
    fallback_sample_period_ms: spacing between consecutive samples in milliseconds,
               used to estimate the duration ONLY when the file has no "t_rel_ms"
               column. The default of 10.0 ms corresponds to a 100 Hz sampling rate.

    Returns: dict of aggregated features for this swing with the keys
             swing_file, session_id, stroke_label, peak_abs_X, peak_abs_Y,
             peak_abs_Z, sign_Z_at_peak, duration_ms, Z_over_X, Z_over_Y, X_over_Y.

    Raises: ValueError if a required gyro column is missing or the file has no rows.
    """

    df = pd.read_csv(file_path)

    required_cols = ["rotationRateX", "rotationRateY", "rotationRateZ"]
    for col in required_cols:
        if col not in df.columns:
            raise ValueError(f"{file_path} is missing {col}")

    # Without this guard an empty segment fails later inside np.max with an
    # opaque "zero-size array" message that does not name the offending file.
    if df.empty:
        raise ValueError(f"{file_path} contains no samples")

    # gyro time-series
    x = df["rotationRateX"].to_numpy()
    y = df["rotationRateY"].to_numpy()
    z = df["rotationRateZ"].to_numpy()
    abs_z = np.abs(z)

    # peak absolute angular velocity per axis
    peak_abs_X = float(np.max(np.abs(x)))
    peak_abs_Y = float(np.max(np.abs(y)))
    peak_abs_Z = float(np.max(abs_z))

    # sign of Z at the sample where |Z| is maximized: -1.0, 0.0, or 1.0
    idx_peak_Z = int(np.argmax(abs_z))
    sign_Z_at_peak = float(np.sign(z[idx_peak_Z]))

    # duration of the swing (last timestamp - first timestamp)
    if "t_rel_ms" in df.columns:
        duration_ms = float(df["t_rel_ms"].iloc[-1] - df["t_rel_ms"].iloc[0])
    else:
        # fallback: no timestamps, so assume evenly spaced samples;
        # N samples span (N - 1) sample periods.
        duration_ms = float((len(df) - 1) * fallback_sample_period_ms)

    # note session_id (taken from the first row; None when the column is absent)
    session_id = df["session_id"].iloc[0] if "session_id" in df.columns else None

    def _ratio(numerator: float, denominator: float) -> float:
        # A zero denominator yields +inf instead of raising ZeroDivisionError.
        return numerator / denominator if denominator != 0 else np.inf

    return {
        "swing_file": file_path,
        "session_id": session_id,
        "stroke_label": label_from_dir,  # backhand | forehand | serve

        "peak_abs_X": peak_abs_X,
        "peak_abs_Y": peak_abs_Y,
        "peak_abs_Z": peak_abs_Z,

        "sign_Z_at_peak": sign_Z_at_peak,   # direction of dominant Z spin for that swing
        "duration_ms": duration_ms,

        # ratios can help a dumb tree split easily
        "Z_over_X": _ratio(peak_abs_Z, peak_abs_X),
        "Z_over_Y": _ratio(peak_abs_Z, peak_abs_Y),
        "X_over_Y": _ratio(peak_abs_X, peak_abs_Y),
    }


Extract peak features

In [4]:
# Collect one feature dict per swing file, walking one stroke directory at a time.
all_feature_rows = []

for stroke_dir in ("backhand", "forehand", "serve"):
    full_dir = os.path.join(SEGMENTS_BASEDIR, stroke_dir)

    if os.path.isdir(full_dir):
        # Sorted so the row order does not depend on the filesystem listing order.
        swing_csvs = sorted(glob.glob(os.path.join(full_dir, "*.csv")))
        print(f"{stroke_dir}: found {len(swing_csvs)} swings")

        # The directory name is used as the stroke label for every file inside it.
        for swing_file in swing_csvs:
            feat_row = extract_features_from_segment_csv(
                file_path=swing_file, label_from_dir=stroke_dir
            )
            all_feature_rows.append(feat_row)
    else:
        # A missing stroke directory is reported and skipped, not treated as an error.
        print(f"Skipping {stroke_dir} (no dir {full_dir})")

# Build the table once from the accumulated list of records.
features_df = pd.DataFrame(all_feature_rows)
print("Total swings processed:", len(features_df))
features_df.head()


backhand: found 6 swings
forehand: found 7 swings
serve: found 7 swings
Total swings processed: 20


Unnamed: 0,swing_file,session_id,stroke_label,peak_abs_X,peak_abs_Y,peak_abs_Z,sign_Z_at_peak,duration_ms,Z_over_X,Z_over_Y,X_over_Y
0,../processed/swings_segments/backhand/backhand...,backhands_watch-right,backhand,7.848593,9.741038,9.918324,-1.0,3900.0,1.263707,1.0182,0.805725
1,../processed/swings_segments/backhand/backhand...,backhands_watch-right,backhand,10.122697,11.718678,13.072875,-1.0,3550.0,1.291442,1.115559,0.863809
2,../processed/swings_segments/backhand/backhand...,backhands_watch-right,backhand,8.459282,11.353635,12.163086,-1.0,3510.0,1.437839,1.071294,0.745073
3,../processed/swings_segments/backhand/backhand...,backhands_watch-right,backhand,8.078526,11.166613,12.054246,-1.0,3760.0,1.492134,1.07949,0.723454
4,../processed/swings_segments/backhand/backhand...,backhands_watch-right,backhand,9.223503,10.771327,11.978202,-1.0,3530.0,1.298661,1.112045,0.856301


In [5]:
# Per-stroke descriptive statistics (mean / std / min / max) of every numeric feature.
summary = (
    features_df
    .groupby("stroke_label")[
        [
            "peak_abs_X",
            "peak_abs_Y",
            "peak_abs_Z",
            "sign_Z_at_peak",
            "duration_ms",
            "Z_over_X",
            "Z_over_Y",
            "X_over_Y",
        ]
    ]
    .agg(["mean", "std", "min", "max"])
)

summary


Unnamed: 0_level_0,peak_abs_X,peak_abs_X,peak_abs_X,peak_abs_X,peak_abs_Y,peak_abs_Y,peak_abs_Y,peak_abs_Y,peak_abs_Z,peak_abs_Z,...,Z_over_X,Z_over_X,Z_over_Y,Z_over_Y,Z_over_Y,Z_over_Y,X_over_Y,X_over_Y,X_over_Y,X_over_Y
Unnamed: 0_level_1,mean,std,min,max,mean,std,min,max,mean,std,...,min,max,mean,std,min,max,mean,std,min,max
stroke_label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
backhand,8.747556,0.831433,7.848593,10.122697,10.931164,0.678993,9.741038,11.718678,11.830886,1.037634,...,1.263707,1.492134,1.080908,0.035384,1.0182,1.115559,0.800355,0.056939,0.723454,0.863809
forehand,16.086659,1.155682,14.702891,17.445503,15.005458,1.066914,13.168766,16.16221,11.24184,1.295597,...,0.578568,0.743992,0.75592,0.12933,0.562448,0.948828,1.079216,0.136468,0.916971,1.299477
serve,18.987623,3.288838,15.772879,24.172422,13.586952,1.96389,10.972126,17.052198,18.799091,1.777902,...,0.858656,1.114385,1.393787,0.105107,1.217191,1.504622,1.397436,0.12907,1.231855,1.561475


In [None]:
# Persist the per-swing feature table; index=False keeps the row index out of the CSV.
features_df.to_csv(path_or_buf=FEATURES_OUT_PATH, index=False)
# Report where the table was written.
print("feature table at", FEATURES_OUT_PATH)


Wrote feature table to: ../processed/swing_features.csv
