Manual Segmentation Tool for Apple Watch IMU Gym Dataset
=========================================================

This tool allows manual segmentation of ACTIVE and IDLE regions
in IMU-based gym exercise CSV files.

Why manual?
-----------
The dataset contains long idle portions before/after real movement,
making automatic trimming unreliable. This tool helps you:

  • Visualize gravity signals (X/Y/Z)
  • Manually select the active region by entering start/end frames
  • Automatically map the exercise code parsed from the filename to a FINE_MAP label
  • Save output as:
        <filename>_active.csv
        <filename>_idle.csv

Usage:
------
1. Set DATASET_DIR to the directory containing raw CSV files.
2. Run all cells.
3. Select a file → click "Visualize".
4. Inspect the plotted motion.
5. Enter "start frame" and "end frame" (rows from start up to, but not including, end are saved as active).
6. Click "Save Segments".

Author: Muhammad Dwiva Arya Erlangga

# Imports

In [24]:
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import ipywidgets as widgets
from IPython.display import display, clear_output

# CONFIG — CHANGE THIS BEFORE RUNNING

In [29]:
# Directory holding the raw CSV recordings.
# NOTE: this is a machine-specific path -- CHANGE THIS before running.
DATASET_DIR = "/Users/muhammaddwivaaryaerlangga/Downloads/Whales1and2_Raw_Labelled"

# Directory that receives the segmented output files.
OUTPUT_DIR = "./manual_segments"

# Create the output directory up front; an already existing one is fine.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# FINE MAP — standardized exercise families

In [32]:
# Exercise codes grouped by the standardized family they belong to.
# The groups are listed in the order the codes should appear in FINE_MAP
# (InclinePress appears twice so that "MIBP" stays the last entry).
_FAMILY_CODES = (
    ("LatPull", ("SBLP", "SAP")),
    ("Pullups", ("APULL", "PULL")),
    ("CableRow", ("CGCR", "NGCR", "MGTBR")),
    ("BicepCurl", ("PREC", "IDBC", "AIDBC")),
    ("InclinePress", ("30DBP", "45DBP", "30BP")),
    ("ShoulderPress", ("DSP", "MSP")),
    ("LateralRaise", ("SACLR", "DLR")),
    ("TricepExt", ("SAOCTE", "SAODTE", "MTE", "CGOCTE")),
    ("Wrist", ("DWC",)),
    ("InclinePress", ("MIBP",)),
)

# Flat lookup: exercise code -> exercise family label.
FINE_MAP = {
    code: family
    for family, codes in _FAMILY_CODES
    for code in codes
}

# Extract exercise code from filename
    Example filename:
    011224_IDBC_W7_5_S1_R13-2024-12-01_08-07-17.csv
    → returns "IDBC"

In [35]:
def extract_exercise_code(filename):
    """Return the exercise code embedded in a recording filename.

    The code is the single underscore-delimited token that sits directly
    before a token starting with ``W``, e.g.
    ``011224_IDBC_W7_5_S1_R13-2024-12-01_08-07-17.csv`` -> ``"IDBC"``.

    Args:
        filename: File name (with or without extension) to parse.

    Returns:
        The exercise code as a string, or ``None`` when the name contains
        no ``_<code>_W`` pattern.
    """
    # [^\W_]+ means "word characters except underscore", so the capture is
    # limited to one token. A plain \w+ is greedy across underscores and
    # would return e.g. "IDBC_W7_5_S1" when a later "_W" exists in the name.
    match = re.search(r"_([^\W_]+)_W", filename)
    return match.group(1) if match else None

# Visualization Function

In [64]:
def visualize_csv(file_path, start_frame=None, end_frame=None):
    """Plot the three wrist gravity channels of one CSV recording.

    Args:
        file_path: Path of the CSV file; it must contain the columns
            ``wristMotion_gravityX``, ``wristMotion_gravityY`` and
            ``wristMotion_gravityZ``.
        start_frame: Frame index of the first guideline; ``None`` means 0.
        end_frame: Frame index of the second guideline; ``None`` means the
            last frame of the file.

    The two dashed guidelines are drawn only when
    ``0 <= start_frame < end_frame``. The frame count and the boundaries in
    use are printed after the figure is shown.
    """
    import matplotlib.pyplot as plt
    import pandas as pd
    import os

    recording = pd.read_csv(file_path)

    # Read every channel before creating the figure, so a missing column
    # fails before anything is drawn.
    channels = (
        ("gravityX", recording["wristMotion_gravityX"].values),
        ("gravityY", recording["wristMotion_gravityY"].values),
        ("gravityZ", recording["wristMotion_gravityZ"].values),
    )

    total_frames = len(recording)
    last_frame = total_frames - 1

    # Fall back to the full range when a boundary was not supplied.
    start_frame = 0 if start_frame is None else start_frame
    end_frame = last_frame if end_frame is None else end_frame

    plt.figure(figsize=(16, 6))
    for channel_label, samples in channels:
        plt.plot(samples, label=channel_label, alpha=0.6)

    # Mark the selected region only when the boundaries form a valid range.
    if 0 <= start_frame < end_frame:
        for boundary in (start_frame, end_frame):
            plt.axvline(boundary, color='black', linestyle='--', alpha=0.5)

    # Always show the whole recording on the x axis.
    plt.xlim(0, last_frame)

    plt.title(f"Gravity Visualization — {os.path.basename(file_path)}")
    plt.xlabel("Frame")
    plt.ylabel("Gravity Value")
    plt.legend()
    plt.grid(True)
    plt.show()

    print(f"Total frames: {total_frames}")
    print(f"Showing placeholder start={start_frame}, end={end_frame}")

# Save Segmented CSVs

In [41]:
def save_segments(file_path, start, end):
    """Split one recording into an active and an idle CSV file.

    Rows ``start`` up to, but not including, ``end`` are written to
    ``<base>_active.csv`` with the exercise family from ``FINE_MAP`` as
    label ("Unknown" when the filename code is not in the map). All
    remaining rows are written to ``<base>_idle.csv`` with label "Idle".
    Both files are created inside ``OUTPUT_DIR``.

    Args:
        file_path: Path of the raw CSV file.
        start: Index of the first active row (inclusive).
        end: Index one past the last active row (exclusive). Values larger
            than the row count simply select up to the end of the file.

    Returns:
        Tuple ``(active_path, idle_path)`` of the two files written.

    Raises:
        ValueError: If ``start`` is negative, not smaller than ``end``, or
            not smaller than the number of rows in the file.
    """
    df = pd.read_csv(file_path)
    n_rows = len(df)

    # Reject ranges that would corrupt the output: with a negative start
    # (or start >= end) the two idle slices below overlap, so rows would be
    # duplicated in the idle file while the active file ends up empty.
    if start < 0 or end <= start or start >= n_rows:
        raise ValueError(
            f"Invalid frame range [{start}, {end}) for a file with {n_rows} rows."
        )

    base = os.path.splitext(os.path.basename(file_path))[0]

    # Map the exercise code from the filename to its family label.
    code = extract_exercise_code(base)
    label = FINE_MAP.get(code, "Unknown")

    # ACTIVE movement rows; copy so adding the label column does not touch df.
    df_active = df.iloc[start:end].copy()
    df_active["label"] = label

    # IDLE rows: everything before and after the active range.
    df_idle = pd.concat([df.iloc[:start], df.iloc[end:]], axis=0)
    df_idle["label"] = "Idle"

    active_path = os.path.join(OUTPUT_DIR, f"{base}_active.csv")
    idle_path = os.path.join(OUTPUT_DIR, f"{base}_idle.csv")

    df_active.to_csv(active_path, index=False)
    df_idle.to_csv(idle_path, index=False)

    return active_path, idle_path

# UI Widgets

In [44]:
# File picker listing every CSV found directly inside DATASET_DIR.
# os.listdir() returns entries in arbitrary order, so the names are sorted
# to give the dropdown a stable, predictable order.
file_dropdown = widgets.Dropdown(
    options=sorted(f for f in os.listdir(DATASET_DIR) if f.endswith(".csv")),
    description="File:",
    layout=widgets.Layout(width="500px")
)

# Integer input for the first frame of the active region.
start_box = widgets.IntText(
    description="Start Frame:",
    layout=widgets.Layout(width="300px")
)

# Integer input for the end frame of the active region.
end_box = widgets.IntText(
    description="End Frame:",
    layout=widgets.Layout(width="300px")
)

# Button that redraws the plot for the selected file.
btn_visualize = widgets.Button(
    description="Visualize",
    button_style="info",
    layout=widgets.Layout(width="200px")
)

# Button that writes the active/idle CSV files.
btn_save = widgets.Button(
    description="Save Segments",
    button_style="success",
    layout=widgets.Layout(width="200px")
)

# Shared output area where the callbacks print and plot.
output_area = widgets.Output()

# Widget Callbacks

In [72]:
def on_visualize_clicked(b):
    """Redraw the plot for the selected file with the current frame boxes.

    Args:
        b: The clicked button (unused).
    """
    with output_area:
        clear_output()

        file_path = os.path.join(DATASET_DIR, file_dropdown.value)
        print(f"Displaying: {file_dropdown.value}")

        first = start_box.value
        last = end_box.value

        # A missing start falls back to frame 0; a missing end is passed on
        # as None so that visualize_csv applies its own default.
        if first is None:
            first = 0

        visualize_csv(file_path, start_frame=first, end_frame=last)


def on_save_clicked(b):
    """Validate the frame boxes and save the active/idle segments.

    Prints an error message to the output area, and saves nothing, when no
    file is selected or the frame range is invalid. On success the paths of
    the two written files are printed.

    Args:
        b: The clicked button (unused).
    """
    with output_area:
        clear_output()

        # The dropdown value is None when there are no CSV files to choose
        # from; os.path.join would raise TypeError on it.
        if file_dropdown.value is None:
            print("❌ ERROR: No file selected.")
            return

        file_path = os.path.join(DATASET_DIR, file_dropdown.value)
        start = start_box.value
        end   = end_box.value

        if start is None or end is None:
            print("❌ ERROR: Start and End must be set.")
            return

        # A negative start would be interpreted as "counted from the end"
        # by the row slicing and produce overlapping idle rows.
        if start < 0:
            print("❌ ERROR: Start must be >= 0.")
            return

        if start >= end:
            print("❌ ERROR: Start must be < End.")
            return

        active_path, idle_path = save_segments(
            file_path,
            start,
            end
        )

        print("✔ Segmentation saved!")
        print("Active file:", active_path)
        print("Idle file:", idle_path)

def on_file_change(change):
    """Reset the frame boxes and redraw when another file is selected.

    Args:
        change: Change notification dict from the dropdown; only
            notifications whose ``"name"`` is ``"value"`` are handled, and
            ``change["new"]`` is used as the newly selected file name.
    """
    if change["name"] != "value":
        return

    with output_area:
        clear_output()

        file_path = os.path.join(DATASET_DIR, change["new"])
        last_frame = len(pd.read_csv(file_path)) - 1

        # Reset the start/end frames to the full visible range.
        start_box.value = 0
        end_box.value = last_frame

        print(f"Switched to: {change['new']}")
        visualize_csv(file_path, start_frame=0, end_frame=last_frame)

# Wire the two buttons to their click handlers.
btn_save.on_click(on_save_clicked)
btn_visualize.on_click(on_visualize_clicked)
# React to a new selection in the file dropdown.
file_dropdown.observe(on_file_change, names="value")

# Display UI

In [74]:
# Stack the controls vertically: file picker, frame inputs, buttons, output.
ui = widgets.VBox(
    children=[
        file_dropdown,
        widgets.HBox(children=[start_box, end_box]),
        widgets.HBox(children=[btn_visualize, btn_save]),
        output_area,
    ]
)

display(ui)

VBox(children=(Dropdown(description='File:', layout=Layout(width='500px'), options=('261224_NGCR_W32_S1_R8-202…