# Tutorial: Data Analysis with Accelerometer Data

Austin MBaye

Northeastern University

## **Overview**
In this tutorial, we will walk through our SW1PerS algorithm on the accelerometer data provided with the dataset.

---

## **SW1PerS + Accelerometer Algorithm Walkthrough**
The pipeline follows these key steps:

1.  **Data Extraction** - How to extract the accelerometer data from the directory
2.  **Preprocessing** – Cleaning and structuring accelerometer data for analysis.  
3.  **Period Estimation** – Identifying periodic patterns in movement using the LAPIS framework.
4.  **Computing Sliding Window Embedding** – Transforming accelerometer data into time-delayed embeddings.  
5.  **Computing Persistent Homology & Extracting Features** – Calculating topological persistence and computing periodicity score.



## Data Extraction

All of the functions needed are located in `AQSM_SW1PerS.utils.accelerometer`.

In [None]:
import numpy as np
import pandas as pd
from AQSM_SW1PerS.utils.accelerometer import *
from AQSM_SW1PerS.utils.paths import get_data_path


In [None]:

# Select the recording session; only the study and participant identifier need editing.
data_folder = 'Study2/001-2010-05-25'
folder_path = get_data_path("data", data_folder)

# The timestamps in the .xml annotation file for session 001-2010-05-28 are
# incorrect, so that session uses the .xlsx file, which has correct timestamps.
annofile = (
    'Annotator1Stereotypy.annotation.xlsx'
    if '001-2010-05-28' in str(folder_path)
    else "Annotator1Stereotypy.annotation.xml"
)

# Nothing below needs to be changed. The first study used MITes accelerometers
# while the second study used Wockets, so the sensor file names differ.
if 'Study1' not in str(folder_path):
    torso_file, left_wrist_file, right_wrist_file = (
        "Wocket_02_RawCorrectedData_Torso.csv",
        "Wocket_01_RawCorrectedData_Left-Wrist.csv",
        "Wocket_00_RawCorrectedData_Right-Wrist.csv",
    )
else:
    torso_file, left_wrist_file, right_wrist_file = (
        "MITes_01_RawCorrectedData_Trunk.RAW_DATA.csv",
        "MITes_08_RawCorrectedData_Left-wrist.RAW_DATA.csv",
        "MITes_11_RawCorrectedData_Right-wrist.RAW_DATA.csv",
    )
    

### Extract the key metadata

`process_accelerometer_data` extracts all 4-second time intervals of SMM and subsamples the 4-second intervals of no stereotypy to $\min\left(3\cdot\#(\text{SMM}),\ \#(\text{no SMM})\right)$ intervals.

`get_accel_data` extracts the accelerometer data formatted as (timestamp, x, y, z)

In [None]:
# Window metadata built from the annotation file and the torso recording.
meta_data = process_accelerometer_data(folder_path, annofile, torso_file)

# Load the raw recordings in torso, left-wrist, right-wrist order.
torso_X, left_wrist_X, right_wrist_X = (
    get_accel_data(folder_path, sensor_file)
    for sensor_file in (torso_file, left_wrist_file, right_wrist_file)
)


### Helper functions

These helper functions are useful for resampling all accelerometers to 95 Hz.

In [None]:

def get_label_occurrence_index(window_meta, target_label, occurrence=0):
    """Return the index in ``window_meta`` of the nth entry carrying a label.

    Args:
        window_meta: Sequence of mappings, each with a ``'label'`` key.
        target_label: Label value to look for.
        occurrence: Zero-based rank of the wanted match among all matches.

    Returns:
        Position within ``window_meta`` of the requested match.

    Raises:
        ValueError: If fewer than ``occurrence + 1`` entries have the label.
    """
    hits = []
    for position, entry in enumerate(window_meta):
        if entry['label'] == target_label:
            hits.append(position)

    if occurrence < len(hits):
        return hits[occurrence]
    raise ValueError(f"Only {len(hits)} occurrences of label '{target_label}' found.")

def extract_processed_window(X, meta_entry, expected_length=380, sigma=3):
    """Cut one annotated window out of a recording and smooth its axes.

    The window is fetched with ``getAccelerometerRange`` (fixed-length
    interpolation enabled), then every column after the first is passed
    through ``smoothDataGaussian``. The first column is left untouched.

    Args:
        X: Raw accelerometer array, as returned by ``get_accel_data``.
        meta_entry: Metadata entry describing the window to extract.
        expected_length: Number of samples requested for the window.
        sigma: Smoothing parameter forwarded to ``smoothDataGaussian``.

    Returns:
        2-D array whose first column is the window's first (timestamp)
        column and whose remaining columns are the smoothed readings.
    """
    raw_window = getAccelerometerRange(
        X,
        meta_entry,
        interpolate_to_fixed_length=True,
        expected_length=expected_length,
    )
    time_col, xyz_cols = raw_window[:, 0], raw_window[:, 1:]
    smoothed_xyz = smoothDataGaussian(xyz_cols, sigma=sigma)
    return np.column_stack((time_col, smoothed_xyz))

def interpolate_xyz(window_data):
    """Fit one cubic spline per axis over a window's elapsed time.

    Args:
        window_data: 2-D array with columns (timestamp, x, y, z).

    Returns:
        Tuple ``(spline_x, spline_y, spline_z, elapsed)`` where ``elapsed``
        is the timestamp column shifted to start at zero and divided by
        1000 (seconds, assuming millisecond timestamps), and each spline
        is a ``CubicSpline`` over ``elapsed``.
    """
    elapsed_sec = (window_data[:, 0] - window_data[0, 0]) / 1000.0
    spline_x, spline_y, spline_z = (
        CubicSpline(elapsed_sec, window_data[:, col]) for col in (1, 2, 3)
    )
    return spline_x, spline_y, spline_z, elapsed_sec

def estimate_sampling_rate(timestamps_ms):
    """Return the sampling rate in Hz implied by millisecond timestamps.

    The rate is 1000 divided by the mean gap between consecutive
    timestamps.
    """
    mean_interval_ms = np.diff(timestamps_ms).mean()
    return 1000.0 / mean_interval_ms
    

# SW1PerS Pipeline

The following follows a nearly identical process to the one in the other tutorial. The key change is the use of the LAPIS framework, which is discussed in Supplementary Note 4 of the paper.

In [None]:

from AQSM_SW1PerS.SW1PerS import *


In [None]:

# Pick which annotated window to analyze: the given occurrence of `label`.
label = 'Flap-Rock'
occurrence = 20
i = get_label_occurrence_index(meta_data, label, occurrence)

meta_i = meta_data[i]

# Extract the same window from each sensor (torso, left wrist, right wrist).
window_torso, window_lwrist, window_rwrist = (
    extract_processed_window(sensor_X, meta_i)
    for sensor_X in (torso_X, left_wrist_X, right_wrist_X)
)

# Per-axis cubic splines for every sensor. `timestamps_sec` is overwritten on
# each line, so it ends up holding the right-wrist time axis.
t_x, t_y, t_z, timestamps_sec = interpolate_xyz(window_torso)
l_x, l_y, l_z, timestamps_sec = interpolate_xyz(window_lwrist)
r_x, r_y, r_z, timestamps_sec = interpolate_xyz(window_rwrist)

# Sampling rate of each window, estimated from its timestamp column.
sampling_rate_torso, sampling_rate_lwrist, sampling_rate_rwrist = (
    estimate_sampling_rate(window[:, 0])
    for window in (window_torso, window_lwrist, window_rwrist)
)

print(
    f"Sampling Rate Torso: {sampling_rate_torso:.2f} Hz",
    f"Sampling Rate Left Wrist: {sampling_rate_lwrist:.2f} Hz",
    f"Sampling Rate Right Wrist: {sampling_rate_rwrist:.2f} Hz",
    sep="\n",
)


In [None]:

def resample_and_detrend_splines(t_x, t_y, t_z, sampling_rate, duration_sec=4):
    """Sample three splines on a uniform grid, detrend them, and re-fit splines.

    Args:
        t_x, t_y, t_z: Callables (e.g. ``CubicSpline``) evaluated on the grid.
        sampling_rate: Samples per second; the grid has
            ``int(duration_sec * sampling_rate)`` points.
        duration_sec: Length of the grid, which spans ``[0, duration_sec]``.

    Returns:
        Tuple ``(t_vals, X_interp, cs_list, keypoint_x, keypoint_y, keypoint_z)``:
        the time grid, the detrended signals stacked as columns, a list of
        ``CubicSpline`` objects fitted to the detrended signals, and the
        three detrended signals individually.
    """
    n_samples = int(duration_sec * sampling_rate)
    t_vals = np.linspace(0, duration_sec, n_samples)

    # Evaluate every axis on the shared grid and remove its linear trend.
    detrended = [signal.detrend(axis_spline(t_vals)) for axis_spline in (t_x, t_y, t_z)]

    # Splines of the detrended signals, kept for later continuous evaluation.
    cs_list = [CubicSpline(t_vals, axis_values) for axis_values in detrended]

    X_interp = np.column_stack(detrended)
    keypoint_x, keypoint_y, keypoint_z = detrended

    return t_vals, X_interp, cs_list, keypoint_x, keypoint_y, keypoint_z


In [None]:

# Resample each sensor's splines on a uniform 4-second grid and detrend them.
t_vals_torso, torso_interp, cs_torso, torso_x, torso_y, torso_z = resample_and_detrend_splines(
    t_x, t_y, t_z, sampling_rate_torso, duration_sec=4
)
t_vals_lwrist, lwrist_interp, cs_lwrist, lwrist_x, lwrist_y, lwrist_z = resample_and_detrend_splines(
    l_x, l_y, l_z, sampling_rate_lwrist, duration_sec=4
)
t_vals_rwrist, rwrist_interp, cs_rwrist, rwrist_x, rwrist_y, rwrist_z = resample_and_detrend_splines(
    r_x, r_y, r_z, sampling_rate_rwrist, duration_sec=4
)

# Plot: one figure per sensor, drawn by a single loop so that the three
# figures cannot drift apart in styling.
for sensor_title, time_axis, xyz_traces in (
    ("Torso", t_vals_torso, (torso_x, torso_y, torso_z)),
    ("Left Wrist", t_vals_lwrist, (lwrist_x, lwrist_y, lwrist_z)),
    ("Right Wrist", t_vals_rwrist, (rwrist_x, rwrist_y, rwrist_z)),
):
    plt.figure(figsize=(10, 4))
    # X, Y and Z are drawn in red, green and blue respectively.
    for trace, trace_color, axis_name in zip(xyz_traces, ('r', 'g', 'b'), ('X', 'Y', 'Z')):
        plt.plot(time_axis, trace, color=trace_color, label=axis_name)
    plt.title(f"Accelerometer Readings {sensor_title} - Label: {label}")
    plt.xlabel("Time (s)")
    plt.ylabel("Acceleration")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


In [None]:
from AQSM_SW1PerS.utils.period_estimation import *

# The estimator settings are the same for every sensor; only `fs` changes.
# `period_estimator` is rebound each time and ends as the right-wrist estimator.

# Torso
period_estimator = PeriodEstimator(
    fs=sampling_rate_torso, num_components=3, f_min=0.5, f_max=2.0, window_size=4.0
)
period_torso = period_estimator.estimate_period(torso_interp)
print(f'Estimated Period Torso: {period_torso}', end='\n\n')

# Left Wrist
period_estimator = PeriodEstimator(
    fs=sampling_rate_lwrist, num_components=3, f_min=0.5, f_max=2.0, window_size=4.0
)
period_lwrist = period_estimator.estimate_period(lwrist_interp)
print(f'Estimated Period Left Wrist: {period_lwrist}', end='\n\n')

# Right Wrist
period_estimator = PeriodEstimator(
    fs=sampling_rate_rwrist, num_components=3, f_min=0.5, f_max=2.0, window_size=4.0
)
period_rwrist = period_estimator.estimate_period(rwrist_interp)
print(f'Estimated Period Right Wrist: {period_rwrist}')

# Embedding dimension parameter; each delay is the estimated period over (d + 1).
d = 23

tau_torso, tau_lwrist, tau_rwrist = (
    period / (d + 1) for period in (period_torso, period_lwrist, period_rwrist)
)


In [None]:

from sklearn.decomposition import PCA

def generate_and_plot_sw_point_cloud(cs, t_vals, tau, d, title_prefix="Sensor", color=None):
    """Build a sliding-window point cloud and show its 2-D PCA projection.

    Args:
        cs: Splines of the signal, forwarded to ``SW_cloud_nD``.
        t_vals: Time grid, forwarded to ``SW_cloud_nD``.
        tau: Delay, forwarded to ``SW_cloud_nD``.
        d: Embedding dimension parameter, forwarded to ``SW_cloud_nD``.
        title_prefix: Text placed before " SW Point Cloud" in the plot title.
        color: Marker color passed to the scatter plot.

    Returns:
        The point cloud returned by ``SW_cloud_nD`` (not the PCA projection).
    """
    # NOTE(review): 300 and 3 are presumably the number of cloud points and the
    # number of signal channels; confirm against the SW_cloud_nD signature.
    point_cloud = SW_cloud_nD(cs, t_vals, tau, d, 300, 3)

    # PCA is used only for visualization; the full cloud is what gets returned.
    projected = PCA(n_components=2).fit_transform(point_cloud)
    pc1, pc2 = projected[:, 0], projected[:, 1]

    plt.figure(figsize=(6, 5))
    plt.scatter(pc1, pc2, s=10, alpha=0.7, color=color)
    plt.title(fr'{title_prefix} SW Point Cloud')
    plt.xlabel("PC 1")
    plt.ylabel("PC 2")
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    return point_cloud

In [None]:

# Build and plot the sliding-window point cloud of each sensor, in
# torso, left-wrist, right-wrist order.
SW_torso, SW_lwrist, SW_rwrist = (
    generate_and_plot_sw_point_cloud(
        sensor_cs, sensor_t_vals, sensor_tau, d, title_prefix=sensor_title, color=sensor_color
    )
    for sensor_cs, sensor_t_vals, sensor_tau, sensor_title, sensor_color in (
        (cs_torso, t_vals_torso, tau_torso, "Torso", 'crimson'),
        (cs_lwrist, t_vals_lwrist, tau_lwrist, "Left Wrist", 'darkgreen'),
        (cs_rwrist, t_vals_rwrist, tau_rwrist, "Right Wrist", 'royalblue'),
    )
)


In [None]:

def analyze_SW_point_cloud(SW, sensor_name="Sensor", d=23):
    """Run persistent homology on a point cloud and plot the diagram and PS1 score.

    Persistence diagrams up to dimension 1 are computed with ``ripser`` over
    the field of ``next_prime(2 * d)`` coefficients. The dimension-1 diagram
    is scored with ``compute_PS(..., method='PS1')``, and the diagram is
    shown next to a bar chart of the score(s).

    Args:
        SW: Sliding-window point cloud to analyze.
        sensor_name: Text used as a prefix in both subplot titles.
        d: Embedding dimension parameter; only used to pick the coefficient field.

    Returns:
        Tuple ``(diagrams, scores)``: the list of persistence diagrams from
        ``ripser`` and the value returned by ``compute_PS``.
    """
    coeff = next_prime(2 * d)
    result = ripser(SW, coeff=coeff, maxdim=1)
    diagrams = result['dgms']
    dgm1 = np.array(diagrams[1])

    # Compute scores
    scores = compute_PS(dgm1, method='PS1')

    # `scores` is presumably either a sequence or a bare scalar (the scalar
    # fallback below implies the latter can happen; confirm against compute_PS).
    # Only the "object has no len()" case is expected here, so catch TypeError
    # alone rather than hiding unrelated errors or interrupts.
    try:
        num = len(scores)
    except TypeError:
        num, x_lim = 1, 0.5
    else:
        x_lim = 9.5

    # Plot
    fig, axes = plt.subplots(1, 2, figsize=(20, 10))
    plot_diagrams(diagrams, plot_only=[1], xy_range=[0, 2, 0, 2], ax=axes[0])
    axes[0].set_title(fr"{sensor_name} - Persistence Diagram")

    axes[1].bar(range(num), scores, alpha=0.5)
    axes[1].set_title(fr"{sensor_name} - $PS_1$ Score")
    axes[1].set_xlim(-0.5, x_lim)
    axes[1].set_ylim(0, 1)
    axes[1].set_xticks([])

    plt.tight_layout()
    plt.show()

    return diagrams, scores

In [None]:

# Persistent homology and PS1 score for each sensor, in
# torso, left-wrist, right-wrist order.
(
    (diagrams_torso, scores_torso),
    (diagrams_lwrist, scores_lwrist),
    (diagrams_rwrist, scores_rwrist),
) = (
    analyze_SW_point_cloud(sensor_cloud, sensor_name=sensor_title, d=d)
    for sensor_cloud, sensor_title in (
        (SW_torso, "Torso"),
        (SW_lwrist, "Left Wrist"),
        (SW_rwrist, "Right Wrist"),
    )
)
