In [1]:
import pickle
import numpy as np
import pandas as pd
from datetime import datetime

In [2]:
# Read the processed force-plate results back from disk. The cells below use
# `data` as a mapping of subject ID -> sequence of trial objects.
# NOTE(review): pickle.load can execute code embedded in the file; only load
# pickles produced by this project's own processing step.
with open('data/processed_force_plate_data.pkl', 'rb') as pkl_handle:
    data = pickle.load(pkl_handle)

## Data Validation
All of the .csv files were processed automatically, so we need to confirm that every file in the baseline and intervention folders was processed and made it into the pickle file.

Need to validate:
1. Number of unique IDs
2. Whether each ID's TrialData objects cover every `trial_name` from shoulder 1-1 through tandem 2-3. Print the IDs that are missing any of the 12 trials.
3. Output of a full count

In [3]:
# Trial labels follow "<stance> <session>-<repetition>": three repetitions of
# each stance per session. Subjects with only the first session have 6 trials.
_STANCES = ("shoulder", "tandem")
_REPETITIONS = (1, 2, 3)

expected_12_trials = [
    f"{stance} {session_no}-{rep}"
    for session_no in (1, 2)
    for stance in _STANCES
    for rep in _REPETITIONS
]

expected_6_trials = [
    f"{stance} 1-{rep}"
    for stance in _STANCES
    for rep in _REPETITIONS
]

# Every key of the loaded mapping is one subject
subject_ids = list(data.keys())
print(f"Unique subject IDs: {subject_ids}")
print(f"Number of unique subject IDs: {len(subject_ids)}")
print()

# Result buckets filled by the validation pass below
incorrect_count_subjects = []
subjects_with_6_trials = []
subjects_with_12_trials = []
incorrect_trial_names = []

# Lookup tables keyed by the two accepted trial counts
bucket_by_count = {6: subjects_with_6_trials, 12: subjects_with_12_trials}
expected_by_count = {6: expected_6_trials, 12: expected_12_trials}

for sid in subject_ids:
    subject_trials = data[sid]
    n_trials = len(subject_trials)

    # Anything other than 6 or 12 trials is reported and not name-checked
    if n_trials not in bucket_by_count:
        incorrect_count_subjects.append((sid, n_trials))
        continue
    bucket_by_count[n_trials].append(sid)

    # Collect the trial names, dropping the surrounding quotation marks
    found_names = []
    for trial_obj in subject_trials:
        if not hasattr(trial_obj, 'trial_name'):
            print(f"Warning: Subject {sid} has a TrialData object without 'trial_name' attribute")
            continue
        found_names.append(trial_obj.trial_name.strip('"'))

    # The set of names must match the expected set for this trial count
    if set(found_names) != set(expected_by_count[n_trials]):
        incorrect_trial_names.append((sid, n_trials, found_names))

# Summary: who has 6 trials and who has 12
print(f"Subjects with 6 trials ({len(subjects_with_6_trials)}): {subjects_with_6_trials}")
print(f"Subjects with 12 trials ({len(subjects_with_12_trials)}): {subjects_with_12_trials}")
print()

# Report: unexpected trial counts
if not incorrect_count_subjects:
    print("All subjects have either 6 or 12 trials.")
else:
    print("Subjects with incorrect number of trials:")
    for sid, n_trials in incorrect_count_subjects:
        print(f"  Subject {sid}: {n_trials} trials (expected 6 or 12)")

print()

# Report: unexpected trial names, with the missing/extra names per subject
if not incorrect_trial_names:
    print("All subjects have correct trial names.")
else:
    print("Subjects with incorrect trial names:")
    for sid, n_trials, found_names in incorrect_trial_names:
        expected_names = expected_12_trials if n_trials == 12 else expected_6_trials
        missing = set(expected_names) - set(found_names)
        extra = set(found_names) - set(expected_names)

        print(f"  Subject {sid} ({n_trials} trials):")
        if missing:
            print(f"    Missing: {list(missing)}")
        if extra:
            print(f"    Extra: {list(extra)}")
        print(f"    Found: {found_names}")
        print()

Unique subject IDs: ['exgm133', 'exgm156', 'exgm048', 'exgm010', 'exgm046', 'exgm132', 'exgm051', 'exgm120', 'exgm107', 'exgm109', 'exgm116', 'exgm082', 'exgm081', 'exgm018', 'exgm184', 'exgm145', 'exgm023', 'exgm087', 'exgm097', 'exgm122', 'exgm043', 'exgm079', 'exgm169', 'exgm084', 'exgm004', 'exgm050', 'exgm193', 'exgm181', 'exgm144', 'exgm182', 'exgm057', 'exgm020', 'exgm117', 'exgm090', 'exgm012', 'exgm185', 'exgm105', 'exgm003', 'exgm164', 'exgm078', 'exgm190', 'exgm019', 'exgm119', 'exgm136', 'exgm121', 'exgm171', 'exgm172', 'exgm146', 'exgm175', 'exgm188', 'exgm160', 'exgm162', 'exgm102', 'exgm047', 'exgm111', 'exgm152', 'exgm025', 'exgm016', 'exgm154', 'exgm149', 'exgm108', 'exgm031', 'exgm179', 'exgm005', 'exgm077', 'exgm002', 'exgm021']
Number of unique subject IDs: 67

Subjects with 6 trials (4): ['exgm175', 'exgm160', 'exgm152', 'exgm077']
Subjects with 12 trials (63): ['exgm133', 'exgm156', 'exgm048', 'exgm010', 'exgm046', 'exgm132', 'exgm051', 'exgm120', 'exgm107', 'exgm

## Analyze COP Data
Now that we have validated the structure of our custom objects, we are ready to calculate the COP RMS distance from the mean (RDIST) and the mean COP velocity (MVELO) for every trial.

**To make it easier to gather summary statistics, we should calculate each metric (RDIST and MVELO) within each TrialData object.**

In [4]:
def _compute_cop_metrics(ml, ap, time):
    """Return ``(RDIST, MVELO)`` for one trial's centre-of-pressure series.

    Parameters
    ----------
    ml, ap : array-like
        Medio-lateral and antero-posterior COP samples for the trial.
    time : array-like
        Time stamps for the trial; only the first and last values are used.
        NOTE(review): assumed to span the same window as ``ml``/``ap`` —
        confirm against the processing step that produced them.

    Returns
    -------
    tuple
        ``RDIST`` is the root-mean-square of the resultant distance from the
        mean COP. ``MVELO`` is the total COP path length divided by
        ``time[-1] - time[0]`` (mm per second according to the original
        author's note; the actual units depend on the inputs). ``MVELO`` is
        NaN when the duration is not positive or ``time`` is empty, rather
        than an inf, a divide-by-zero warning, or an IndexError.
    """
    ml = np.asarray(ml)
    ap = np.asarray(ap)
    time = np.asarray(time)

    # Reference AP and ML time series to mean COP
    ml_centered = ml - np.mean(ml)
    ap_centered = ap - np.mean(ap)

    # Resultant Distance (RD), then its RMS (RDIST)
    resultant = np.sqrt(np.square(ap_centered) + np.square(ml_centered))
    rdist = np.sqrt(np.mean(np.square(resultant)))

    # Total excursion: summed point-to-point distance along the COP path
    step_lengths = np.sqrt(
        np.square(np.diff(ap_centered)) + np.square(np.diff(ml_centered))
    )
    total_excursion = np.sum(step_lengths)

    # Average velocity of COP (MVELO); guard the division
    duration = time[-1] - time[0] if time.size else 0.0
    mvelo = total_excursion / duration if duration > 0 else np.nan

    return rdist, mvelo


for subject_id in data.keys():
    for trial in data[subject_id]:
        # Store both metrics as new attributes on the TrialData object so
        # later cells can read them straight off each trial.
        trial.RDIST, trial.MVELO = _compute_cop_metrics(
            trial.ml_filtered, trial.ap_filtered, trial.time_trimmed
        )

print("RDIST and MVELO calculations completed for all trials!")

RDIST and MVELO calculations completed for all trials!


## Checking Results for a Specific Participant

### Input a specific subject ID and Trial

In [5]:
# Example: Check the results for a specific trial
# `subject_id` is a key of `data`; `trial_index` is a position in that
# subject's trial sequence, not a trial label. Print `trial.trial_name` to
# confirm which trial was actually selected.
subject_id = 'exgm133'
trial_index = 2
trial = data[subject_id][trial_index]

### Conduct Test Calculation

In [6]:
# Re-compute RDIST and MVELO for the selected trial and print them next to the
# values stored on the TrialData object. The formulas are the same as in the
# batch calculation, so agreement confirms that the stored attributes belong
# to this trial; it is not an independent check of the formulas themselves.

# Reference AP and ML time series to mean COP
ML = trial.ml_filtered - np.mean(trial.ml_filtered)
AP = trial.ap_filtered - np.mean(trial.ap_filtered)

# Resultant Distance (RD)
RD = np.sqrt(np.square(AP) + np.square(ML))

# RMS Distance (RDIST)
RDIST = np.sqrt(np.mean(np.square(RD)))

# Average Velocity of COP (MVELO): total path length over trial duration
AP_diff = np.diff(AP)
ML_diff = np.diff(ML)

totex_prep = np.square(AP_diff) + np.square(ML_diff)
totex_sqrt = np.sqrt(totex_prep)

totex = np.sum(totex_sqrt)

MVELO = totex / (trial.time_trimmed[-1] - trial.time_trimmed[0])  # mm per second

# Strip the surrounding quotes outside the f-string: a backslash inside an
# f-string replacement field is a SyntaxError before Python 3.12.
trial_label = trial.trial_name.strip('"')

print(f"\nExample results for {subject_id}, trial {trial_index}:")
print(f"Trial name: {trial_label}")
print(f"Manual RDIST calculation: {RDIST:.4f}")
print(f"Automated RDIST calculation: {trial.RDIST:.4f}")
print(f"Manual MVELO calculation: {MVELO:.4f}")
print(f"Automated MVELO calculation: {trial.MVELO:.4f}")
print(f"Start time for trial: {trial.time_trimmed[0]}")
print(f"End time for trial: {trial.time_trimmed[-1]}")


Example results for exgm133, trial 2:
Trial name: tandem 1-1
Manual RDIST calculation: 9.5766
Automated RDIST calculation: 9.5766
Manual MVELO calculation: 30.5278
Automated MVELO calculation: 30.5278
Start time for trial: 10.0
End time for trial: 30.0


# Average Across Trials for Each Participant/Session
_EDIT: Dr. Queen and Dr. Arena suggested that we use all data points instead of averaging across the three trials, so the data are saved in long format with the columns below:_
- id
- session
- Metric
- Stance
- Trial Number (1-1,1-2, etc.)
- Value

In [7]:
# Build the long-format table: one row per (subject, trial, metric).
# Rows are collected as records and turned into a DataFrame once, instead of
# keeping six parallel lists in sync by hand.
long_columns = ['subject_id', 'session', 'metric', 'stance', 'trial', 'value']
long_rows = []

for subject_id in subject_ids:
    for trial in data[subject_id]:
        # Trial names are stored with surrounding quotation marks
        trial_name = trial.trial_name.strip('"')

        # Determine stance (shoulder or tandem)
        if trial_name.startswith('shoulder'):
            stance = 'shoulder'
        elif trial_name.startswith('tandem'):
            stance = 'tandem'
        else:
            stance = 'unknown'  # Just in case

        # Extract trial number (e.g., "1-1" from "shoulder 1-1"). A name with
        # no space gets 'unknown' instead of raising IndexError, matching the
        # tolerant handling of unrecognised stances above.
        name_parts = trial_name.split(' ')
        trial_number = name_parts[1] if len(name_parts) > 1 else 'unknown'

        # One row per metric, RDIST first and then MVELO
        for metric_name, metric_value in (('RDIST', trial.RDIST), ('MVELO', trial.MVELO)):
            long_rows.append({
                'subject_id': subject_id,
                'session': trial.session,
                'metric': metric_name,
                'stance': stance,
                'trial': trial_number,
                'value': metric_value,
            })

# Create the DataFrame; the explicit column list fixes the column order and
# keeps the columns present even if there are no rows.
df_long = pd.DataFrame(long_rows, columns=long_columns)

# Export to CSV, stamped with today's date (MMDDYYYY)
today = datetime.now().strftime('%m%d%Y')
df_long.to_csv(f'data/force_long_{today}.csv', index=False)
print(f"\nData exported to 'data/force_long_{today}.csv'")

# Display information about the DataFrame
print("Long-form DataFrame created!")
print(f"DataFrame shape: {df_long.shape}")
print(f"Number of unique subjects: {df_long['subject_id'].nunique()}")
print(f"Number of unique sessions: {df_long['session'].nunique()}")
print(f"Unique sessions: {df_long['session'].unique()}")
print(f"Number of unique metrics: {df_long['metric'].nunique()}")
print(f"Unique metrics: {df_long['metric'].unique()}")

# Display first few rows
print("\nFirst 24 rows of the DataFrame:")
print(df_long.head(24))


Data exported to 'data/force_long_09202025.csv'
Long-form DataFrame created!
DataFrame shape: (1560, 6)
Number of unique subjects: 67
Number of unique sessions: 2
Unique sessions: ['baseline' 'intervention']
Number of unique metrics: 2
Unique metrics: ['RDIST' 'MVELO']

First 24 rows of the DataFrame:
   subject_id       session metric    stance trial      value
0     exgm133      baseline  RDIST  shoulder   1-1   2.956006
1     exgm133      baseline  MVELO  shoulder   1-1   3.975868
2     exgm133      baseline  RDIST  shoulder   1-3   4.517230
3     exgm133      baseline  MVELO  shoulder   1-3   5.527422
4     exgm133      baseline  RDIST    tandem   1-1   9.576576
5     exgm133      baseline  MVELO    tandem   1-1  30.527820
6     exgm133      baseline  RDIST    tandem   1-2   7.737465
7     exgm133      baseline  MVELO    tandem   1-2  24.697659
8     exgm133      baseline  RDIST    tandem   1-3   8.336920
9     exgm133      baseline  MVELO    tandem   1-3  21.811279
10    exgm133 