# Packages

In [9]:
import pandas as pd
import numpy as np
import glob
import os

# Data Read-in


In [10]:
# Folder holding the aligned CSV exports (relative to the notebook's working directory)
folder_path = "Aligned Data All Participants"

# Columns kept from every recording once it has been normalised
final_columns = ['time', 'angle', 'device', 'movement', 'filename']


def load_recording(file):
    """Read one aligned CSV and normalise its column names.

    The device and the movement are inferred from the file name:

    * device: ``IMU`` / ``MoCap`` (case-insensitive match), else ``Unknown``
    * movement: ``AxialRotation`` / ``Flexion`` / ``LateralBending``
      (case-sensitive match, checked in that order)

    Parameters
    ----------
    file : str
        Path to the CSV file.

    Returns
    -------
    pandas.DataFrame
        The file contents plus ``device`` and ``filename`` columns, and a
        ``movement`` column when the name matched a movement. The device's
        time column is renamed to ``time`` and the movement's angle column
        to ``angle`` when those source columns exist.

    Raises
    ------
    Exception
        Whatever ``pandas.read_csv`` raises for an unreadable file.
    """
    # File name without directory or extension, e.g. "1Flexion_IMU_95s_to_125s"
    filename = os.path.splitext(os.path.basename(file))[0]
    df = pd.read_csv(file)

    # The two devices export their time axis under different headers
    renames = {}
    upper_name = filename.upper()
    if 'IMU' in upper_name:
        device = "IMU"
        renames["Shifted Time (s)"] = "time"
    elif 'MOCAP' in upper_name:
        device = "MoCap"
        renames["Time (s)"] = "time"
    else:
        # Always bind `device`, so the angle rename below can never pick up
        # a value left over from a previously processed file.
        device = "Unknown"
        print(f"Warning: Couldn't determine source for {filename}")

    df['device'] = device
    df['filename'] = filename

    # First matching movement token wins (same precedence as an if/elif chain)
    movement_tokens = (
        ('AxialRotation', 'axial'),
        ('Flexion', 'flexion'),
        ('LateralBending', 'lateral'),
    )
    for token, movement in movement_tokens:
        if token in filename:
            df['movement'] = movement
            renames[f"{token} Angle {device} (degrees)"] = "angle"
            break
    else:
        print(f"File {filename} didn't match any category")

    # rename() ignores keys that are not present, so a file with unexpected
    # headers simply keeps its original column names.
    return df.rename(columns=renames)


# Initial result list
lst_all = []

# Sorted so the row order does not depend on the operating system's listing order
all_files = sorted(glob.glob(os.path.join(folder_path, "*.csv")))

for file in all_files:
    try:
        lst_all.append(load_recording(file))
    except Exception as e:
        # Best effort: report the unreadable file and continue with the rest
        print(f"Error loading {file}: {str(e)}")

# Concatenate once, after the loop. pd.concat raises on an empty list, so the
# "nothing loaded" case gets an empty frame with the expected columns instead.
if lst_all:
    # reindex keeps only the wanted columns and creates any that no file supplied
    combined_df = pd.concat(lst_all, ignore_index=True).reindex(columns=final_columns)
else:
    print(f"Warning: no CSV files could be loaded from '{folder_path}'")
    combined_df = pd.DataFrame(columns=final_columns)

# Data Cleaning

## Create New Columns

In [11]:
# Expected filename layout:
#   <participant digits><free text>_<IMU|MoCap>_<start>s_to_<end>s
pattern = r'^(\d+)(.*?)_(IMU|MoCap)_(\d+)s_to_(\d+)s$'

# One column per capture group; column 0 is the leading participant number.
# The float cast leaves NaN wherever the filename did not match the pattern.
filename_parts = combined_df['filename'].str.extract(pattern, expand=False)
combined_df['participant'] = filename_parts[0].astype(float)

# Report every distinct filename that produced no participant number
no_participant = combined_df['participant'].isna()
unmatched = combined_df.loc[no_participant, 'filename'].unique()
if len(unmatched) > 0:
    print(f"Warning: Filename format not recognized for: {unmatched}")

# Put the participant identifier first
column_order = ['participant', 'time', 'angle', 'device', 'movement', 'filename']
combined_df = combined_df.loc[:, column_order]

## Align Time Between IMU and MoCap (by participant and movement)

In [12]:
# Snap timestamps to two decimals (a 0.01 s grid) so that samples from the two
# devices can be matched on an identical time key.
timed_df = combined_df.assign(aligned_time=combined_df['time'].round(2))

# Several raw samples can land on the same grid point: average their angles.
# Rows whose grouping keys are NaN (e.g. an unparsed participant) are dropped
# by groupby's default behaviour.
agg_df = (
    timed_df
    .groupby(['participant', 'movement', 'aligned_time', 'device'])
    .agg({'angle': 'mean', 'filename': 'first'})
    .reset_index()
)

# One row per (participant, movement, time) with one column per value/device pair
aligned_df = agg_df.pivot(
    index=['participant', 'movement', 'aligned_time'],
    columns='device',
    values=['angle', 'filename']
)

# Flatten the (value, device) column pairs BY NAME rather than by position, so
# the labels stay correct whatever devices are present in the data.
aligned_df.columns = [
    f"{value}_{str(device).lower()}" for value, device in aligned_df.columns
]

# Keep exactly the expected columns: extra devices are discarded and a device
# that is entirely absent becomes an all-NaN column instead of a length error.
expected_columns = ['angle_imu', 'angle_mocap', 'filename_imu', 'filename_mocap']
aligned_df = aligned_df.reindex(columns=expected_columns).reset_index()

# Drop time points where we don't have both measurements
aligned_df = aligned_df.dropna(subset=['angle_imu', 'angle_mocap'])

# Back to long format: one row per (participant, movement, time, device)
long_df = pd.melt(
    aligned_df,
    id_vars=['participant', 'movement', 'aligned_time'],
    value_vars=['angle_imu', 'angle_mocap'],
    var_name='device_type',
    value_name='angle'
)

# 'angle_imu' -> 'imu', 'angle_mocap' -> 'mocap'; regex=False pins a literal
# replacement regardless of the pandas version's default.
long_df['device'] = long_df['device_type'].str.replace('angle_', '', regex=False)

# Select final columns, sort, and renumber the rows
sort_keys = ['participant', 'movement', 'aligned_time', 'device']
combined_df = (
    long_df[['participant', 'movement', 'aligned_time', 'device', 'angle']]
    .sort_values(sort_keys)
    .reset_index(drop=True)
)

In [13]:
# Preview the first five rows of the aligned long-format table
combined_df.head(n=5)

Unnamed: 0,participant,movement,aligned_time,device,angle
0,1.0,axial,95.01,imu,-4.155804
1,1.0,axial,95.01,mocap,-1.828886
2,1.0,axial,95.02,imu,-4.117835
3,1.0,axial,95.02,mocap,-1.835366
4,1.0,axial,95.03,imu,-4.062786


# Calculate Proportion of Time Spent in Bad Posture

In [14]:
# Thresholds for considering an angle measurement "harmful"; they are compared
# against the absolute value of the angle column.
THRESH_FLEXION = 60
THRESH_AXIAL = 30
THRESH_LATERAL = 20

# Single lookup from movement label to its threshold; supporting another
# movement only needs one more entry here.
HARM_THRESHOLDS = {
    'flexion': THRESH_FLEXION,
    'axial': THRESH_AXIAL,
    'lateral': THRESH_LATERAL,
}

# Seconds represented by one row: timestamps were rounded to two decimals
# upstream, i.e. a 0.01 s grid. Keep this in sync with that rounding.
# NOTE(review): time_harmful assumes every retained row stands for one full
# grid step -- confirm against the recording sample rate.
SAMPLE_PERIOD_S = 0.01

harm_df = (
    combined_df
    .assign(
        # A movement without a threshold maps to NaN, and any comparison with
        # NaN is False, so unknown movements are never flagged as harmful.
        harmful=lambda x: x['angle'].abs() >= x['movement'].map(HARM_THRESHOLDS)
    )
    .groupby(['participant', 'movement', 'device'])
    .agg(
        # Share of samples at or beyond the threshold
        prop_harmful=('harmful', 'mean'),
        # Number of harmful samples converted to seconds
        time_harmful=('harmful', lambda x: x.sum() * SAMPLE_PERIOD_S),
        # Spread between the largest and smallest observed angle
        range_of_motion=('angle', lambda x: x.max() - x.min())
    )
    .reset_index()
    .sort_values(['participant', 'movement', 'device'])
)

In [15]:
# Preview the first five rows of the per-participant summary
harm_df.head(n=5)

Unnamed: 0,participant,movement,device,prop_harmful,time_harmful,range_of_motion
0,1.0,axial,imu,0.506316,14.83,131.954873
1,1.0,axial,mocap,0.527825,15.46,163.38164
2,1.0,flexion,imu,0.252619,8.68,85.713295
3,1.0,flexion,mocap,0.281432,9.67,87.631384
4,1.0,lateral,imu,0.56696,17.4,97.103818


# Save Output

In [16]:
# Write both result tables as CSV into the working directory, without the index
outputs = (
    ("all_participants.csv", combined_df),
    ("curve_characteristics.csv", harm_df),
)
for out_name, out_frame in outputs:
    out_frame.to_csv(out_name, index=False)