In [1]:
import io
import pandas as pd
from connections import AWS
from biomech.algorithms import butter_lowpass_filter

In [2]:
""" INITIALIZE AWS CONNECTION """
# Create the project's AWS helper; every later cell goes through `aws_connection`
# to load subject info, list/load S3 objects, and upload results.
aws_connection = AWS()
# NOTE(review): the recorded cell output mentions a free local port and an RDS
# endpoint, so connect() presumably opens a tunnel / DB session -- confirm in `connections`.
aws_connection.connect()

[AWS]: Port 5433 is free.
[AWS]: Connected to RDS endpoint.


$\textbf{OpenSim Joint Reaction Analysis: Postprocessing}$

- Aggregate all results files
- Process each results file: trim to the elbow moment columns, compute normalized time, and store as a dataframe tagged with subject ID, study ID, and throwing hand
- Evaluate peaks for each trial & inspect for outliers

In [None]:
# fetch the subject info table
subject_info = aws_connection.load_subject_info()

# list everything under the subjects prefix, then keep only the
# (filtered) JRA result files (.sto) --> 3,650 total
s3_objects = aws_connection.list_s3_objects(prefix='biomechanics/subjects/')
jra_files = list(
    filter(
        lambda key: key.endswith('.sto') and 'jra_results' in key,
        s3_objects
    )
)

In [7]:
# create normalized time column
def compute_normalized_time(
        data: pd.DataFrame
) -> pd.DataFrame:
    """
    Add a `normalized_time` column that rescales `time` to the range [0, 1].

    The column is inserted in place as the first column of `data`. If the column
    already exists, `data` is returned unchanged.

    Args:
        data: trial data containing a numeric `time` column.

    Returns:
        The same DataFrame object, with `normalized_time` as its first column.

    Raises:
        KeyError: if `normalized_time` is absent and `data` has no `time` column.
    """
    # nothing to do if the column was already computed
    if 'normalized_time' in data.columns:
        return data

    time = data['time']
    start = time.min()
    duration = time.max() - start

    if duration == 0:
        # single sample / constant time: (t - start) / 0 would give NaN,
        # so pin every sample to the start of the normalized range
        normalized = (time - start).astype(float)
    else:
        normalized = (time - start) / duration

    data.insert(0, 'normalized_time', normalized)

    return data

# evaluate peak value
def evaluate_peak_jra_values(
        data: pd.DataFrame,
        peak_col: str 
) -> dict:
    """
    Summarize the largest-magnitude value of `peak_col` for one trial.

    The positive extreme is reported only when it is strictly larger than the
    magnitude of the negative extreme; otherwise (ties included) the negative
    extreme is reported as an absolute value and `peak_was_negative` is set to 1.

    Args:
        data: trial data with `subject_id`, `study_id`, `throws` and `peak_col` columns.
        peak_col: name of the column to search for the peak.

    Returns:
        dict with keys `subject_id`, `study_id`, `throwing_hand`, `peak_value`,
        `peak_idx` (index label of the peak row) and `peak_was_negative`.
    """
    signal = data[peak_col]
    highest = signal.max()
    lowest = signal.min()

    # trial identifiers are taken from the first unique value of each column
    summary = {
        'subject_id': int(data['subject_id'].unique()[0]),
        'study_id': data['study_id'].unique()[0],
        'throwing_hand': data['throws'].unique()[0],
    }

    # NOTE: `peak_idx` can be used to find joint angles at peak
    if highest > abs(lowest):
        summary['peak_value'] = highest
        summary['peak_idx'] = signal.idxmax()
        summary['peak_was_negative'] = 0
    else:
        summary['peak_value'] = abs(lowest)
        summary['peak_idx'] = signal.idxmin()
        summary['peak_was_negative'] = 1

    return summary
    
# inspect subject results for outliers
def inspect_subject_results(
        data: pd.DataFrame,
        peak_label: str = 'peak_value',
        z_threshold: float = 1.96
) -> pd.DataFrame:
    """
    Flag trials whose peak lies far from the subject's typical peak.

    A row is flagged when its `peak_label` value is more than `z_threshold`
    standard deviations above or below the median of the column.
        NOTE: the median is used as the center (instead of the mean) to be more robust to outliers

    Outliers are flagged, not removed: an integer `outlier_flag` column (1 = outlier,
    0 = otherwise) is written to `data` in place. With fewer than two rows the
    standard deviation is NaN, so nothing is flagged.

    Args:
        data: one row per trial, containing the `peak_label` column.
        peak_label: name of the column holding the peak values.
        z_threshold: number of standard deviations that defines an outlier.

    Returns:
        The same DataFrame object, with the `outlier_flag` column added/overwritten.

    Raises:
        KeyError: if `peak_label` is not a column of `data`.
    """
    peaks = data[peak_label]

    # center and spread of the subject's peaks
    subject_center = peaks.median()
    subject_std = peaks.std()

    lower_bound = subject_center - z_threshold * subject_std
    upper_bound = subject_center + z_threshold * subject_std

    # comparisons against NaN bounds (or NaN peaks) are False --> flag stays 0
    is_outlier = (peaks > upper_bound) | (peaks < lower_bound)
    data['outlier_flag'] = is_outlier.astype('int64')

    return data



In [8]:
# initialize JRA peak results
error_log = []
all_subject_results = []

# number of lines skipped at the top of each .sto file before the column header
    # NOTE(review): assumes every JRA file has the same header length -- confirm
STO_HEADER_ROWS = 11

# group JRA files by the subject ID encoded at the start of the file name
    # NOTE: an exact ID match is required; a substring match on the S3 key would let
    # e.g. subject 261 also pick up the files of subjects 2610-2619
jra_files_by_subject = {}
for f in jra_files:
    study_id = f.split('/')[-1].split('_jra')[0]
    id_token = study_id.split('_')[0]

    if not id_token.isdigit():
        # cannot attribute this file to a subject --> log it instead of dropping it silently
        error_log.append({
            'study_id': study_id,
            'error': f'Could not parse subject ID from file name: {f}'
        })
        continue

    jra_files_by_subject.setdefault(int(id_token), []).append((study_id, f))

# iterate through subjects
    # NOTE: doing this to streamline outlier detection
for subject_id in subject_info['subject_id'].unique():

    print(f'Processing subject {subject_id}...', end='\r', flush=True)

    # get JRA files for this subject (empty list if none were found)
    subject_jra_files = jra_files_by_subject.get(subject_id, [])

    # initialize list of peaks
    subject_jra_peaks = []

    # iterate through subject files
    for study_id, f in subject_jra_files:

        try:
            # look up throwing hand for this subject
            throwing_hand = subject_info.loc[subject_info['subject_id'] == subject_id, 'throws'].values[0]

            # specify elbow moment columns
                # NOTE: this is throwing arm dependent (anything other than 'right' is treated as left)
            side = 'r' if throwing_hand == 'right' else 'l'
            ELBOW_MOMENT_COLS = [
                f'elbow_{side}_on_ulna_{side}_in_ulna_{side}_m{axis}'
                for axis in ('x', 'y', 'z')
            ]
            PEAK_COL = ELBOW_MOMENT_COLS[0]

            # read data from S3
                # NOTE: raw string avoids the invalid '\s' escape sequence warning
            jra_bytes = aws_connection.load_s3_object(f, return_info=False)
            jra_data = pd.read_csv(
                io.BytesIO(jra_bytes),
                sep=r'\s+',
                skiprows=STO_HEADER_ROWS
            )

            # trim to elbow moment columns, then normalize time
                # NOTE: explicit copy so the inserts below never act on a slice of the parsed frame
            jra_data = jra_data[['time'] + ELBOW_MOMENT_COLS].copy()
            jra_data_nt = compute_normalized_time(jra_data)

            # insert subject and study ID
            jra_data_nt.insert(0, 'subject_id', int(subject_id))
            jra_data_nt.insert(1, 'study_id', study_id)
            jra_data_nt.insert(2, 'throws', throwing_hand)

            # get peak summary
            peak_summary = evaluate_peak_jra_values(jra_data_nt, PEAK_COL)
            subject_jra_peaks.append(peak_summary)

        except Exception as e:
            # best-effort: log the failure and move on to the next file
            error_log.append({
                'study_id': study_id,
                'error': str(e)
            })

            print(f'Error processing {f}: {e}')

    # create dataframe of subject peaks --> inspect for outliers
        # NOTE: outliers are flagged, not removed
    if len(subject_jra_peaks) > 0:
        subject_jra_peaks_df = pd.DataFrame(subject_jra_peaks)
        subject_jra_clean = inspect_subject_results(subject_jra_peaks_df)

        # append subject results to full list
        all_subject_results.append(subject_jra_clean)

    print(f'Finished processing subject {subject_id}.')

# concatenate all subject results
all_subject_results_df = pd.concat(all_subject_results, ignore_index=True)


Finished processing subject 2609.
Finished processing subject 2610.
Finished processing subject 2611.
Finished processing subject 2612.
Finished processing subject 2613.
Finished processing subject 2614.
Finished processing subject 2616.
Finished processing subject 2618.
Finished processing subject 2619.
Finished processing subject 2621.
Finished processing subject 2622.
Finished processing subject 2623.
Finished processing subject 2624.
Finished processing subject 2625.
Finished processing subject 2627.
Finished processing subject 2628.
Finished processing subject 2629.
Finished processing subject 2630.
Finished processing subject 2631.
Finished processing subject 2633.
Finished processing subject 2634.
Finished processing subject 2635.
Finished processing subject 2636.
Finished processing subject 2638.
Finished processing subject 2639.
Finished processing subject 2640.
Finished processing subject 2641.
Finished processing subject 2642.
Finished processing subject 2643.
Finished proce

2025-07-11 12:04:24,186| ERROR   | Socket exception: Connection reset by peer (54)


In [16]:
# postprocessing --> ~58 ± 22 Nm
    # NOTE: any peak below 10 or above 200 is flagged as an outlier
peak_values = all_subject_results_df['peak_value']
out_of_range = (peak_values < 10) | (peak_values > 200)
all_subject_results_df.loc[out_of_range, 'outlier_flag'] = 1

In [None]:
# write full subject results to S3 (outliers separated; 247 outliers total)
outlier_flags = all_subject_results_df['outlier_flag']

# non-outlier trials
results_clean_csv = (
    all_subject_results_df[outlier_flags == 0]
    .reset_index(drop=True)
    .to_csv(index=False)
)
aws_connection.upload_to_s3(results_clean_csv, 'subjects/summary/results_jra.csv')

# flagged trials
results_outliers_csv = (
    all_subject_results_df[outlier_flags == 1]
    .reset_index(drop=True)
    .to_csv(index=False)
)
aws_connection.upload_to_s3(results_outliers_csv, 'subjects/summary/results_jra_outliers.csv')

# write error log to S3 (if applicable)
if error_log:
    error_log_df = pd.DataFrame(error_log)
    error_log_csv = error_log_df.to_csv(index=False)
    aws_connection.upload_to_s3(error_log_csv, 'subjects/summary/error_log_jra.csv')

[AWS]: Uploaded object to s3://pitch-ml/subjects/summary/results_jra.csv
[AWS]: Uploaded object to s3://pitch-ml/subjects/summary/results_jra_outliers.csv
