In [1]:
import io
import pandas as pd
from connections.aws import AWS

In [2]:
""" INITIALIZE CONNECTION """
# create connection
# NOTE: AWS is the project helper imported from connections.aws; every S3 call below goes through this object
# NOTE(review): per the logged output, connect() also checks a local port & reaches the RDS endpoint -- confirm in connections.aws
aws_connection = AWS()
aws_connection.connect()

[AWS]: Port 5433 is free.
[AWS]: Connected to RDS endpoint.


$\textbf{Marker Processing}$

Loads every TRC file for each pitcher, checks its format, rotates the marker data to match OpenSim coordinates, and uploads the result back to S3.

In [84]:
import biomech
from biomech.processing.trc import *
from biomech.processing import rotate_data

In [4]:
""" S3 OBJECT LISTING """
# NOTE: have to update IAM user permissions to allow S3 access
s3_objects_pro = aws_connection.list_s3_objects(prefix='PRO/')

# get all unique subject IDs, sorted
    # NOTE: the ID is read from the second '/'-separated segment of each key (assumed layout: 'PRO/<subject_id>/...')
    # NOTE: a bare 'PRO/' folder placeholder has an empty second segment, so it is skipped instead of
    #       being recorded as a subject with ID ''
subject_ids = sorted({
    key_parts[1]
    for key_parts in (obj.split('/') for obj in s3_objects_pro)
    if len(key_parts) > 1 and key_parts[1]
})

In [5]:
# split the raw (not yet usable) listing into CSV & TRC objects in a single pass
    # NOTE: a CSV is kept only when the last path segment of the text before '.csv' is 'trackman' or 'rapsodo'
s3_objects_csv, s3_objects_trc = [], []
for key in s3_objects_pro:
    if key.endswith('.trc'):
        s3_objects_trc.append(key)
    elif key.endswith('.csv'):
        csv_stem = key.split('.csv')[0].split('/')[-1]
        if csv_stem in ('trackman', 'rapsodo'):
            s3_objects_csv.append(key)

# subject info table (needed later for each subject's throwing hand)
subject_info_bytes = aws_connection.load_s3_object(
    'subjects/summary/subject_info.csv',
    return_info=False,
)
subject_info = pd.read_csv(io.BytesIO(subject_info_bytes))

In [92]:
# inspect the subject IDs available in the subject info table
subject_info['subject_id'].to_numpy()

array([2609, 2610, 2611, 2612, 2613, 2614, 2616, 2618, 2619, 2621, 2622,
       2623, 2624, 2625, 2627, 2628, 2629, 2630, 2631, 2633, 2634, 2635,
       2636, 2638, 2639, 2640, 2641, 2642, 2643, 2644, 2645, 2646, 2647,
       2648, 2649, 2650, 2651, 2652, 2653, 2654, 2655, 2657, 2658, 2659,
       2660, 2661, 2662, 2664, 2665, 2666, 2667, 2668, 2669, 2670, 2671,
       2672, 2673, 2674, 2675, 2676, 2680, 2681, 2726, 2727, 2728, 2745,
       2746, 2747, 2748, 2749, 2750, 2751, 2761, 2762, 2764, 2765, 2766,
       2767, 2768, 2941, 2942, 2943, 2945, 2946, 2947, 2948, 2949, 2950,
       2951, 2952, 2953, 2954, 2955, 2956, 2957, 2958, 2959, 2961, 2962,
       2964, 2965, 2966, 2967, 2968, 2969, 2970, 2971, 2972, 2973, 2974,
       2975, 2976, 2977, 2978, 2979, 2980, 2981, 2982, 2983, 2984, 2985,
       2986, 2987, 2988, 2989, 2991, 2992, 2993, 2994, 2996, 2997, 2998,
       3023, 3025, 3027, 3028, 3032, 3033, 3034, 3035, 3036, 3039, 3040,
       3041, 3042, 3043, 3045, 3047, 3050, 3051, 30

In [93]:
""" TRC READING """

# constants
FRAME_RATE = 480            # frame rate handed to create_trc_header for every trial
PARENT_DIR = 'subjects'     # top-level S3 prefix for the processed uploads

# store error trials for investigation
error_trials = []

# iterate through all TRCs
for trc_file in s3_objects_trc:

    # reset per-trial state
        # NOTE: without this, the error log below would raise NameError (failure before the first
        #       format check) or report the previous trial's format_valid
    format_valid = None

    # subject ID is the second segment of the S3 key
    subject_id = trc_file.split('/')[1]

    # check if subject ID exists in subject_info
        # NOTE: a non-numeric ID can never match, so it is skipped rather than aborting the whole run
    try:
        subject_num = int(subject_id)
        subject_known = subject_num in subject_info['subject_id'].values
    except ValueError:
        subject_known = False

    if not subject_known:
        print(f"Subject ID {subject_id} not found in subject_info. Skipping TRC file {trc_file}.")
        continue

    # get throwing hand for subject (first matching row)
    throwing_hand = subject_info.loc[subject_info['subject_id'] == subject_num, 'throws'].values[0]

    try:
        # load bytes from S3
        trc_bytes, trc_info = aws_connection.load_s3_object(trc_file)

        # read TRC data (body) & rotate to match OpenSim coordinates
            # NOTE: throwing hand not used here to properly check format
        trc_body = parse_trc_body(trc_bytes)
        trc_body_checked, format_valid = check_trc_format(trc_body)     # check for valid TRC format (Time, Frame#)
        trc_body_rotated = rotate_data(trc_body_checked)

        # filter markers to correct throwing hand
            # NOTE: any other value (e.g. missing or differently spelled) is an error; falling through
            #       would upload the previous trial's markers under this trial's key
        if throwing_hand == 'right':
            hand_markers = biomech.processing.trc.__markers_right__
        elif throwing_hand == 'left':
            hand_markers = biomech.processing.trc.__markers_left__
        else:
            raise ValueError(f"Unrecognized throwing hand {throwing_hand!r}")
        trc_body_clean = trc_body_rotated[['Frame#', 'Time'] + hand_markers]

        # get TRC header information
            # NOTE: initial upload hardcoded throwing hand
        trc_header = create_trc_header(trc_info['study_id'], trc_body_clean, throwing_hand, FRAME_RATE)

        # put object to S3
        aws_connection.upload_trc_to_s3(
            trc_header,
            trc_body_clean,
            f'{PARENT_DIR}/{trc_info["subject_id"]}/trc_raw/{trc_info["study_id"]}.trc'
        )

    except Exception as e:
        print(f"Error processing TRC file {trc_file} for subject {subject_id}: {e}")

        # log error (format_valid stays None when the failure happened before the format check)
        error_trials.append({
            'subject_id': subject_id,
            'trc_file': trc_file,
            'format_valid': format_valid,
            'error': str(e)
        })

[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_01.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_02.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_03.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_04.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_05.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_06.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_07.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_08.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_09.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_10.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_11.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_12.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_13.trc
[AWS]: TRC file written t

$\textit{Upload Processing Summary}$

Each logged error now includes the `format_valid` flag returned by the TRC format check, to help trace where a trial failed.

In [94]:
# write error trials to file for later investigation
    # NOTE: columns are listed explicitly so the summary keeps the expected columns even when
    #       no trial failed (an empty list would otherwise give a column-less DataFrame)
error_trials_summary = pd.DataFrame(
    error_trials,
    columns=['subject_id', 'trc_file', 'format_valid', 'error']
)
aws_connection.upload_to_s3(
    error_trials_summary,
    s3_key='subjects/summary/error_trials_summary.csv'
)

[AWS]: Uploaded object to s3://pitch-ml/subjects/summary/error_trials_summary.csv


In [95]:
# close connection
# NOTE: run this last -- the S3 reads & uploads in the cells above all go through aws_connection
# NOTE(review): per the logged output, close() shuts the database connection & stops the SSH tunnel -- confirm in connections.aws
aws_connection.close()

[AWS]: Database connection closed.
[AWS]: SSH tunnel stopped.
