In [1]:
import io
import pandas as pd
from connections.aws import AWS

In [2]:
""" INITIALIZE CONNECTION """
# Create the shared AWS helper; every later cell uses it to list, load and upload S3 objects.
aws_connection = AWS()
# Open the connection. The recorded output reports that local port 5433 was free and that the
# RDS endpoint was reached. Pair this with `aws_connection.close()` at the end of the notebook.
aws_connection.connect()

[AWS]: Port 5433 is free.
[AWS]: Connected to RDS endpoint.


In [3]:
""" S3 FILE STRUCTURE """
# Target bucket layout, as written by the upload and folder-creation cells of this notebook:
# subjects/
#   summary/                  (subject_info.csv, error_trials_summary.csv)
#   <subject_id>/
#     csv_raw/                (ball_tracking.csv)
#     csv_processed/
#     trc_raw/                (<study_id>.trc)
#     trc_processed/
#     inverse_kinematics/
#     inverse_dynamics/
#     osim/
# NOTE(review): this list previously named `mot_processed/`, but no cell here creates it --
# confirm whether it has been superseded by `inverse_kinematics/` or is still required.

' S3 FILE STRUCTURE '

In [4]:
""" S3 OBJECT LISTING """
# NOTE: have to update IAM user permissions to allow S3 access
# List every object key under the `PRO/` prefix; later cells filter this list by file extension.
s3_objects_pro = aws_connection.list_s3_objects(prefix='PRO/')

# Collect the unique subject IDs, taken from the second path component of each key.
# Keys whose second component is missing or empty (e.g. a folder placeholder key such as
# `PRO/`) are skipped, so an empty ID never reaches the folder-creation cell and produces
# `subjects//...` keys.
subject_ids = sorted({
    key_parts[1]
    for key_parts in (obj.split('/') for obj in s3_objects_pro)
    if len(key_parts) > 1 and key_parts[1]
})

$\textbf{Object Extraction}$

- CSVs
- TRCs (static & trial)

In [5]:
# Split the raw `PRO/` listing into ball-tracking CSV keys and TRC keys (raw, not yet usable).
s3_objects_csv = []
s3_objects_trc = []
for s3_key in s3_objects_pro:
    if s3_key.endswith('.trc'):
        s3_objects_trc.append(s3_key)
    elif s3_key.endswith('.csv'):
        # file stem = last path component of everything before the first '.csv'
        file_stem = s3_key.split('.csv')[0].split('/')[-1]
        if file_stem in ('trackman', 'rapsodo'):
            s3_objects_csv.append(s3_key)

In [6]:
# Fetch one TRC and one CSV from S3 as examples to inspect before batch processing.

# Default call: the result unpacks into the object itself plus an accompanying info record.
example_trc_key = s3_objects_trc[0]
trc_object, trc_info = aws_connection.load_s3_object(example_trc_key)

# NOTE: csvs will end with `rapsodo` or `trackman` (if clean)
example_csv_key = s3_objects_csv[0]
csv_object = aws_connection.load_s3_object(example_csv_key, return_info=False)

$\textit{CSV Data}$

In [None]:
""" CSV READING & LOADING"""
# Header search settings: the real header row is identified by the presence of this column,
# and at most this many leading-row offsets (0 .. MAX_HEADER_OFFSETS - 1) are tried.
HEADER_COLUMN = 'Pitch Type'
MAX_HEADER_OFFSETS = 5

# iterate through all CSVs
for csv_file in s3_objects_csv:

    # get ball tracking source (rapsodo or trackman) and subject ID from the S3 key
    ball_tracking_source = csv_file.split('.csv')[0].split('/')[-1]
    subject_id = csv_file.split('/')[1]

    # Load this file's own bytes. Parsing the example `csv_object` here instead would upload
    # the same table for every subject.
    csv_bytes = aws_connection.load_s3_object(csv_file, return_info=False)

    # Try increasing row offsets until the parsed columns contain the expected header.
    for rows_skipped in range(MAX_HEADER_OFFSETS):
        try:
            csv_raw = pd.read_csv(io.BytesIO(csv_bytes), skiprows=rows_skipped)
        except (pd.errors.ParserError, pd.errors.EmptyDataError):
            # Preamble rows can make the file unparseable at this offset; try the next one.
            continue

        if HEADER_COLUMN not in csv_raw.columns:
            continue

        # add ball tracking source column
        csv_raw['ball_tracking_source'] = ball_tracking_source

        # upload to S3
        # NOTE(review): the key depends only on the subject ID, so if a subject has both a
        # trackman and a rapsodo file the later upload overwrites the earlier one -- confirm
        # that at most one exists per subject.
        aws_connection.upload_to_s3(
            csv_raw,
            s3_key=f'subjects/{subject_id}/csv_raw/ball_tracking.csv'
        )
        break        # header found and file uploaded; move on to the next file

    else:
        # No offset produced the expected header: report it instead of skipping silently.
        print(f"No '{HEADER_COLUMN}' header row found in the first {MAX_HEADER_OFFSETS} rows of {csv_file}; file skipped.")


[AWS]: Uploaded object to s3://pitch-ml/subjects/2609/csv_raw/ball_tracking.csv
[AWS]: Uploaded object to s3://pitch-ml/subjects/2610/csv_raw/ball_tracking.csv
[AWS]: Uploaded object to s3://pitch-ml/subjects/2611/csv_raw/ball_tracking.csv
[AWS]: Uploaded object to s3://pitch-ml/subjects/2612/csv_raw/ball_tracking.csv
[AWS]: Uploaded object to s3://pitch-ml/subjects/2613/csv_raw/ball_tracking.csv
[AWS]: Uploaded object to s3://pitch-ml/subjects/2614/csv_raw/ball_tracking.csv
[AWS]: Uploaded object to s3://pitch-ml/subjects/2615/csv_raw/ball_tracking.csv
[AWS]: Uploaded object to s3://pitch-ml/subjects/2616/csv_raw/ball_tracking.csv
[AWS]: Uploaded object to s3://pitch-ml/subjects/2617/csv_raw/ball_tracking.csv
[AWS]: Uploaded object to s3://pitch-ml/subjects/2618/csv_raw/ball_tracking.csv
[AWS]: Uploaded object to s3://pitch-ml/subjects/2619/csv_raw/ball_tracking.csv
[AWS]: Uploaded object to s3://pitch-ml/subjects/2620/csv_raw/ball_tracking.csv
[AWS]: Uploaded object to s3://pitch-ml/

$\textit{TRC Data}$

- Rotate data
- Trim to relevant columns

In [7]:
from dev_trc import *
from biomech.processing import rotate_data

In [19]:
# Subject metadata table; its `throws` column is looked up per subject in the TRC cell.
subject_info_bytes = aws_connection.load_s3_object(
    'subjects/summary/subject_info.csv',
    return_info=False,
)
with io.BytesIO(subject_info_bytes) as subject_info_buffer:
    subject_info = pd.read_csv(subject_info_buffer)

In [25]:
# Spot-check the metadata row for subject 2609.
subject_info.loc[subject_info['subject_id'] == 2609]

Unnamed: 0,subject_id,age,height,mass,throws
0,2609,31.1,1.8796,90.91,right


In [28]:
""" TRC READING """

# constants
FRAME_RATE = 480            # passed to create_trc_header; presumably frames per second -- TODO confirm
PARENT_DIR = 'subjects'     # top-level S3 prefix for per-subject output

# store error trials for investigation
error_trials = []

# iterate through all TRCs
for trc_file in s3_objects_trc:
    subject_id = trc_file.split('/')[1]

    try:
        # load bytes from S3
        trc_bytes, trc_info = aws_connection.load_s3_object(trc_file)

        # read TRC data (body) & rotate to match OpenSim coordinates
        trc_body = parse_trc_body(trc_bytes)
        trc_body_rotated = rotate_data(trc_body)

        # look up the subject's throwing hand; fail with a clear message if the subject is unknown
        throws_matches = subject_info.loc[subject_info['subject_id'] == int(subject_id), 'throws']
        if throws_matches.empty:
            raise LookupError(f"no row in subject_info for subject {subject_id}")
        throwing_hand = throws_matches.iloc[0]

        # get TRC header information (uses the looked-up hand rather than a hard-coded 'right')
        trc_header = create_trc_header(trc_info['study_id'], trc_body, throwing_hand, FRAME_RATE)

        # put object to S3
        aws_connection.upload_trc_to_s3(
            trc_header,
            trc_body_rotated,
            f'{PARENT_DIR}/{trc_info["subject_id"]}/trc_raw/{trc_info["study_id"]}.trc'
        )

    except Exception as e:
        # Deliberately broad: one bad trial must not stop the batch. The failure is printed
        # and recorded in `error_trials` for later investigation.
        print(f"Error processing TRC file {trc_file} for subject {subject_id}: {e}")

        # log error
        error_trials.append({
            'subject_id': subject_id,
            'trc_file': trc_file,
            'error': str(e)
        })

[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_01.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_02.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_03.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_04.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_05.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_06.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_07.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_08.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_09.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_10.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_11.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_12.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_raw/2609_13.trc
[AWS]: TRC file written t

KeyboardInterrupt: 

In [15]:
# Persist the failures collected by the TRC loop as a CSV in the summary folder, so they can
# be investigated later.
error_trials_summary = pd.DataFrame(error_trials)
aws_connection.upload_to_s3(error_trials_summary, s3_key='subjects/summary/error_trials_summary.csv')

[AWS]: Uploaded object to s3://pitch-ml/subjects/summary/error_trials_summary.csv


$\textbf{Subject Subfolders}$

For example, the `<subject_id>` parent folder and its sub-folders (`csv_raw`, `trc_raw`, etc.).

In [39]:
# Smoke test: create a throwaway top-level folder to confirm folder creation works.
aws_connection.create_s3_folder('test/')

[AWS]: Created folder s3://pitch-ml/test/


In [8]:
# Build the per-subject folder tree: one top folder per subject, then one sub-folder per
# file type, created in the order listed here.
subfolder_names = (
    'csv_raw',
    'csv_processed',
    'trc_raw',
    'trc_processed',
    'inverse_kinematics',
    'inverse_dynamics',
    'osim',
)

for subject_id in subject_ids:
    # top folder
    subject_root = f'subjects/{subject_id}/'
    aws_connection.create_s3_folder(subject_root)

    # sub-folders by file type
    for subfolder in subfolder_names:
        aws_connection.create_s3_folder(f'{subject_root}{subfolder}/')

[AWS]: Folder s3://pitch-ml/subjects/2609/ already exists.
[AWS]: Folder s3://pitch-ml/subjects/2609/csv_raw/ already exists.
[AWS]: Folder s3://pitch-ml/subjects/2609/csv_processed/ already exists.
[AWS]: Folder s3://pitch-ml/subjects/2609/trc_raw/ already exists.
[AWS]: Folder s3://pitch-ml/subjects/2609/trc_processed/ already exists.
[AWS]: Created folder s3://pitch-ml/subjects/2609/inverse_kinematics/
[AWS]: Created folder s3://pitch-ml/subjects/2609/inverse_dynamics/
[AWS]: Created folder s3://pitch-ml/subjects/2609/osim/
[AWS]: Folder s3://pitch-ml/subjects/2610/ already exists.
[AWS]: Folder s3://pitch-ml/subjects/2610/csv_raw/ already exists.
[AWS]: Folder s3://pitch-ml/subjects/2610/csv_processed/ already exists.
[AWS]: Folder s3://pitch-ml/subjects/2610/trc_raw/ already exists.
[AWS]: Folder s3://pitch-ml/subjects/2610/trc_processed/ already exists.
[AWS]: Created folder s3://pitch-ml/subjects/2610/inverse_kinematics/
[AWS]: Created folder s3://pitch-ml/subjects/2610/inverse_

$\textbf{Close Connection}$

In [16]:
# Release the connection opened at the top of the notebook. Per the recorded output, this
# closes the database connection and stops the SSH tunnel.
aws_connection.close()

[AWS]: Database connection closed.
[AWS]: SSH tunnel stopped.
