In [1]:
import io
import pandas as pd
from connections.aws import AWS

In [2]:
""" INITIALIZE CONNECTION """
# create the project's AWS wrapper object and open the connection;
# per the recorded cell output, connect() reports a local port check and a
# connection to an RDS endpoint. Later cells reuse `aws_connection` for S3 calls.
aws_connection = AWS()
aws_connection.connect()

[AWS]: Port 5433 is free.
[AWS]: Connected to RDS endpoint.


In [3]:
""" S3 FILE STRUCTURE """
# Target layout of the per-subject tree in the S3 bucket:
# subjects/
#   <subject_id>/
#     csv_raw/
#     csv_processed/
#     trc_raw/
#     trc_processed/
#     mot_processed/          # NOTE(review): not created by the folder-creation cell in this notebook -- confirm whether still planned
#     inverse_kinematics/     # created by the folder-creation cell
#     inverse_dynamics/       # created by the folder-creation cell
#     osim/                   # created by the folder-creation cell

' S3 FILE STRUCTURE '

In [4]:
""" S3 OBJECT LISTING """
# NOTE: the IAM user's permissions must allow S3 access for this call to work
s3_objects_pro = aws_connection.list_s3_objects(prefix='PRO/')

# Collect the unique subject IDs, taken as the second path component of each
# key (keys are assumed to look like 'PRO/<subject_id>/...').
# Keys with nothing below the subject level are skipped: the 'PRO/' folder
# placeholder splits into ['PRO', ''] and would otherwise yield an empty
# subject ID, and a file sitting directly under 'PRO/' would be mistaken for
# a subject. Either would later produce bogus 'subjects/<id>/' folders.
subject_ids = sorted({
    key_parts[1]
    for key_parts in (obj.split('/') for obj in s3_objects_pro)
    if len(key_parts) > 2 and key_parts[1]
})

$\textbf{Object Extraction}$

- CSVs
- TRCs (static & trial)

In [6]:
# Split the listed S3 keys by file type (raw objects, not yet usable):
#   - TRC: every key ending in '.trc'
#   - CSV: keys ending in '.csv' whose file stem is exactly one of the
#          pitch-tracker names below
PITCH_TRACKER_STEMS = ['trackman', 'rapsodo']

s3_objects_csv = []
s3_objects_trc = []
for s3_key in s3_objects_pro:
    if s3_key.endswith('.trc'):
        s3_objects_trc.append(s3_key)
    elif s3_key.endswith('.csv'):
        # text before the first '.csv', reduced to its last path component
        file_stem = s3_key.split('.csv')[0].split('/')[-1]
        if file_stem in PITCH_TRACKER_STEMS:
            s3_objects_csv.append(s3_key)

In [7]:
# Pull one example object of each type from S3 (the first key in each list).
# By default load_s3_object returns an (object, info) pair; with
# return_info=False only the object itself comes back.
example_trc_key = s3_objects_trc[0]
trc_object, trc_info = aws_connection.load_s3_object(example_trc_key)

# NOTE: csvs will end with `rapsodo` or `trackman` (if clean)
example_csv_key = s3_objects_csv[0]
csv_object = aws_connection.load_s3_object(example_csv_key, return_info=False)

$\textit{CSV Data}$

In [7]:
""" CSV READING """
# parsed CSVs, keyed by subject ID
# NOTE(review): a subject with more than one matching CSV (e.g. both trackman
# and rapsodo) keeps only the last one read -- confirm this is intended.
raw_csv_data = {}

# how many leading-row offsets (0 .. MAX_HEADER_OFFSET - 1) to try when
# searching for the real header row
MAX_HEADER_OFFSET = 5

# iterate through all CSVs
for csv_file in s3_objects_csv:

    # load THIS file's bytes from S3, once per file. Parsing the shared example
    # `csv_object` here would store the same data under every subject.
    csv_bytes = aws_connection.load_s3_object(csv_file, return_info=False)

    # skip an increasing number of leading rows until the header row is found
    for n_skipped in range(MAX_HEADER_OFFSET):
        csv_raw = pd.read_csv(io.BytesIO(csv_bytes), skiprows=n_skipped)

        # the header row is the one that contains the 'Pitch Type' column
        if 'Pitch Type' in csv_raw.columns:
            subject_id = csv_file.split('/')[1]
            raw_csv_data[subject_id] = csv_raw
            break        # header found: move on to the next file


$\textit{TRC Data}$

- Rotate data
- Trim to relevant columns

In [13]:
from dev_trc import *
from biomech.processing import rotate_data

In [None]:
""" TRC READING """

# configuration for this cell
FRAME_RATE = 480            # passed to create_trc_header; presumably Hz -- TODO confirm
PARENT_DIR = 'subjects'     # only referenced by the TODO upload path at the end of the loop

# container for parsed TRC data (declared here, not populated by this cell)
raw_trc_data = {}

# walk the TRC keys -- the slice limits this run to the first key only
for trc_file in s3_objects_trc[:1]:
    subject_id = trc_file.split('/')[1]

    # fetch the raw bytes and the accompanying info mapping from S3
    trc_bytes, trc_info = aws_connection.load_s3_object(trc_file)

    # parse the TRC body, then rotate it to match OpenSim coordinates
    trc_body = parse_trc_body(trc_bytes)
    trc_body_rotated = rotate_data(trc_body)

    # build the TRC header from the study ID, the un-rotated body, the literal
    # 'right' argument (meaning not visible here -- TODO confirm) and the frame rate
    trc_header = create_trc_header(trc_info['study_id'], trc_body, 'right', FRAME_RATE)

    # TEST: write the rotated TRC under the test/ prefix
    test_key = f'test/{trc_info["study_id"]}.trc'
    aws_connection.upload_trc_to_s3(trc_header, trc_body_rotated, test_key)

    # TODO: switch to the per-subject destination once testing is done, e.g.
    # aws_connection.upload_trc_to_s3(trc_header, trc_body_rotated, f'{PARENT_DIR}/{trc_info["subject_id"]}/{trc_info["study_id"]}.trc')
    # (assumes trc_info carries a 'subject_id' entry -- verify before enabling)

[AWS]: TRC file written to s3://pitch-ml/test/2609_01.trc


$\textbf{Subject Subfolders}$

For example, the `<subject_id>` parent folder and its subfolders (`csv_raw`, `trc_raw`, etc.).

In [39]:
# scratch prefix used by the TRC upload test
aws_connection.create_s3_folder('test/')

[AWS]: Created folder s3://pitch-ml/test/


In [8]:
# Build the folder tree for every subject: the subject's top folder first,
# then one sub-folder per file type, in the order listed here.
SUBJECT_SUBFOLDERS = (
    'csv_raw',
    'csv_processed',
    'trc_raw',
    'trc_processed',
    'inverse_kinematics',
    'inverse_dynamics',
    'osim',
)

for subject_id in subject_ids:
    # top folder
    subject_root = f'subjects/{subject_id}/'
    aws_connection.create_s3_folder(subject_root)

    # sub-folders by file type
    for subfolder in SUBJECT_SUBFOLDERS:
        aws_connection.create_s3_folder(f'{subject_root}{subfolder}/')

[AWS]: Folder s3://pitch-ml/subjects/2609/ already exists.
[AWS]: Folder s3://pitch-ml/subjects/2609/csv_raw/ already exists.
[AWS]: Folder s3://pitch-ml/subjects/2609/csv_processed/ already exists.
[AWS]: Folder s3://pitch-ml/subjects/2609/trc_raw/ already exists.
[AWS]: Folder s3://pitch-ml/subjects/2609/trc_processed/ already exists.
[AWS]: Created folder s3://pitch-ml/subjects/2609/inverse_kinematics/
[AWS]: Created folder s3://pitch-ml/subjects/2609/inverse_dynamics/
[AWS]: Created folder s3://pitch-ml/subjects/2609/osim/
[AWS]: Folder s3://pitch-ml/subjects/2610/ already exists.
[AWS]: Folder s3://pitch-ml/subjects/2610/csv_raw/ already exists.
[AWS]: Folder s3://pitch-ml/subjects/2610/csv_processed/ already exists.
[AWS]: Folder s3://pitch-ml/subjects/2610/trc_raw/ already exists.
[AWS]: Folder s3://pitch-ml/subjects/2610/trc_processed/ already exists.
[AWS]: Created folder s3://pitch-ml/subjects/2610/inverse_kinematics/
[AWS]: Created folder s3://pitch-ml/subjects/2610/inverse_