In [1]:
import pandas as pd
from connections.aws import AWS

In [2]:
""" INITIALIZE AWS CONNECTION """
# Single shared connection object used by every cell below (S3 listing, reads, and uploads).
aws_connection = AWS()
# Per the recorded output, connect() checks that local port 5433 is free and then
# reaches the RDS endpoint. NOTE(review): presumably via an SSH tunnel, since close()
# later reports stopping one; confirm in connections/aws.
aws_connection.connect()

[AWS]: Port 5433 is free.
[AWS]: Connected to RDS endpoint.


## Butterworth Filtering

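Each raw (non-static, not yet filtered) `.trc` file under `subjects/` is low-pass filtered with a Butterworth filter (order 2, cutoff 18, at a 480 frame rate) and written to the subject's `trc_processed/` folder with a `_filtered` suffix. Trials that fail are collected and uploaded as a summary for later investigation. See underlying details in `dev/biomech/dev_marker_filtering.ipynb`.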

In [3]:
# Explicit names instead of a wildcard import, so it is clear where the TRC helpers
# used in the filtering loop come from and nothing else leaks into the namespace.
from biomech.processing.trc import create_trc_header, parse_trc_body
from biomech.algorithms import butter_lowpass_filter

In [4]:
# subject metadata table (its `subject_id` and `throws` columns are read by the filtering loop)
subject_info = aws_connection.load_subject_info()

# every object key stored under the subjects/ prefix
s3_objects = aws_connection.list_s3_objects(prefix='subjects/')

# keep only .trc keys that are neither static trials nor previously filtered outputs
trc_files = [
    key
    for key in s3_objects
    if key.endswith('.trc')
    and not any(tag in key for tag in ('_static', '_filtered'))
]

In [9]:
# Filter settings, named once so the filter call and the TRC header cannot drift apart.
# NOTE(review): rate and cutoff are presumably in Hz; confirm against butter_lowpass_filter.
FRAME_RATE = 480                          # passed as `fs` to the filter and as `frame_rate` to the header
FILTER_CUTOFF = 18                        # low-pass cutoff
FILTER_ORDER = 2                          # Butterworth order
NON_MARKER_COLUMNS = ('Frame#', 'Time')   # bookkeeping columns that are not filtered

# error storage: one {'file': ..., 'error': ...} record per trial that failed
errors = []

# iterate through trc files, apply filter, and upload to S3
for file in trc_files:

    try:
        # The subject ID is the second path component of the key (subjects/<id>/...).
        # Parsing happens inside the try so that a key with an unexpected layout
        # (missing component, non-numeric folder) is logged like any other failure
        # instead of aborting the whole batch.
        subject_id = file.split('/')[1]
        subject_rows = subject_info[subject_info['subject_id'] == int(subject_id)]

        # subjects that are not in subject_info are skipped without being logged
        if subject_rows.empty:
            continue
        subject_throws = subject_rows['throws'].values[0]

        # read trc file from S3
        trc_bytes = aws_connection.load_s3_object(file, return_info=False)
        trc_body = parse_trc_body(trc_bytes, adjust_time=True)

        # apply filter to every column except the bookkeeping ones
        marker_columns = [col for col in trc_body.columns if col not in NON_MARKER_COLUMNS]
        trc_filt_body = butter_lowpass_filter(
            data=trc_body,
            columns=marker_columns,
            cutoff=FILTER_CUTOFF,
            fs=float(FRAME_RATE),
            order=FILTER_ORDER
        )

        # create header (w/ _filtered file name); the stem is everything before the
        # first '.' of the key's last path component
        file_name = f"{file.split('/')[-1].split('.')[0]}_filtered.trc"
        trc_filt_header = create_trc_header(
            file_name,
            trc_filt_body,
            throwing_hand=subject_throws,
            frame_rate=FRAME_RATE
        )

        # upload to S3 (trc_processed)
        upload_path = f'subjects/{subject_id}/trc_processed/{file_name}'
        aws_connection.upload_trc_to_s3(
            trc_filt_header,
            trc_filt_body,
            s3_key=upload_path
        )

    except Exception as e:
        # Deliberately broad: one bad trial must not stop the remaining files.
        print(f"Error processing file {file}: {e}")

        # log error with file name and error message
        errors.append({
            'file': file,
            'error': str(e)
        })


[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_processed/2609_01_filtered.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_processed/2609_02_filtered.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_processed/2609_03_filtered.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_processed/2609_04_filtered.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_processed/2609_05_filtered.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_processed/2609_06_filtered.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_processed/2609_07_filtered.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_processed/2609_08_filtered.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_processed/2609_09_filtered.trc
[AWS]: TRC file written to s3://pitch-ml/subjects/2609/trc_processed/2609_10_filtered.trc
Error processing file subjects/2609/trc_raw/2609_11.trc: The length of the input vector x must be gr

In [10]:
# write error trials to file for later investigation.
# The columns are pinned so that a run with no failures still yields a frame with the
# `file` / `error` columns rather than a column-less one, keeping the uploaded summary's
# schema stable for whoever reads it back.
error_trials_summary = pd.DataFrame(errors, columns=['file', 'error'])
aws_connection.upload_to_s3(
    error_trials_summary,
    s3_key='subjects/summary/error_filtering_summary.csv'
)

[AWS]: Uploaded object to s3://pitch-ml/subjects/summary/error_filtering_summary.csv


In [11]:
# Release the AWS connection once all uploads are done. Per the recorded output,
# close() closes the database connection and stops the SSH tunnel.
aws_connection.close()

[AWS]: Database connection closed.
[AWS]: SSH tunnel stopped.
