## Process Analog Data

In [14]:
import os
from collections import Counter

import polars as pl
from ezc3d import c3d

from lisa.config import PILOT_DATA_DIR, INTERIM_DATA_DIR

In [65]:
def find_column_names(c):
    """Build unique column names from the analog labels of a loaded C3D file.

    The labels are read from ``c["parameters"]["ANALOG"]["LABELS"]["value"]``.
    A label that occurs exactly once is kept unchanged. A label that occurs
    several times is prefixed with ``D<k>_``, where ``k`` is the 1-based
    position of that occurrence among the identical labels (``D1_``, ``D2_``,
    ...), so repeated labels become distinguishable.

    Args:
        c: Mapping-like object exposing the analog labels under the key path
            shown above.

    Returns:
        A list of column names with the same length and order as the labels.
    """
    labels = c["parameters"]["ANALOG"]['LABELS']['value']

    # How often each label occurs overall decides whether it needs a prefix.
    total_occurrences = Counter(labels)
    # Running tally of how many times a repeated label has been seen so far.
    seen_so_far = Counter()

    modified_columns = []
    for label in labels:
        if total_occurrences[label] > 1:
            seen_so_far[label] += 1
            modified_columns.append(f"D{seen_so_far[label]}_{label}")
        else:
            # Unique labels are passed through untouched.
            modified_columns.append(label)

    return modified_columns


In [66]:
def drop_columns(df):
    """Return ``df`` without the columns whose name contains an unwanted substring.

    A column is removed when its name contains any of the substrings listed
    below (plain, case-sensitive substring matching). The names to remove are
    collected from ``df.columns`` and handed to ``df.drop``.
    """
    unwanted_substrings = ['Force', 'Moment', 'Velocity', 'Angle.Pitch', 'Length.Sway']

    matching_columns = []
    for name in df.columns:
        for fragment in unwanted_substrings:
            if fragment in name:
                matching_columns.append(name)
                # One hit is enough; avoid adding the same column twice.
                break

    return df.drop(matching_columns)

In [75]:

def find_activity_category(filename, activity_categories):
    """Return the first category that occurs in the lower-cased ``filename``.

    Categories are tried in the order given; the match is a plain substring
    test against ``filename.lower()``. ``None`` is returned when no category
    matches.
    """
    matches = (
        category
        for category in activity_categories
        if category in filename.lower()
    )
    # The generator is lazy, so only the first hit (if any) is evaluated.
    return next(matches, None)


In [76]:
# Keywords looked up (case-insensitively) in each file name; the first one
# found becomes the value of the ACTIVITY column for that file.
activity_categories = ['walk', 'jog', 'run', 'jump']

# One DataFrame per processed file; combined in a single step after the loop.
frames = []

for filename in os.listdir(PILOT_DATA_DIR):
    # Ignore any non-c3d files.
    if not filename.endswith(".c3d"):
        continue

    # Ignore files whose name contains none of the activity categories,
    # i.e. calibration files.
    activity = find_activity_category(filename, activity_categories)
    if activity is None:
        continue

    print(filename)
    file = os.path.join(PILOT_DATA_DIR, filename)

    c3d_contents = c3d(file)

    analogs = c3d_contents['data']['analogs']
    # Transposed so that the columns line up with the analog labels that are
    # assigned as column names on the next line.
    df = pl.DataFrame(analogs[0].T)
    df.columns = find_column_names(c3d_contents)

    df = drop_columns(df)

    df = df.with_columns(pl.lit(activity).alias('ACTIVITY'))
    frames.append(df)

if not frames:
    # Fail with an explicit message instead of an AttributeError on None.
    raise FileNotFoundError(
        f"No .c3d files matching {activity_categories} found in {PILOT_DATA_DIR}"
    )

# Stack all per-file frames vertically in one operation.
total_df = pl.concat(frames)

total_df.write_csv(INTERIM_DATA_DIR / 'pilot_data.csv')

        

Walking 1_4ms01.c3d
VerticalJump01.c3d
Walk1_4ms 10incline01.c3d
Jogging 2_2.c3d
Walk1_4ms 5incline01.c3d
Walking 1_4ms02.c3d
VerticalJump02.c3d
Jogging 2_5ms01.c3d
Walking 1_7ms02.c3d
Running3_0ms.c3d
Walking 1_0ms.c3d
Walk1_4ms 10decline01.c3d
Walk1_4ms 5decline01.c3d
Walk1_4ms01.c3d
VerticalJump.c3d
Jogging 2_2._ForOpensim.c3d
Walk1_1ms.c3d
Walk1_7ms 5decline02.c3d
Walk1_1ms01.c3d
Jogging 2_5ms.c3d
Walk1_1ms 5incline.c3d
Walk1_7ms 5incline02.c3d
Walk1_1ms 5decline.c3d
Walk1_1ms 10incline.c3d
Walk1_1ms 10decline.c3d
Walk1_4ms.c3d


In [77]:
total_df['ACTIVITY'].value_counts()

ACTIVITY,count
str,u32
"""walk""",540000
"""run""",30000
"""jump""",90000
"""jog""",120000
