In [2]:
# PATH = '/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/'
PATH = '.'
import os
import pandas as pd
import numpy as np

def load_and_combine_data(path):
    data_list = []
    folders = ['tdcsfog', 'defog', 'notype']

    for folder in folders:
        folder_path = os.path.join(path, folder)

        for file in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file)
            data = pd.read_csv(file_path)
            data['Source'] = folder
            data['SeriesId'] = file.split('.')[0]  # Use the filename (without the extension) as the SeriesId
            data_list.append(data)

    combined_data = pd.concat(data_list, ignore_index=True)
    return combined_data

def unify_sampling_rate(data):
    # Interpolate defog and notype data from 100Hz to 128Hz
    def interpolate_data(df):
        df['Time'] = pd.to_timedelta(df['Time'], unit='ms')  # Convert 'Time' column to timedelta
        df_resampled = df.set_index('Time').resample('7.8125ms').interpolate(method='linear').reset_index()
        return df_resampled

    data_defog = data[data['Source'].isin(['defog', 'notype'])].groupby('SeriesId').apply(interpolate_data).reset_index(drop=True)
    data_tdcsfog = data[data['Source'] == 'tdcsfog']

    unified_data = pd.concat([data_tdcsfog, data_defog], ignore_index=True)
    return unified_data


def unify_acceleration_units(data):
    g_to_ms2 = 9.81

    data.loc[data['Source'].isin(['defog', 'notype']), ['AccV', 'AccML', 'AccAP']] *= g_to_ms2
    return data

def filter_data(data):
    filtered_data = data[(data['Valid'].fillna(True)) & (data['Task'].fillna(True))].reset_index(drop=True)
    return filtered_data

data_path = "./train/"
data = load_and_combine_data(data_path)
data = unify_sampling_rate(data)
data = unify_acceleration_units(data)
filtered_data = filter_data(data)
filtered_data.head()

Unnamed: 0,Time,AccV,AccML,AccAP,StartHesitation,Turn,Walking,Source,SeriesId,Valid,Task,Event
0,0,-9.802023,-0.436305,-1.606585,0.0,0.0,0.0,tdcsfog,58b1cb50ad,,,
1,1,-9.80245,-0.443045,-1.599153,0.0,0.0,0.0,tdcsfog,58b1cb50ad,,,
2,2,-9.803275,-0.439261,-1.602125,0.0,0.0,0.0,tdcsfog,58b1cb50ad,,,
3,3,-9.801431,-0.436956,-1.597653,0.0,0.0,0.0,tdcsfog,58b1cb50ad,,,
4,4,-9.800925,-0.434741,-1.600627,0.0,0.0,0.0,tdcsfog,58b1cb50ad,,,


In [4]:
filtered_data.drop(['Valid', 'Task', 'Event', 'Source', 'SeriesId'], axis=1, inplace=True)
# Remove rows with missing values in the target columns
filtered_data = filtered_data.dropna(subset=['StartHesitation', 'Turn', 'Walking'])
filtered_data.to_csv('./processed.csv')

KeyError: "['Valid', 'Task', 'Event', 'Source', 'SeriesId'] not found in axis"

In [5]:
filtered_data.to_csv('./processed.csv')