In [1]:
import pandas as pd
import scipy.io as sio
import numpy as np
import os

# Features in Parkinson dataset: positional column index -> feature name
cols = {0:'acc_x', 1:'acc_y', 2:'acc_z', 3:'vel_x', 4:'vel_y', 5:'vel_z', 6:'label'}

# Classes / labels in Parkinson dataset: numeric code -> class name
labeldict = {1.0: 'labelFreezingOfGait',
             2.0: 'labelBradykinesia',
             3.0: 'labelSittingWithTremor',
             4.0: 'labelAtaxicGait',
             5.0: 'labelMyopathicGait',
             6.0: 'labelMuscleAtrophy',
             7.0: 'labelNoPathologicalGait'}

# Spacing between two consecutive rows on the time index, in milliseconds
# (10 ms per row corresponds to a 100 Hz sample rate).
SAMPLE_PERIOD_MS = 10


def build_participant_frame(raw):
    """Turn one participant's raw 2-D matrix into a feature DataFrame.

    Parameters
    ----------
    raw : array-like
        2-D data whose columns are laid out as described by ``cols``
        (six signal columns followed by the numeric class code).

    Returns
    -------
    pandas.DataFrame
        The six signal columns plus one one-hot column per class name in
        ``labeldict`` (alphabetical order), indexed by a DatetimeIndex that
        starts at the epoch and advances ``SAMPLE_PERIOD_MS`` per row.
        Rows whose class code is not in ``labeldict`` get all-zero one-hot
        columns.
    """
    # rename columns to features
    frame = pd.DataFrame(raw).rename(columns=cols)

    # Pandas datetime does not have centiseconds, so translate index to milliseconds
    frame.index = pd.to_datetime(np.arange(len(frame.index)) * SAMPLE_PERIOD_MS, unit='ms')

    # One hot encode labels against the FULL class list, so every participant
    # ends up with the same columns even when a class never occurs in their
    # recording. Sorting keeps the alphabetical column order that a plain
    # get_dummies call produces.
    class_names = sorted(labeldict.values())
    labels = pd.Series(
        pd.Categorical(frame['label'].map(labeldict), categories=class_names),
        index=frame.index,
    )
    onehot = pd.get_dummies(labels)
    onehot.columns = class_names  # plain string labels instead of a CategoricalIndex

    return frame.drop('label', axis=1).join(onehot)


# Load data in dict: every key is pandas dataframe for 1 participant
data = {}
source = './datasets/simulation-of-parkinson-movement-disorders/'
for root, dirs, filenames in os.walk(source):
    # sorted() makes the insertion order of `data` reproducible across runs
    for fn in sorted(filenames):
        stem, ext = os.path.splitext(fn)

        # load relevant .mat files only
        if 'dataset_' not in fn or ext.lower() != '.mat':
            continue

        # os.walk descends into subdirectories, so the path must be built
        # from `root`, not from `source`. The variable stored inside each
        # .mat file is looked up under the file's own stem.
        mat = sio.loadmat(os.path.join(root, fn))

        # add dataframe to data dictionary
        data[stem] = build_participant_frame(mat[stem])

# NOTE: the time component (100 Hz) is carried by the DatetimeIndex of each frame.

In [3]:
# see e.g. dataset of 1st participant: show only the first rows instead of
# rendering the whole frame
data['dataset_1Giorgia'].head(10)

Unnamed: 0,acc_x,acc_y,acc_z,vel_x,vel_y,vel_z,label
1970-01-01 00:00:00.000,-0.210084,0.129076,0.968192,3.29,-6.16,0.98,labelFreezingOfGait
1970-01-01 00:00:00.010,-0.190320,0.133224,0.972950,2.45,-6.65,0.63,labelFreezingOfGait
1970-01-01 00:00:00.020,-0.213866,0.134932,0.969656,1.33,-4.62,-0.49,labelFreezingOfGait
1970-01-01 00:00:00.030,-0.208132,0.128954,0.973926,0.70,-5.46,-1.61,labelFreezingOfGait
1970-01-01 00:00:00.040,-0.212158,0.126514,0.974536,0.56,-4.97,-1.47,labelFreezingOfGait
1970-01-01 00:00:00.050,-0.212890,0.128710,0.972096,0.35,-5.39,-1.26,labelFreezingOfGait
1970-01-01 00:00:00.060,-0.212036,0.120902,0.977586,0.77,-5.32,-1.12,labelFreezingOfGait
1970-01-01 00:00:00.070,-0.212524,0.124562,0.975756,1.33,-5.39,-0.56,labelFreezingOfGait
1970-01-01 00:00:00.080,-0.210084,0.125416,0.966362,1.68,-5.74,-0.07,labelFreezingOfGait
1970-01-01 00:00:00.090,-0.191296,0.137616,0.970754,0.63,-5.95,0.00,labelFreezingOfGait


# 1. Handling noise and missing values

1259

# 2. Feature engineering

# 3. Predictive modeling with and without a notion of time (choose one approach or do both)