In [1]:
# Import Necessary Libraries
import numpy as np
import os, csv, json
import math
import random
import operator
import collections
import pandas as pd

import pprint
import itertools
import matplotlib
from matplotlib import *
from matplotlib import pyplot as plt

from sklearn.decomposition import PCA
import scipy.stats as stats
import scipy.io
from scipy.spatial import distance as Distance

# pretty charting
import seaborn as sns
sns.set_palette('muted')
sns.set_style('darkgrid')

%matplotlib inline



# First Summarize Kinect Data
Subject 013
ON
Step Length Mean + SD
Cadence Mean + SD
Speed Mean + SD
Var step length + SD
Var Cadence Mean + SD
Var Speed Mean + SD
OFF
Repeat metrics 1-6 above for the OFF condition.

In [2]:
######## Get list of files (.mat) we want to work with ########
# Builds `subjs_to_analyze`: an array of [kinect file name, eparc file name]
# pairs for every kinect recording whose subject also has an eparc file.
filedir = '../02_StepAnalysis/Processed_StepLength/'
files = []
debug = 0  # set to 1 to also print the matched file pairs

# get kinect files
# os.listdir returns entries in arbitrary order; sorting keeps the order in
# which subjects are processed (and reported) the same on every run.
for fname in sorted(os.listdir(filedir)):
    if fname.endswith('.mat'):
        files.append(fname)

######## Load in EVENTS struct to find correct events
eparcDir = '../05_Eparc/Processed_Eparc/'
eparcfiles = []
eparcsubjs = {}  # subject id -> eparc file name

# add eparc files
for fname in sorted(os.listdir(eparcDir)):
    if fname.endswith('.mat'):
        eparcfiles.append(fname)
        # the subject id is the second '_'-separated token of the file name
        eparcsubjs[fname.split('_')[1]] = fname

# determine which subjects to analyze (e.g. ones with both eparc and kinect)
subjs_to_analyze = []
for fname in files:
    subj = fname.split('_')[1]
    if subj in eparcsubjs:
        subjs_to_analyze.append([fname, eparcsubjs[subj]])

subjs_to_analyze = np.array(subjs_to_analyze)

# subjects with both pd on/off
onoffsubj = ['012', '013', '014', '015', '016', '017', '020', '021', '022']

# Single-argument print(...) gives the same output under Python 2 and 3; the
# explicit " " reproduces the separator of the former two-argument print.
print("the subjects with on and off testing were: " + " " + str(onoffsubj))
if debug:
    print("The files we can analyze are: ")
    print(subjs_to_analyze)

the subjects with on and off testing were:  ['012', '013', '014', '015', '016', '017', '020', '021', '022']


## Get Data
For all metrics:
steplength, cadence, speed, stepwidth


In [8]:
# Per-metric containers keyed by subject id ('<subject>' or
# '<subject>-<session>' for on/off recordings); each value is a dict holding
# the raw measurements per modality ('kinect' and/or 'eparc').
stepLength = {}
speed = {}
cadence = {}
steps = {}
stepWidth = {}

# An on/off eparc file is only used when it holds exactly two sessions' worth
# of entries: the first half is taken for session 1, the second for session 2.
ENTRIES_PER_SESSION = 9


def _load_mat_struct(path):
    """Return the first user variable stored in the .mat file at `path`.

    scipy.io.loadmat also returns bookkeeping entries ('__header__',
    '__version__', '__globals__'). Picking sorted(keys)[0] only lands on the
    real variable when its name happens to sort before '_', so the
    double-underscore entries are filtered out explicitly.

    Raises ValueError when the file contains no user variable.
    """
    contents = scipy.io.loadmat(path)
    names = sorted(key for key in contents.keys() if not key.startswith('__'))
    if not names:
        raise ValueError("no variables found in " + path)
    return contents[names[0]]


for subj in subjs_to_analyze:
    # subj is a [kinect file name, eparc file name] pair
    filesplit = subj[0].split('_')
    subj_num = str(filesplit[1])

    # The third file-name token is a session number for on/off recordings
    # (e.g. 'Subj_016_2_Step.mat'); otherwise it is not an integer.
    try:
        onoff = int(filesplit[2])
    except (IndexError, ValueError):
        onoff = None

    # whole file location of each subject's mat file
    kinectfile = filedir + subj[0]
    eparcfile = eparcDir + subj[1]

    # load in eparc and kinect data structs
    kinectdata = _load_mat_struct(kinectfile)
    eparcdata = _load_mat_struct(eparcfile)

    ## 01: Load in step lengths and speed as meters
    # Dividing by 100.0 (not 100) avoids integer truncation under Python 2
    # should a field be stored with an integer dtype.
    # NOTE(review): the factor 100 presumably converts cm -> m; confirm units.
    eparc_steplength = eparcdata['StepLength'][0][0] / 100.0
    eparc_speed      = eparcdata['Speed'][0][0] / 100.0
    eparc_stepwidth  = eparcdata['StepWidth'][0][0] / 100.0

    kinect_steplength = kinectdata['pks'][0][0]
    kinect_speed = kinectdata['velocity'][0][0]
    kinect_cadence = kinectdata['cadence'][0][0]
    kinect_step = len(kinect_steplength)  # number of kinect step-length entries

    fullset = 1
    # if there is an on/off pair dataset, handle differently
    if onoff:
        subj_num = subj_num + '-' + str(onoff)

        if len(eparc_steplength) == 2 * ENTRIES_PER_SESSION:
            if onoff == 1:
                session = slice(0, ENTRIES_PER_SESSION)
            elif onoff == 2:
                session = slice(ENTRIES_PER_SESSION, 2 * ENTRIES_PER_SESSION)
            else:
                # any other session number: keep every entry
                session = slice(None)
            eparc_steplength = eparc_steplength[session]
            eparc_stepwidth = eparc_stepwidth[session]
            eparc_speed = eparc_speed[session]
        else: # not full data set from eparc
            fullset = 0

    # Skip recordings that contain NaNs in any metric of either modality.
    # Single-argument print(...) gives the same output under Python 2 and 3;
    # the explicit " " reproduces the former two-argument print separator.
    flag = 0
    if (np.isnan(eparc_steplength).any() or np.isnan(eparc_speed).any()
            or np.isnan(eparc_stepwidth).any()):
        print("Eparc has nan at: " + " " + str(subj))
        flag = 1
    if (np.isnan(kinect_steplength).any() or np.isnan(kinect_speed).any()
            or np.isnan(kinect_cadence).any()):
        print("Kinect has nan at: " + " " + str(subj))
        flag = 1

    if flag == 0 and fullset == 1:
        ## 02: Put all data into nested dictionary
        stepLength[subj_num] = {'kinect': kinect_steplength,
                                'eparc': eparc_steplength}
        speed[subj_num] = {'kinect': kinect_speed,
                           'eparc': eparc_speed}
        cadence[subj_num] = {'kinect': kinect_cadence}
        steps[subj_num] = {'kinect': kinect_step}
        stepWidth[subj_num] = {'eparc': eparc_stepwidth}

Eparc has nan at:  ['Subj_002_Step.mat' 'Subj_002_EPARC.mat']
Eparc has nan at:  ['Subj_003_Step.mat' 'Subj_003_EPARC.mat']
Eparc has nan at:  ['Subj_016_2_Step.mat' 'Subj_016_EPARC.mat']
Eparc has nan at:  ['Subj_019_Step.mat' 'Subj_019_EPARC.mat']


In [14]:
# Regroup the per-metric dictionaries by modality: one dictionary per device,
# keyed by subject and then by metric name.
kinect_temp = {}
eparc_temp = {}

for subj in stepLength:
    # metrics recorded by the kinect
    kinect_temp[subj] = {
        'stepLength': stepLength[subj]['kinect'],
        'speed': speed[subj]['kinect'],
        'cadence': cadence[subj]['kinect'],
        'steps': steps[subj]['kinect'],
    }

    # metrics recorded by the eparc
    eparc_temp[subj] = {
        'stepLength': stepLength[subj]['eparc'],
        'speed': speed[subj]['eparc'],
        'stepWidth': stepWidth[subj]['eparc'],
    }

In [19]:
# Per-subject summaries: {subject: {metric: [mean, std]}}, with both numbers
# stored as strings formatted to three decimals.
kinect = {}
eparc = {}

# Both modalities are summarised the same way, kinect first and then eparc.
# Outlier rejection (reject_outliers) is currently not applied.
# NOTE(review): np.std is called with its default arguments (ddof=0, i.e. the
# population SD) -- confirm that is the SD intended for the tables.
for raw_by_subj, summary_by_subj in ((kinect_temp, kinect), (eparc_temp, eparc)):
    for subj in sorted(raw_by_subj.keys()):
        summary_by_subj[subj] = {}
        for metric in sorted(raw_by_subj[subj].keys()):
            data_to_print = raw_by_subj[subj][metric]

            avge = "{0:.3f}".format(np.mean(data_to_print))
            std = "{0:.3f}".format(np.std(data_to_print))

            summary_by_subj[subj][metric] = [avge, std]

In [5]:
def reject_outliers(data, m = 2.):
    """Drop the values that lie far from the median of `data`.

    Each value's absolute deviation from the median is divided by the median
    absolute deviation (MAD); values whose scaled deviation is below `m` are
    kept.

    Parameters
    ----------
    data : array-like
        Numeric values; converted with np.asarray, so lists are accepted.
    m : float, optional
        Threshold on the scaled deviation (strictly-less-than comparison).

    Returns
    -------
    numpy.ndarray
        The retained values, selected with a boolean mask.
    """
    data = np.asarray(data)
    if data.size == 0:
        # nothing to filter; also avoids the median-of-empty warning
        return data

    d = np.abs(data - np.median(data))
    mdev = np.median(d)
    if mdev:
        s = d / mdev
    else:
        # MAD of 0: every scaled deviation is treated as 0. This must be a
        # per-element array -- a scalar here would make `s < m` a bare boolean
        # and `data[...]` would no longer act as an element-wise mask.
        s = np.zeros_like(d, dtype=float)
    return data[s < m]

def pretty(d, indent=0):
    """Print a (possibly nested) dictionary as a tab-indented tree.

    Each key is printed on its own line, indented by `indent` tabs. A value
    that is itself a dict is printed recursively one level deeper; any other
    value is printed via str() on the following line, one tab deeper than its
    key.

    Parameters
    ----------
    d : dict
        Dictionary to print.
    indent : int, optional
        Number of leading tabs for the keys of `d`.
    """
    # items() and single-argument print(...) produce the same output under
    # Python 2 and also run under Python 3 (unlike iteritems / print statement).
    for key, value in d.items():
        print('\t' * indent + str(key))
        if isinstance(value, dict):
            pretty(value, indent+1)
        else:
            print('\t' * (indent+1) + str(value))
            
# define patient groups
# Ids suffixed '-1' / '-2' have the '<subject>-<session>' form that the
# data-loading cell builds for on/off recordings.
# A comma was missing after '007', which silently merged it with '008' into
# the single entry '007008'.
pdon_firstgroup = ['001', '002', '003', '004', '005', '006', '007',
                   '008', '009', '010']
control_group = ['101', '102', '103', '104', '105', '106', '107', '108', '109', '110',
                 '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122']
pdoff_group = ['011-1', '012-1', '013-1', '014-1', '015-1',
               '016-1', '017-1', '018-1', '019-1', '020-1', '021-1', '022-1']
pdon_secondgroup = ['011-2', '012-2', '013-2', '014-2', '015-2',
                    '016-2', '017-2', '018-2', '019-2', '020-2', '021-2', '022-2']


## Put Metric Modality Data into CSV File

In [22]:
filename = "table/kinectSubjSummary.csv"

# Write one row per subject (sorted by id): step length and speed as
# "mean +/- std", and the step count as its mean entry only.
with open(filename, 'w') as output:
    writer = csv.writer(output, lineterminator = '\n')
    writer.writerow(['subject', 'Step Length', '# Steps', 'Speed'])

    for subj in sorted(kinect.keys()):
        summary = kinect[subj]  # {metric: [mean, std]} with string entries

        stepL_data = ' +/- '.join([str(summary['stepLength'][0]),
                                   str(summary['stepLength'][1])])
        step_data = str(summary['steps'][0])
        speed_data = ' +/- '.join([str(summary['speed'][0]),
                                   str(summary['speed'][1])])

        writer.writerow([subj, stepL_data, step_data, speed_data])
print("finished")

finished


In [23]:
filename = "table/eparcSubjSummary.csv"

# Write one row per subject (sorted by id) with step length, step width and
# speed, each as "mean +/- std".
with open(filename, 'w') as output:
    writer = csv.writer(output, lineterminator = '\n')
    # The third column holds step width; the header previously said '# Steps'
    # (copied from the kinect table), mislabelling the data written below.
    writer.writerow(['subject', 'Step Length', 'Step Width', 'Speed'])

    for subj in sorted(eparc.keys()):
        stepL_avge = eparc[subj]['stepLength'][0]
        stepL_std = eparc[subj]['stepLength'][1]
        stepW_avge = eparc[subj]['stepWidth'][0]
        stepW_std = eparc[subj]['stepWidth'][1]
        speed_avge = eparc[subj]['speed'][0]
        speed_std = eparc[subj]['speed'][1]

        stepL_data = str(stepL_avge) + ' +/- ' + str(stepL_std)
        stepW_data = str(stepW_avge) + ' +/- ' + str(stepW_std)
        speed_data = str(speed_avge) + ' +/- ' + str(speed_std)
        writer.writerow([subj, stepL_data, stepW_data, speed_data])
# single-argument print(...) behaves the same under Python 2 and 3
print("finished")

finished
