In [2]:
# Import Necessary Libraries
import numpy as np
import os, csv, json
import math
import random
import operator
import collections
import pandas as pd

import pprint
import itertools
import matplotlib
from matplotlib import *
from matplotlib import pyplot as plt

from sklearn.decomposition import PCA
import scipy.stats as stats
import scipy.io
from scipy.spatial import distance as Distance

# pretty charting
import seaborn as sns
sns.set_palette('muted')
sns.set_style('darkgrid')

%matplotlib inline

In [3]:
######## Get list of files (.mat) we want to work with ########
filedir = '../02_StepAnalysis/Processed_StepLength/'
debug = 0

# get kinect files; sorted because os.listdir returns names in arbitrary
# order, which would make the row order of subjs_to_analyze vary between runs.
# The loop variable is `fname` so the Python 2 builtin `file` is not shadowed.
files = sorted(fname for fname in os.listdir(filedir) if fname.endswith('.mat'))

######## Load in EVENTS struct to find correct events
eparcDir = '../05_Eparc/Processed_Eparc/'

# add eparc files, and index them by the second '_'-separated token of the
# file name (the subject number, e.g. '002' in 'Subj_002_EPARC.mat')
eparcfiles = sorted(fname for fname in os.listdir(eparcDir) if fname.endswith('.mat'))
eparcsubjs = {}
for fname in eparcfiles:
    eparcsubjs[fname.split('_')[1]] = fname

# determine which subjects to analyze (e.g. ones with both eparc and kinect);
# each row is [kinect file name, eparc file name]
subjs_to_analyze = []
for fname in files:
    subj = fname.split('_')[1]
    if subj in eparcsubjs:
        subjs_to_analyze.append([fname, eparcsubjs[subj]])

subjs_to_analyze = np.array(subjs_to_analyze)

# subjects with both pd on/off
onoffsubj = ['012', '013', '014', '015', '016', '017', '020', '021', '022']

# Single-argument print(...) produces the same text on Python 2 and Python 3.
print("the subjects with on and off testing were:  " + str(onoffsubj))
if debug:
    print("The files we can analyze are: ")
    print(subjs_to_analyze)

the subjects with on and off testing were:  ['012', '013', '014', '015', '016', '017', '020', '021', '022']


# Get Data 


In [4]:
def _load_first_struct(matfile):
    """Load a .mat file and return its alphabetically first stored variable.

    Keys starting with '__' are skipped because scipy.io.loadmat stores its
    metadata ('__header__', '__version__', '__globals__') next to the saved
    variables, and those would win the sort if the variable name is lowercase.
    """
    contents = scipy.io.loadmat(matfile)
    varnames = sorted(name for name in contents.keys() if not name.startswith('__'))
    return contents[varnames[0]]


def _has_nan(*arrays):
    """Return True when at least one of the given arrays contains a NaN."""
    return any(np.isnan(arr).any() for arr in arrays)


# Result containers, keyed by subject id: 'NNN', or 'NNN-<session>' when the
# kinect file name carries an integer on/off session token.
stepLength = {}   # subject id -> {'kinect': ..., 'eparc': ...}
speed = {}        # subject id -> {'kinect': ..., 'eparc': ...}
cadence = {}      # subject id -> kinect cadence
steps = {}        # subject id -> number of kinect step-length entries

for subj in subjs_to_analyze:
    filesplit = subj[0].split('_')
    subj_num = str(filesplit[1])

    # The third token is an integer only for on/off files such as
    # 'Subj_016_2_Step.mat'; for 'Subj_002_Step.mat' it is 'Step.mat'.
    try:
        onoff = int(filesplit[2])
    except (IndexError, ValueError):
        onoff = None

    # whole file location of each subject's mat file
    kinectfile = filedir+subj[0]
    eparcfile = eparcDir+subj[1]

    # load in eparc and kinect data structs
    kinectdata = _load_first_struct(kinectfile)
    eparcdata = _load_first_struct(eparcfile)

    ## 01: Load in step lengths and speed as meters
    # Divide by a float so integer-typed arrays are not floor-divided on Python 2.
    # NOTE(review): assumes the eparc values are stored in centimeters - confirm.
    eparc_steplength = eparcdata['StepLength'][0][0]/100.
    eparc_speed      = eparcdata['Speed'][0][0]/100.
    eparc_stepwidth  = eparcdata['StepWidth'][0][0]/100.

    kinect_steplength = kinectdata['pks'][0][0]
    kinect_speed = kinectdata['velocity'][0][0]
    kinect_cadence = kinectdata['cadence'][0][0]
    kinect_step = len(kinect_steplength)

    fullset = 1
    # if there is an on/off pair dataset, handle differently
    if onoff:
        subj_num = subj_num + '-' + str(onoff)

        # An 18-entry eparc record is split in half:
        # session 1 takes entries 0-8, session 2 takes entries 9-17.
        if len(eparc_steplength) == 18:
            if onoff == 1:
                eparc_steplength = eparc_steplength[0:9]
                eparc_stepwidth = eparc_stepwidth[0:9]
                eparc_speed = eparc_speed[0:9]
            elif onoff == 2:
                eparc_steplength = eparc_steplength[9:18]
                eparc_stepwidth = eparc_stepwidth[9:18]
                eparc_speed = eparc_speed[9:18]
        else: # not full data set from eparc
            fullset = 0
    # Subjects without a session token use the arrays loaded above unchanged.

    # NaN screening happens after the session slicing, so only the half that
    # belongs to this session can disqualify it.
    flag = 0
    if _has_nan(eparc_steplength, eparc_speed, eparc_stepwidth):
        print("Eparc has nan at:  " + str(subj))
        flag = 1
    if _has_nan(kinect_steplength, kinect_speed, kinect_cadence):
        print("Kinect has nan at:  " + str(subj))
        flag = 1

    if flag == 0 and fullset == 1:
        ## 02: Put all data into nested dictionary
        stepLength[subj_num] = {'kinect': kinect_steplength,
                                'eparc': eparc_steplength}
        speed[subj_num] = {'kinect': kinect_speed,
                           'eparc': eparc_speed}
        cadence[subj_num] = kinect_cadence
        steps[subj_num] = kinect_step

Eparc has nan at:  ['Subj_002_Step.mat' 'Subj_002_EPARC.mat']
Eparc has nan at:  ['Subj_003_Step.mat' 'Subj_003_EPARC.mat']
Eparc has nan at:  ['Subj_016_2_Step.mat' 'Subj_016_EPARC.mat']
Eparc has nan at:  ['Subj_019_Step.mat' 'Subj_019_EPARC.mat']


In [5]:
# Subjects flagged for NaN values or an incomplete eparc record were never
# stored, so they are absent from the keys listed here.
print(sorted(stepLength))

# Hard-coded list of subject ids.
allsubjnum = [
    '001', '005', '006', '007', '008', '009', '010',
    '012-1', '012-2', '013-1', '013-2', '014-1', '014-2',
    '015-1', '015-2', '016-1', '017-1', '017-2', '018',
    '020-1', '020-2', '021-1', '021-2', '022-1', '022-2',
    '103', '104', '105', '106', '107', '108', '109', '110',
    '112', '113', '114', '115', '117', '118', '119', '120', '121',
]

['001', '005', '006', '007', '008', '009', '010', '013-1', '013-2', '014-1', '014-2', '015-1', '015-2', '016-1', '017-1', '017-2', '018', '020-1', '020-2', '021-1', '021-2', '022-1', '022-2', '103', '104', '105', '106', '107', '108', '109', '110', '112', '113', '114', '115', '117', '118', '119', '120', '121']


Now we have four dicts, each keyed by subject ID:
- stepLength
- speed
- cadence
- steps

Next, we split allsubjnum into the pdoff, pdon, control, and pdonwithoff groups and compute the average +/- std for each.

In [6]:
def reject_outliers(data, m = 2.):
    """Keep the entries of ``data`` that lie close to its median.

    Each entry's absolute deviation from the median is divided by the median
    of those deviations (the MAD); entries whose scaled deviation is below
    ``m`` are kept.

    Parameters
    ----------
    data : array-like
        Numeric values; converted with ``np.asarray``.
    m : float, optional
        Cut-off in units of the MAD (default 2.).

    Returns
    -------
    numpy.ndarray
        The retained values, selected with a boolean mask (so the result is
        1-D). When the MAD is zero there is no scale to compare against and
        every entry is kept.
    """
    data = np.asarray(data)
    d = np.abs(data - np.median(data))
    mdev = np.median(d)
    if not mdev:
        # Use a full-shape all-True mask. The scalar fallback (s = 0.) made
        # `data[s < m]` index with a bare Python bool, which adds an axis
        # instead of selecting every element.
        return data[np.ones(d.shape, dtype=bool)]
    return data[d / mdev < m]

def pretty(d, indent=0):
    """Print a (possibly nested) dict as a tab-indented tree.

    Each key is printed on its own line, prefixed by ``indent`` tab
    characters. A dict value is printed recursively one level deeper; any
    other value is printed on the following line with one extra tab.

    Parameters
    ----------
    d : dict
        Mapping to display.
    indent : int, optional
        Number of leading tabs for the keys of ``d`` (default 0).
    """
    # items() and single-argument print(...) behave the same on Python 2 and
    # Python 3; iteritems() and the print statement exist only on Python 2.
    for key, value in d.items():
        print('\t' * indent + str(key))
        if isinstance(value, dict):
            pretty(value, indent+1)
        else:
            print('\t' * (indent+1) + str(value))
            
# define patient groups
# Every entry needs its own trailing comma: adjacent string literals are
# concatenated, so a missing comma after '007' produced the single id '007008'.
pdon_firstgroup = ['001', '002', '003', '004', '005', '006', '007',
                   '008', '009', '010']
control_group = ['101', '102', '103', '104', '105', '106', '107', '108', '109', '110',
                 '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122']
# '-1' / '-2' suffixes match the session ids built from the kinect file names.
pdoff_group = ['011-1', '012-1', '013-1', '014-1', '015-1',
               '016-1', '017-1', '018-1', '019-1', '020-1', '021-1', '022-1']
pdon_secondgroup = ['011-2', '012-2', '013-2', '014-2', '015-2',
                    '016-2', '017-2', '018-2', '019-2', '020-2', '021-2', '022-2']

# Put Step Length Kinect/Eparc For Each Patient into TEXT File


In [7]:
############## PRINTING INTO TABLE FOR EACH PATIENT KINECT AND EPARC STEP LENGTH ############
# subj_step[subject][metric] = [mean, std], both formatted as 3-decimal strings
subj_step = {}

# lists to store the eparc avge for each grouping
pdoff_avge = []
pdon_firstavge = []
control_avge = []
pdon_secondavge = []

for subj in sorted(stepLength.keys()):
    subj_step[subj] = {}
    # one entry per measurement source ('eparc', 'kinect')
    for metric in sorted(stepLength[subj].keys()):
        # NOTE(review): the statistics use the raw values. A reject_outliers()
        # call used to sit here, but its result was overwritten on the next
        # line and never used - confirm whether outlier rejection was intended.
        data_to_print = stepLength[subj][metric]

        avge = "{0:.3f}".format(np.mean(data_to_print))
        std = "{0:.3f}".format(np.std(data_to_print))

        # Group averages are collected from the eparc values only, using the
        # rounded (3-decimal) mean.
        if metric == 'eparc':
            if subj in pdon_firstgroup:
                pdon_firstavge.append(float(avge))
            elif subj in pdon_secondgroup:
                pdon_secondavge.append(float(avge))
            elif subj in pdoff_group:
                pdoff_avge.append(float(avge))
            elif subj in control_group:
                control_avge.append(float(avge))

        subj_step[subj][metric] = [avge, std]

        # Single-argument print(...) keeps the text identical on Python 2 and 3.
        print("subject:  " + str(subj))
        print("{0} :  {1} +/- {2}".format(metric, avge, std))

subject:  001
eparc :  0.632 +/- 0.066
subject:  001
kinect :  0.178 +/- 0.111
subject:  005
eparc :  0.600 +/- 0.028
subject:  005
kinect :  0.418 +/- 0.104
subject:  006
eparc :  0.565 +/- 0.128
subject:  006
kinect :  0.296 +/- 0.194
subject:  007
eparc :  0.617 +/- 0.025
subject:  007
kinect :  0.319 +/- 0.165
subject:  008
eparc :  0.557 +/- 0.075
subject:  008
kinect :  0.266 +/- 0.174
subject:  009
eparc :  0.375 +/- 0.021
subject:  009
kinect :  0.217 +/- 0.148
subject:  010
eparc :  0.651 +/- 0.030
subject:  010
kinect :  0.251 +/- 0.191
subject:  013-1
eparc :  0.659 +/- 0.024
subject:  013-1
kinect :  0.332 +/- 0.215
subject:  013-2
eparc :  0.841 +/- 0.100
subject:  013-2
kinect :  0.411 +/- 0.236
subject:  014-1
eparc :  0.579 +/- 0.078
subject:  014-1
kinect :  0.245 +/- 0.187
subject:  014-2
eparc :  0.626 +/- 0.020
subject:  014-2
kinect :  0.179 +/- 0.169
subject:  015-1
eparc :  0.559 +/- 0.116
subject:  015-1
kinect :  0.359 +/- 0.149
subject:  015-2
eparc :  0.923 +

In [21]:
# Write one CSV row per subject: subject id, then "mean +/- std" for the
# kinect and the eparc step length.
fileName = "table/patientStepSummary.csv"

with open(fileName, 'w') as output:
    writer = csv.writer(output, lineterminator='\n')
    writer.writerow(['subject', 'kinect', 'eparc'])

    # subjects in sorted order; columns follow the header order above
    for subj in sorted(subj_step):
        row = [subj]
        for source in ('kinect', 'eparc'):
            stats_pair = subj_step[subj][source]
            row.append(str(stats_pair[0]) + ' +/- ' + str(stats_pair[1]))
        writer.writerow(row)
print("finished")

finished


# Put Cadence Kinect/Eparc For Each Patient into TEXT File


# Put Speed Kinect/Eparc For Each Patient into TEXT File


In [22]:
############## PRINTING INTO TABLE FOR EACH PATIENT KINECT AND EPARC SPEED ############
# subj_speed[subject][modality] = [mean, std], both formatted as 3-decimal strings
subj_speed = {}

# lists to store the avge for each grouping
# NOTE(review): these are reset to empty here and not filled in this cell,
# which discards any group averages collected earlier under the same names -
# confirm that this is intended.
pdoff_avge = []
pdon_firstavge = []
control_avge = []
pdon_secondavge = []

for subj in sorted(speed.keys()):
    subj_speed[subj] = {}
    # one entry per measurement source ('eparc', 'kinect')
    for modality in sorted(speed[subj].keys()):
        # NOTE(review): the statistics use the raw values. A reject_outliers()
        # call used to sit here, but its result was overwritten on the next
        # line and never used - confirm whether outlier rejection was intended.
        data_to_print = speed[subj][modality]

        avge = "{0:.3f}".format(np.mean(data_to_print))
        std = "{0:.3f}".format(np.std(data_to_print))

        # store average and standard deviation
        subj_speed[subj][modality] = [avge, std]

        # Single-argument print(...) keeps the text identical on Python 2 and 3.
        print("subject:  " + str(subj))
        print("{0} :  {1} +/- {2}".format(modality, avge, std))

subject:  001
eparc :  1.460 +/- 1.799
subject:  001
kinect :  0.322 +/- 0.444
subject:  005
eparc :  0.784 +/- 0.095
subject:  005
kinect :  1.077 +/- 0.737
subject:  006
eparc :  0.825 +/- 0.127
subject:  006
kinect :  0.871 +/- 0.744
subject:  007
eparc :  0.721 +/- 0.043
subject:  007
kinect :  0.945 +/- 0.687
subject:  008
eparc :  0.722 +/- 0.066
subject:  008
kinect :  0.667 +/- 0.587
subject:  009
eparc :  0.561 +/- 0.067
subject:  009
kinect :  0.644 +/- 0.530
subject:  010
eparc :  0.745 +/- 0.076
subject:  010
kinect :  0.497 +/- 0.660
subject:  013-1
eparc :  0.889 +/- 0.106
subject:  013-1
kinect :  0.775 +/- 0.878
subject:  013-2
eparc :  0.985 +/- 0.077
subject:  013-2
kinect :  1.287 +/- 0.996
subject:  014-1
eparc :  0.735 +/- 0.051
subject:  014-1
kinect :  0.682 +/- 0.745
subject:  014-2
eparc :  0.856 +/- 0.103
subject:  014-2
kinect :  0.516 +/- 0.635
subject:  015-1
eparc :  0.771 +/- 0.078
subject:  015-1
kinect :  1.068 +/- 0.709
subject:  015-2
eparc :  0.947 +

In [None]:
# Write one CSV row per subject: subject id, then "mean +/- std" for the
# kinect and the eparc speed.
fileName = "table/patientSpeedSummary.csv"

with open(fileName, 'w') as output:
    writer = csv.writer(output, lineterminator='\n')

    writer.writerow(['subject', 'kinect', 'eparc'])

    # loop through each subject and write out the data.
    # Iterate over subj_speed itself (not subj_step), so this cell depends only
    # on the dictionary it reads and cannot hit a subject missing from it.
    for subj in sorted(subj_speed.keys()):
        kinect_avge, kinect_std = subj_speed[subj]['kinect']
        kinect_data = str(kinect_avge) + ' +/- ' + str(kinect_std)

        eparc_avge, eparc_std = subj_speed[subj]['eparc']
        eparc_data = str(eparc_avge) + ' +/- ' + str(eparc_std)
        writer.writerow([subj, kinect_data, eparc_data])
print("finished")

# Groups Summary For Speed, Steps, Step Length, Step Width

In [None]:
# define patient groups
# Every entry needs its own trailing comma: adjacent string literals are
# concatenated, so a missing comma after '007' produced the single id '007008'.
pdon_firstgroup = ['001', '002', '003', '004', '005', '006', '007',
                   '008', '009', '010']
control_group = ['101', '102', '103', '104', '105', '106', '107', '108', '109', '110',
                 '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122']
# '-1' / '-2' suffixes match the session ids built from the kinect file names.
pdoff_group = ['011-1', '012-1', '013-1', '014-1', '015-1',
               '016-1', '017-1', '018-1', '019-1', '020-1', '021-1', '022-1']
pdon_secondgroup = ['011-2', '012-2', '013-2', '014-2', '015-2',
                    '016-2', '017-2', '018-2', '019-2', '020-2', '021-2', '022-2']

# loop through subj_step, subj_stepnum, subj_speed, subj_stepwidth, etc.