# load data from Synapse ADI, perform a quaternion rotation.

In [None]:
# IPython magic: clear the interactive namespace before the notebook is run.
%reset

In [None]:
import synapseclient
import numpy as np
import pandas as pd
import matplotlib.pylab as pl
import json
import peakutils
from peakutils.plot import plot as pplot
from scipy import signal, ndimage, io, stats
import scipy.integrate as integrate
from math import factorial
from scipy.stats import mode

In [None]:
# Log in to the Synapse client.
# login() is called without arguments, so no credentials are passed here.
# NOTE(review): presumably cached/configured credentials are used -- confirm.

# syn = synapseclient.Synapse()
syn = synapseclient.login()

# Alternative kept for reference: log in with an explicit login and password
#syn.login('login','password')

# Query demographics table

In [None]:
# Earlier version of this query, kept for reference:
# demographicstable = syn.tableQuery('select * from syn5511429')

# Synapse ID of the demographics table. The query below is built from this
# constant so the ID is written in exactly one place.
INPUT_DEMO_SYNID = "syn10146552"

demo_syntable = syn.tableQuery("SELECT * FROM {0}".format(INPUT_DEMO_SYNID))
demo = demo_syntable.asDataFrame()

# Comma-separated healthCodes for use inside a SQL "IN (...)" clause.
# NOTE(review): quoting relies on repr(); under Python 2 a unicode value would
# render as u'...' -- confirm the column holds plain str values.
healthCodeList = ", ".join(repr(i) for i in demo["healthCode"])


# Query multiple datasets, starting from DeviceMotion

In [None]:
INPUT_WALKING_ACTIVITY_TABLE_SYNID = "syn10146553"

# Walking-activity rows of the selected healthCodes that carry an outbound
# DeviceMotion file (at most 500 rows).
_walking_query = ('select * FROM {0} WHERE healthCode IN ({1}) AND "deviceMotion_walking_outbound.json.items" is not null LIMIT 500').format(
    INPUT_WALKING_ACTIVITY_TABLE_SYNID, healthCodeList)
walkingtable = syn.tableQuery(_walking_query)

# Columns removed from the walking table before further processing.
_dropped_walking_columns = [
    'createdOn',
    'appVersion',
    'phoneInfo',
    'accel_walking_outbound.json.items',
    'pedometer_walking_outbound.json.items',
    'accel_walking_return.json.items',
    'deviceMotion_walking_return.json.items',
    'pedometer_walking_return.json.items',
    'accel_walking_rest.json.items',
]

walking_df = walkingtable.asDataFrame()
# Remember the original row label of every record in its own column.
walking_df['idx'] = walking_df.index
# Discard the columns listed above, then every row that still has a missing value.
walking_df = walking_df.drop(_dropped_walking_columns, axis=1).dropna()

# Download the DeviceMotion files referenced by the query result
# (outbound walk and rest phase).
filePaths_DMoutbound = syn.downloadTableColumns(
    walkingtable, ['deviceMotion_walking_outbound.json.items'])
filePaths_DMrest = syn.downloadTableColumns(
    walkingtable, ['deviceMotion_walking_rest.json.items'])

# store DeviceMotion datasets in dataframes and pickle them

In [None]:
# Demographics as a DataFrame, taken from the demographics query result
# (demo_syntable); the name `demographicstable` is never assigned.
demographics_df = demo_syntable.asDataFrame()

# Row positions of walking_df.
wlist = np.arange(len(walking_df))

# Translate each row's file-handle id into the local path of its downloaded
# file. The column values are iterated directly (instead of looking rows up by
# label) so the paths stay aligned with the rows of walking_df even when
# dropna() has left gaps in its index.
_outbound_handles = walking_df['deviceMotion_walking_outbound.json.items']
_rest_handles = walking_df['deviceMotion_walking_rest.json.items']

DMoutboundPathlist = [filePaths_DMoutbound[str(int(handle))] for handle in _outbound_handles]
DMrestPathlist = [filePaths_DMrest[str(int(handle))] for handle in _rest_handles]

walking_df['DMoutboundPaths'] = DMoutboundPathlist
walking_df['DMrestPaths'] = DMrestPathlist

# Persist both tables to the working directory.
demographics_df.to_pickle('demographics_df.pkl')
walking_df.to_pickle('walking_df.pkl')

# perform a quaternion rotation on the accelerometer files

In [None]:
def normalize(v, tolerance=0.00001):
    """Return *v* rescaled to unit magnitude when it is not already unit length.

    Used to make sure each attitude quaternion is properly normalized before
    it is applied as a rotation.

    Parameters:
        v: sequence of numbers (e.g. a (w, x, y, z) quaternion).
        tolerance: largest allowed deviation of the squared magnitude from 1.0
            before the input is rescaled.

    Returns:
        *v* itself (unchanged, same object) when its squared magnitude is
        within *tolerance* of 1.0; otherwise a new tuple with every component
        divided by the magnitude.

    Raises:
        ZeroDivisionError: if *v* has zero magnitude (and tolerance < 1).
    """
    # Local import: the notebook's import cell only pulls `factorial` from math.
    from math import sqrt

    mag2 = sum(n * n for n in v)
    if abs(mag2 - 1.0) > tolerance:
        mag = sqrt(mag2)
        v = tuple(n / mag for n in v)
    return v


In [None]:
def get_quaternion_conjugate(quaternion):
    """Return the conjugate of a (w, x, y, z) quaternion.

    The scalar part is kept and the three vector components are negated.
    """
    scalar, i_part, j_part, k_part = quaternion
    conjugate = (scalar, -i_part, -j_part, -k_part)
    return conjugate

In [None]:
def multiply_quaternions(quaternion1, quaternion2):
    """Multiply two quaternions given as (w, x, y, z) sequences.

    Returns the product quaternion1 * quaternion2 as a (w, x, y, z) tuple.
    The order of the operands matters.
    """
    a_w, a_x, a_y, a_z = quaternion1
    b_w, b_x, b_y, b_z = quaternion2

    # Scalar part, followed by the three vector components.
    scalar = (a_w * b_w) - (a_x * b_x) - (a_y * b_y) - (a_z * b_z)
    i_part = (a_w * b_x) + (a_x * b_w) + (a_y * b_z) - (a_z * b_y)
    j_part = (a_w * b_y) - (a_x * b_z) + (a_y * b_w) + (a_z * b_x)
    k_part = (a_w * b_z) + (a_x * b_y) - (a_y * b_x) + (a_z * b_w)

    return scalar, i_part, j_part, k_part

def multiply_quaternionANDvector(q1, v1):
    """Apply quaternion *q1* to the 3-component tuple *v1*.

    *q1* is normalized first, *v1* is embedded as the quaternion
    (0, x, y, z), and the product q1 * v1 * conjugate(q1) is formed.
    Returns the last three components of that product as a tuple.
    """
    unit_q = normalize(q1, tolerance=0.00001)
    # Embed the vector as a quaternion with a zero scalar part.
    vector_as_quaternion = (0.0,) + v1
    half_product = multiply_quaternions(unit_q, vector_as_quaternion)
    full_product = multiply_quaternions(half_product, get_quaternion_conjugate(unit_q))
    # Drop the scalar part; only the vector components are returned.
    return full_product[1:]

# combine into a datastructure from all datasets

In [None]:
# Column layout of the per-record accelerometer table, in display order.
_accelerometer_columns = [
    'time', 'rotX', 'rotY', 'rotZ', 'total_rawacceleration',
    'rest_time', 'rest_rotX', 'rest_rotY', 'rest_rotZ',
    'rest_total_rawacceleration', 'healthCode',
]

# One (separate) empty list per column, then an empty frame with those columns.
dfdata = {column: [] for column in _accelerometer_columns}
Accelerometer_df = pd.DataFrame(dfdata, columns=_accelerometer_columns)

# load accelerometer data

In [None]:
# NOTE(review): presumably labels of the row ranges processed per run; the list
# is not used in this cell -- confirm against the cells that handle the pickles.
mypkls =['0_2500','2500_5000','5000_7500','7500_10000','10000_12500','12500_15000','15000_17500',
         '17500_20000','20000_22500','22500_23093']

# Positional range of walking_df rows processed by this run. Slicing by
# position (instead of looking labels up) tolerates tables shorter than the
# range and indexes with gaps.
CHUNK_START = 0
CHUNK_STOP = 2500


def _load_first_json_record(path):
    """Parse each line of the file at *path* as JSON and return the first value."""
    with open(path) as handle:
        return [json.loads(line) for line in handle][0]


def _split_motion_samples(samples):
    """Split a list of DeviceMotion samples into three parallel lists.

    Returns (timestamps, accelerations, attitudes): the 'timestamp' values,
    the 'userAcceleration' components as (x, y, z) tuples and the 'attitude'
    components as (w, x, y, z) tuples.
    """
    timestamps = [sample['timestamp'] for sample in samples]
    accelerations = [(sample['userAcceleration']['x'],
                      sample['userAcceleration']['y'],
                      sample['userAcceleration']['z']) for sample in samples]
    attitudes = [(sample['attitude']['w'],
                  sample['attitude']['x'],
                  sample['attitude']['y'],
                  sample['attitude']['z']) for sample in samples]
    return timestamps, accelerations, attitudes


_chunk = walking_df.iloc[CHUNK_START:CHUNK_STOP]
_rows = zip(_chunk['healthCode'], _chunk['recordId'], _chunk['medTimepoint'],
            _chunk['DMoutboundPaths'], _chunk['DMrestPaths'])

# Rows are collected here and turned into a frame once, after the loop;
# growing a DataFrame row by row copies it on every iteration.
_records = []

for entry, (healthCode, recordId, medTimepoint, walkPath, restPath) in enumerate(_rows, CHUNK_START):

    # Progress indicator.
    if entry % 250 == 0:
        print(entry)

    walkingrecordlist = _load_first_json_record(walkPath)
    restrecordlist = _load_first_json_record(restPath)

    time, acceleration_vector, attitude_quaternion = _split_motion_samples(walkingrecordlist)
    rest_time, rest_acceleration_vector, rest_attitude_quaternion = _split_motion_samples(restrecordlist)

    # Magnitude (Euclidean norm) of the raw acceleration at every sample; a
    # directionless "axis" for the feature calculations.
    X, Y, Z = zip(*acceleration_vector)
    total_rawacceleration = np.sqrt(np.square(X) + np.square(Y) + np.square(Z))
    restX, restY, restZ = zip(*rest_acceleration_vector)
    rest_total_rawacceleration = np.sqrt(np.square(restX) + np.square(restY) + np.square(restZ))

    # Apply each sample's attitude quaternion to its acceleration vector.
    quatrot = [multiply_quaternionANDvector(q, a)
               for q, a in zip(attitude_quaternion, acceleration_vector)]
    rotX, rotY, rotZ = zip(*quatrot)

    rest_quatrot = [multiply_quaternionANDvector(q, a)
                    for q, a in zip(rest_attitude_quaternion, rest_acceleration_vector)]
    rest_rotX, rest_rotY, rest_rotZ = zip(*rest_quatrot)

    # One row per recording; the sequence-valued fields are stored whole in a
    # single cell.
    _records.append({'time': time, 'rotX': rotX, 'rotY': rotY, 'rotZ': rotZ,
                     'total_rawacceleration': total_rawacceleration, 'rest_time': rest_time,
                     'rest_rotX': rest_rotX, 'rest_rotY': rest_rotY, 'rest_rotZ': rest_rotZ,
                     'rest_total_rawacceleration': rest_total_rawacceleration,
                     'healthCode': healthCode, 'recordId': recordId, 'medTimepoint': medTimepoint})

# Append all collected rows to the existing frame in one step.
if _records:
    Accelerometer_df = pd.concat([Accelerometer_df, pd.DataFrame(_records)], ignore_index=True)
    

# cross-reference accelerometer data with demographics data and pickle

In [None]:
# Add demographics data to the accelerometer records.
# The survey/metadata columns listed below are dropped from the demographics
# table; what remains is joined onto the accelerometer records by healthCode.
Demographics_df = demographics_df.drop(['recordId','createdOn','appVersion','phoneInfo','are-caretaker', 'deep-brain-stimulation',
                                        'education','employment','health-history','healthcare-provider',
                                        'home-usage','last-smoked','maritalStatus','medical-usage',
                                        'medical-usage-yesterday','packs-per-day',
                                        'past-participation','phone-usage','race','smartphone',
                                        'smoked','surgery','video-usage','years-smoking',
                                        'diagnosis-year','medication-start-year','onset-year'],axis=1)

# pd.merge defaults to an inner join: accelerometer records whose healthCode
# has no demographics row are not carried over.
Combined_df = pd.merge(Accelerometer_df, Demographics_df, on='healthCode')

# Persist the merged frame.
Combined_df.to_pickle('Accelerometer_Demographics_df')

In [None]:
##############################################################################
# Plot an ROC curve (false positive rate vs. true positive rate).
# NOTE(review): fpr, tpr and roc_auc are not assigned in the visible part of
# the notebook; they must already exist when this cell runs.

# Shared text styling for the axis labels and the title.
_roc_text_style = {'color':'k','fontsize': 28}
_roc_tick_size = 28

pl.figure()
pl.plot(fpr, tpr, 'g', label='ROC curve (area = %0.2f)' % roc_auc)
# Dashed diagonal from (0, 0) to (1, 1) for reference.
pl.plot([0, 1], [0, 1], 'k--')
pl.xlim([0.0, 1.0])
pl.ylim([0.0, 1.05])
pl.xticks(color='k', size=_roc_tick_size)
pl.yticks(color='k', size=_roc_tick_size)
pl.xlabel('False Positive Rate', _roc_text_style)
pl.ylabel('True Positive Rate', _roc_text_style)
pl.title('Receiver operating characteristic', _roc_text_style)
pl.legend(loc="lower right", fontsize=18)
pl.show()

# load necessary data into feature spaces

In [None]:
# Columns of the feature table, in the order they appear in feature_df.
_feature_columns = [
    # record identification and demographics
    'healthCode', 'record', 'age', 'gender', 'professional-diagnosis', 'recordId',
    # whole-signal features per axis
    'totpowerX', 'totpowerY', 'totpowerZ',
    'powentropyX', 'powentropyY', 'powentropyZ',
    'numpeaksX', 'numpeaksY', 'numpeaksZ',
    'MEANpeakintX', 'MEANpeakintY', 'MEANpeakintZ',
    'CVpeakintX', 'CVpeakintY', 'CVpeakintZ',
    'duration', 'FFT1Z', 'FFT2Z', 'FFT3Z',
    # chunk-wise features, X axis
    'modeFFT1chunksX', 'modeFFT2chunksX', 'SDFFT1chunksX', 'SDFFT2chunksX',
    'modeFFT3chunksX', 'SDFFT3chunksX',
    'meanpowchunksX', 'CVpowchunksX', 'meanentropychunksX', 'CVentropychunksX',
    # chunk-wise features, Y axis
    'modeFFT1chunksY', 'SDFFT1chunksY', 'modeFFT2chunksY', 'SDFFT2chunksY',
    'modeFFT3chunksY', 'SDFFT3chunksY',
    'meanpowchunksY', 'CVpowchunksY', 'meanentropychunksY', 'CVentropychunksY',
    # chunk-wise features, Z axis
    'modeFFT1chunksZ', 'SDFFT1chunksZ', 'modeFFT2chunksZ', 'SDFFT2chunksZ',
    'modeFFT3chunksZ', 'SDFFT3chunksZ',
    'meanpowchunksZ', 'CVpowchunksZ', 'meanentropychunksZ', 'CVentropychunksZ',
    'onset_lag', 'sumabs_acceleration',
]

# One (separate) empty list per feature column, then an empty frame with
# exactly those columns.
featureinfo = {column: [] for column in _feature_columns}

feature_df = pd.DataFrame(featureinfo, columns=_feature_columns)