# In this notebook we load the dataframes of demographics data and walking filenames returned in get__tables. We then open the actual JSON files, perform a quaternion rotation on the accelerometer data, create a new data frame with rotated acceleration, timestamp and healthCode columns, combine it with the demographics dataframe, and pickle the result!

In [None]:
# A quaternion is an element of a 4-dimensional vector space. It is defined as w + xi + yj + zk, where i, j and k are the imaginary units.
# The device Motion object from the iPhone includes an attitude array in addition to the acceleration array. This attitude array describes
# the rotation of the phone in space as a quaternion, and can be used to transform the coordinate frame of the acceleration
# data from phone to world coordinates.

# Code for quaternion rotation was confirmed and adapted from stack overflow post for normalizing quaternion.
# See http://stackoverflow.com/questions/4870393/rotating-coordinate-system-via-a-quaternion

In [355]:
%matplotlib inline

In [356]:
import json
import math

import numpy as np
import pandas as pd
import matplotlib.pylab as pl

In [357]:
# Load the two input tables from local pickle files.
# NOTE(review): unpickling can execute arbitrary code - only load pickles you created yourself.
demographics_df = pd.read_pickle('demographics_df.pkl')
# walking_df supplies the healthCode, recordId, medTimepoint, DMoutboundPaths and DMrestPaths
# columns that the extraction loop further down reads for every entry.
walking_df = pd.read_pickle('walking_df.pkl')

# Quaternion function 1

In [None]:
# This function makes sure that each quaternion in the angular phone information vector is properly normalized. The
# magnitude should be close to 1 within some tolerance. If it is not, the quaternion is rescaled.

In [359]:
def normalize(v, tolerance=0.00001):
    """Rescale ``v`` to unit length when its magnitude has drifted away from 1.

    Parameters
    ----------
    v : iterable of numbers
        Components of the vector (used in this notebook for ``(w, x, y, z)``
        quaternions).
    tolerance : float
        Maximum allowed difference between the squared magnitude and 1.0
        before the vector is rescaled.

    Returns
    -------
    ``v`` itself when the squared magnitude is within ``tolerance`` of 1.0,
    otherwise a new tuple with every component divided by the magnitude.

    Notes
    -----
    A zero-magnitude input is not guarded against; the division fails for it.
    """
    mag2 = sum(n * n for n in v)
    if abs(mag2 - 1.0) > tolerance:
        # math.sqrt is used explicitly: a bare ``sqrt`` is not defined by any
        # import in this notebook and raised NameError on this branch.
        mag = math.sqrt(mag2)
        v = tuple(n / mag for n in v)
    return v

# Quaternion function 2. 

In [None]:
# This function returns the conjugate of the quaternion

In [360]:
def get_quaternion_conjugate(quaternion):
    """Return the conjugate of a ``(w, x, y, z)`` quaternion.

    The scalar part is kept and the three imaginary components are negated.
    """
    scalar, i_part, j_part, k_part = quaternion
    conjugate = (scalar, -i_part, -j_part, -k_part)
    return conjugate

# Quaternion function 3.

In [None]:
# This function multiplies two quaternions

In [361]:
def multiply_quaternions(quaternion1, quaternion2):
    """Return the Hamilton product ``quaternion1 * quaternion2``.

    Both arguments are ``(w, x, y, z)`` sequences; the result is a
    ``(w, x, y, z)`` tuple. The product is not commutative, so the
    argument order matters.
    """
    a_w, a_x, a_y, a_z = quaternion1
    b_w, b_x, b_y, b_z = quaternion2

    scalar = (a_w * b_w) - (a_x * b_x) - (a_y * b_y) - (a_z * b_z)
    i_part = (a_w * b_x) + (a_x * b_w) + (a_y * b_z) - (a_z * b_y)
    j_part = (a_w * b_y) - (a_x * b_z) + (a_y * b_w) + (a_z * b_x)
    k_part = (a_w * b_z) + (a_x * b_y) - (a_y * b_x) + (a_z * b_w)

    return scalar, i_part, j_part, k_part

# Quaternion function 4.

In [362]:
def multiply_quaternionANDvector(q1, v1):
    """Rotate the 3-vector ``v1`` by the quaternion ``q1``.

    ``q1`` is normalized first, ``v1`` (a tuple) is embedded as a quaternion
    with a zero scalar part, and the product ``q * v * conj(q)`` is formed.
    The three imaginary components of that product are returned.
    """
    rotation = normalize(q1, tolerance=0.00001)
    # Tuple concatenation: v1 must itself be a tuple.
    pure_vector = (0.0,) + v1
    partial_product = multiply_quaternions(rotation, pure_vector)
    rotated = multiply_quaternions(partial_product, get_quaternion_conjugate(rotation))
    # Drop the scalar part, keeping (x, y, z).
    return rotated[1:]

# Initialize final data frame

In [363]:
# Column layout of the accumulated table: walking-phase columns, then resting-phase columns, then healthCode.
accelerometer_columns = ['time', 'rotX', 'rotY', 'rotZ', 'total_rawacceleration',
                         'rest_time', 'rest_rotX', 'rest_rotY', 'rest_rotZ',
                         'rest_total_rawacceleration', 'healthCode']
# Every column starts out as an empty list, giving an empty frame with a fixed column order.
dfdata = dict((column, []) for column in accelerometer_columns)
Accelerometer_df = pd.DataFrame(dfdata, columns=accelerometer_columns)

# Go through files and get accelerometer data

In [364]:
# Labels of the entry ranges in walking_df; presumably one output pickle per range - TODO confirm.
mypkls =['0_2500','2500_5000','5000_7500','7500_10000','10000_12500','12500_15000','15000_17500',
         '17500_20000','20000_22500','22500_23093']


def _load_motion_samples(path):
    """Parse every line of the file at ``path`` as JSON and return the first parsed value.

    The file handle is closed as soon as the lines have been read.
    """
    with open(path) as handle:
        parsed_lines = [json.loads(line) for line in handle]
    return parsed_lines[0]


def _extract_motion(samples):
    """Split a list of motion samples into timestamps, acceleration tuples and attitude tuples.

    Each sample must provide 'timestamp', 'userAcceleration' (x, y, z) and
    'attitude' (w, x, y, z). Returns three lists of equal length.
    """
    timestamps = [sample['timestamp'] for sample in samples]
    accelerations = [(sample['userAcceleration']['x'],
                      sample['userAcceleration']['y'],
                      sample['userAcceleration']['z']) for sample in samples]
    attitudes = [(sample['attitude']['w'],
                  sample['attitude']['x'],
                  sample['attitude']['y'],
                  sample['attitude']['z']) for sample in samples]
    return timestamps, accelerations, attitudes


def _rotate_and_measure(accelerations, attitudes):
    """Return ``(rotX, rotY, rotZ, magnitude)`` for one recording.

    ``magnitude`` is the Euclidean norm of every raw acceleration sample (a
    directionless extra "axis" for later feature calculations); the rot*
    tuples hold each sample rotated by its matching attitude quaternion.
    """
    raw_x, raw_y, raw_z = zip(*accelerations)
    magnitude = np.sqrt(np.square(raw_x) + np.square(raw_y) + np.square(raw_z))
    rotated = [multiply_quaternionANDvector(attitude, acceleration)
               for attitude, acceleration in zip(attitudes, accelerations)]
    rot_x, rot_y, rot_z = zip(*rotated)
    return rot_x, rot_y, rot_z, magnitude


# Rows are collected in a plain list and added to the frame once at the end;
# growing a DataFrame row by row copies the whole frame on every iteration.
records = []

for entry in range(0,2500):

    # Progress indicator every 250 entries.
    if entry % 250 == 0:
        print(entry)

    healthCode = walking_df['healthCode'][entry]
    recordId = walking_df['recordId'][entry]
    medTimepoint = walking_df['medTimepoint'][entry]
    walkPath = walking_df['DMoutboundPaths'][entry]
    restPath = walking_df['DMrestPaths'][entry]

    walkingrecordlist = _load_motion_samples(walkPath)
    restrecordlist = _load_motion_samples(restPath)

    # The walking and resting recordings go through the same helpers, so the
    # resting samples are indexed by their own loop variable (the copy-pasted
    # version reused a stale index and repeated a single resting sample).
    time, acceleration_vector, attitude_quaternion = _extract_motion(walkingrecordlist)
    rest_time, rest_acceleration_vector, rest_attitude_quaternion = _extract_motion(restrecordlist)

    rotX, rotY, rotZ, total_rawacceleration = _rotate_and_measure(
        acceleration_vector, attitude_quaternion)
    rest_rotX, rest_rotY, rest_rotZ, rest_total_rawacceleration = _rotate_and_measure(
        rest_acceleration_vector, rest_attitude_quaternion)

    records.append({'time':time,'rotX':rotX,'rotY':rotY,'rotZ':rotZ,
                    'total_rawacceleration':total_rawacceleration,'rest_time':rest_time,
                    'rest_rotX':rest_rotX,'rest_rotY':rest_rotY,'rest_rotZ':rest_rotZ,
                    'rest_total_rawacceleration':rest_total_rawacceleration,
                    'healthCode':healthCode, 'recordId':recordId,'medTimepoint':medTimepoint})

# One row per processed entry, appended after whatever Accelerometer_df already holds.
Accelerometer_df = pd.concat([Accelerometer_df, pd.DataFrame(records)], ignore_index=True)

22500
22750
23000


# Cross-reference Accelerometer data with Demographics data

In [365]:
# add relevant demographics data to walking activity feature data frame
# Demographics columns that are not carried over into the combined table.
unused_demographics_columns = [
    'recordId', 'createdOn', 'appVersion', 'phoneInfo', 'are-caretaker',
    'deep-brain-stimulation', 'education', 'employment', 'health-history',
    'healthcare-provider', 'home-usage', 'last-smoked', 'maritalStatus',
    'medical-usage', 'medical-usage-yesterday', 'packs-per-day',
    'past-participation', 'phone-usage', 'race', 'smartphone', 'smoked',
    'surgery', 'video-usage', 'years-smoking', 'diagnosis-year',
    'medication-start-year', 'onset-year',
]
Demographics_df = demographics_df.drop(unused_demographics_columns, axis=1)

# Join each accelerometer record with the demographics row sharing its healthCode
# (default inner join: records without a demographics match are dropped).
Combined_df = Accelerometer_df.merge(Demographics_df, on='healthCode')


In [None]:
# Remove the small number of records taken just after the participant's Parkinson's medication

In [367]:
# Keep every record whose medTimepoint is anything other than the "just after medication" label.
not_just_medicated = Combined_df['medTimepoint'] != 'Just after Parkinson medication (at your best)'
df = Combined_df[not_just_medicated]

# medTimepoint was only needed for the filter above, so it is dropped from the final table.
Final_df = df.drop('medTimepoint', axis=1)

504

# Pickle the DataFrames for Step 3!

In [370]:
# Write the filtered, merged table to disk for the next step.
# Note: the output name has no '.pkl' extension, unlike the input pickles loaded at the top.
Final_df.to_pickle('Accelerometer_Demographics_df')