In [8]:
## package imports ##
# autoreload mode 2 re-imports modules before each cell executes, so edits to
# the local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys
# NOTE(review): pdkit is not referenced in the cells visible here -- confirm it is needed.
import pdkit
# Must run before the `utils` import below: it puts the project's src folder on
# the module search path. The path is relative, so it resolves against the
# kernel's working directory.
sys.path.append("../../src")
import utils.query_utils as query
import synapseclient as sc
import pandas as pd

# Shared Synapse client used as a global by the cells below. No credentials are
# passed here, so they have to come from the client's own configuration/cache.
syn = sc.login()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Welcome, aryton tediarjo!



INFO:synapseclient_default:Welcome, aryton tediarjo!



In [4]:
# --- Synapse IDs: mPower version 1 (gait features + demographics) ---
MPOWER_GAIT_DATA_V1 = "syn21111818"
MPOWER_DEMO_DATA_V1 = "syn10371840"

# --- Synapse IDs: mPower version 2 ---
MPOWER_GAIT_DATA_V2 = "syn21113231"
MPOWER_DEMO_DATA_V2 = "syn15673379"

# --- Synapse ID: mPower passive gait data ---
MPOWER_GAIT_DATA_PASSIVE = "syn21114136"

# --- Synapse IDs: EMS profile / demographic / gait data ---
EMS_PROF_DATA = "syn10235463"
EMS_DEMO_DATA = "syn10295288"
EMS_GAIT_DATA = "syn21256442"

# Non-feature columns that survive the final column filter of the interim
# dataset builders (everything else must contain a "." to be kept).
METADATA_COLS = [
    "recordId",
    "healthCode",
    "appVersion",
    "phoneInfo",
    "createdOn",
    "PD",
    "MS",
    "gender",
    "age",
    "version",
]

# Link to the script version of this cleaning code.
GIT_URL = "https://github.com/arytontediarjo/mPower-Analysis/blob/master/src/clean.py"


In [6]:
def create_mPowerV1_interim_gait_data(GAIT_DATA, DEMO_DATA):
    """
    Function to format mpower version 1 data: joins the featurized gait
    records with cleaned demographic data.

    list of formatting done:
        -> Clean table from test users (dataGroups NOT LIKE '%test_user%')
        -> Filter gender to "Female" / "Male" rows, then lowercase the value
        -> Drop rows without a diagnosis and map it to binary values
           (True -> 1.0, False -> 0.0)
        -> Cast age to float and keep only ages within 10-100 (inclusive)
        -> Combine raw data with demographic table (inner join on healthCode)
        -> Stack outbound rows followed by return rows, after passing each
           half through query.fix_column_name
        -> Keep only columns containing "." plus those listed in METADATA_COLS

    Relies on the notebook-level `syn` client, the `query` helper module and
    the METADATA_COLS constant.

    Parameters:
    GAIT_DATA = Takes in raw featurized gait data on version 1 (synapse file entity)
    DEMO_DATA = Takes in demographic data (synapse table entity)

    returns a formatized dataset (pandas DataFrame with a fresh 0..n-1 index) of
    featurized gait data with its respective demographic data; the frame is an
    independent copy, so callers may add columns to it
    """
    demo_data = syn.tableQuery("SELECT age, healthCode, inferred_diagnosis as PD, gender FROM {} where dataGroups\
                               NOT LIKE '%test_user%'".format(DEMO_DATA)).asDataFrame()

    # Filter first, then derive columns with .assign so no column is ever
    # written onto a filtered slice (avoids SettingWithCopyWarning).
    is_listed_gender = demo_data["gender"].isin(["Female", "Male"])
    demo_data = (
        demo_data[is_listed_gender]
        .dropna(subset=["PD"])                                     ## drop if no diagnosis
        .assign(
            # values other than True/False become NaN under .map
            PD=lambda df: df["PD"].map({True: 1.0, False: 0.0}),   ## encode as numeric binary
            age=lambda df: df["age"].astype(float),                ## convert age to float
            gender=lambda df: df["gender"].str.lower(),            ## lowercase gender for consistency
        )
    )
    demo_data = demo_data[demo_data["age"].between(10, 100)]       ## subset to realistic age ranges

    gait_data = query.get_file_entity(syn=syn, synid=GAIT_DATA)
    data = pd.merge(gait_data, demo_data, on="healthCode", how="inner")

    # Each half keeps the shared columns plus one walk direction.
    return_cols = [col for col in data.columns if "outbound" not in col]
    outbound_cols = [col for col in data.columns if "return" not in col]

    # NOTE(review): fix_column_name presumably gives both halves the same
    # column names so they stack row-wise -- confirm in utils.query_utils.
    data = pd.concat(
        [query.fix_column_name(data[outbound_cols]),
         query.fix_column_name(data[return_cols])],
        ignore_index=True,                                         ## combine return and outbound
    )

    keep_cols = [col for col in data.columns if ("." in col) or (col in METADATA_COLS)]
    # .copy() hands the caller a frame it owns outright.
    return data[keep_cols].copy()

In [9]:
# Build the V1 interim dataset and tag every row with its source version.
# .assign returns a new frame, so the frame handed back by the builder is not
# mutated in place and re-running this cell always yields the same result.
dataV1 = create_mPowerV1_interim_gait_data(
    GAIT_DATA=MPOWER_GAIT_DATA_V1,
    DEMO_DATA=MPOWER_DEMO_DATA_V1,
).assign(version="mpower_v1")

In [10]:
# Preview only the first rows instead of rendering the whole frame.
dataV1.head(10)

Unnamed: 0,recordId,healthCode,appVersion,phoneInfo,createdOn,x.duration,x.no_of_steps,x.gait_step_regularity,x.gait_stride_regularity,x.gait_symmetry,...,AA.gait_symmetry,AA.frequency_of_peaks,AA.max_freeze_index,AA.freeze_occurences,AA.speed_of_gait,AA.0,age,PD,gender,version
0,a545c32b-1374-4197-ac62-29c9c2b16619,f75ebae0-a58b-440e-b57d-609bedb06181,"version 1.0, build 7",iPhone 6,1425939566000,11.4373405,18.0,0.846399318705465,0.9591392318747444,0.11273991316927923,...,0.0,83.97607795373418,1.641640305519104,0.0,0.1663954110163445,#ERROR,27.0,0.0,female,mpower_v1
1,63a4d801-e24f-4270-a8fb-3fe50b74d516,f75ebae0-a58b-440e-b57d-609bedb06181,"version 1.0, build 7",iPhone 6,1425939489000,12.027305625,15.0,0.8639674945530991,0.9643286699793396,0.10036117542624035,...,0.0,449.2724044979811,1.157542109489441,0.0,0.1944815285696478,#ERROR,27.0,0.0,female,mpower_v1
2,4994f24b-2029-4d75-9145-728a99305f92,f75ebae0-a58b-440e-b57d-609bedb06181,"version 1.0, build 7",iPhone 6,1425939405000,11.286638958,15.0,0.8619332515902689,0.9636576703068518,0.10172441871658278,...,0.0,85.4137740692418,1.090750813484192,0.0,0.1937494745977141,#ERROR,27.0,0.0,female,mpower_v1
3,75f544a3-27c1-4e4e-b0bc-e82450d6f0a3,f75ebae0-a58b-440e-b57d-609bedb06181,"version 1.0, build 7",iPhone 6,1426015468000,13.81617075,19.0,0.93361471814028,0.9336147181402799,0.0,...,0.05446918477429763,23.220413905402445,0.6663214564323425,0.0,0.23817553038559086,#ERROR,27.0,0.0,female,mpower_v1
4,884a26f4-ce18-4a75-b2e1-697bb8935cb0,f75ebae0-a58b-440e-b57d-609bedb06181,"version 1.0, build 7",iPhone 6,1426014312000,15.50300125,21.0,0.7879859956636691,0.9407496218640653,0.15276362620039619,...,0.05420893087491274,79.5747121000884,0.5980724692344666,0.0,0.21499161204896475,#ERROR,27.0,0.0,female,mpower_v1
5,f9b70717-4921-4f72-a522-2ecd20dc7540,f75ebae0-a58b-440e-b57d-609bedb06181,"version 1.0, build 7",iPhone 6,1426015558000,13.926730167,22.0,0.7551581636309943,0.9304328393252074,0.17527467569421307,...,0.0864852986587642,36.35624441693701,0.9652944803237915,0.0,0.23354610114038152,#ERROR,27.0,0.0,female,mpower_v1
6,28eb6ee2-a61c-4ccb-b71c-8440436dbfaa,294629e7-5991-47e8-936d-620137d87de1,"version 1.0, build 7",iPhone 6,1425959571000,21.655832792,23.0,0.9811913016992506,0.9811913016992506,0.0,...,0.06293990535812422,45.527333311834916,1.5945069789886475,0.0,0.12281404554428016,#ERROR,22.0,0.0,male,mpower_v1
7,5e8f4757-daa7-4465-beb7-f60fbe7ef4ae,294629e7-5991-47e8-936d-620137d87de1,"version 1.0, build 7",iPhone 6,1425933596000,20.05242725,0.0,0.0,0.0,0.0,...,0.04128516062391052,152.47313814622424,0.9308912754058838,0.0,0.1460334318715838,#ERROR,22.0,0.0,male,mpower_v1
8,90777ad7-3523-4a7f-b7ff-11131946d6b6,317f384e-0f34-403d-889b-45b248259642,"version 1.0, build 7",iPhone 5s (GSM),1425929234000,14.752181958,14.0,0.9064602779637093,0.9064602779637093,0.0,...,0.07838357428518927,34.19527578491958,0.6759125590324402,0.0,0.25403376230675,#ERROR,20.0,0.0,male,mpower_v1
9,0c9dbc52-a491-47ba-8afc-c6eca33db87c,317f384e-0f34-403d-889b-45b248259642,"version 1.0, build 7",iPhone 5s (GSM),1425928570000,24.026402708,4.0,0.9621958635704816,1.0,0.03780413642951841,...,0.0,21.36019080600151,1.7986871004104614,0.0,0.16248858285881007,#ERROR,20.0,0.0,male,mpower_v1
