# This notebook downloads the challenge-related data.
# It also creates .tsv files that combine the metadata with the local file locations.

In [1]:
# baseDIR must hold the base directory of the github repo; the metadata .tsv files produced at the
# end of this notebook are written below it.
# If you are running this notebook in an empty namespace, uncomment (and update) the next line:
#baseDIR = '/home/pataki/synapse/gitParkinson/'
try:
    baseDIR
except NameError:
    # Fail fast: without baseDIR the notebook would otherwise only crash at the final to_csv call,
    # after the credential prompt and the file download have already run.
    raise NameError(
        'Error: baseDIR not found! Define baseDIR (base directory of the github repo) '
        'before running this notebook.'
    ) from None

In [2]:
import getpass
import os

import pandas as pd
import synapseclient

In [3]:
# Ask for the Synapse credentials interactively instead of hard-coding them in the notebook source.
print('Email for synapse:')
EMAIL = input()
print('\nPassword for synapse:')
# getpass reads the password without echoing the typed characters.
PWD   = getpass.getpass()

········


In [4]:
# Log in to Synapse with the credentials entered above; `syn` is the client object used by every
# later table query and download in this notebook.
# NOTE(review): presumably rememberMe=True caches the credentials locally for later sessions, and
# newer synapseclient releases may expect a personal access token instead of a password — confirm
# against the installed version.
syn = synapseclient.login(email=EMAIL, password=PWD, rememberMe=True)

Welcome, Balint Armin Pataki!



## Download the test data 

In [5]:
# Fetch the complete metadata table of the test set from Synapse and load it into a DataFrame.
testDataQuery = syn.tableQuery("SELECT * FROM syn10701954")
testDF = testDataQuery.asDataFrame()

# Preview the first rows.
testDF.head(n=5)

(2160, 11)


Unnamed: 0,dataFileHandleId,device,patient,session,site,task,visit,deviceSide,tremorScore,dyskinesiaScore,bradykinesiaScore
12959_10,17287630,GENEActiv,14_BOS,1,Boston,drnkg,1,Right,Score,NotApplicable,Score
12960_10,17287634,Pebble,14_BOS,1,Boston,drnkg,1,Left,Score,NotApplicable,Score
12961_10,17287639,GENEActiv,14_BOS,1,Boston,fldng,1,Right,Score,NotApplicable,Score
12962_10,17287642,Pebble,14_BOS,1,Boston,fldng,1,Left,Score,NotApplicable,Score
12963_10,17287647,GENEActiv,14_BOS,1,Boston,ftnl1,1,Right,NotApplicable,Score,NotApplicable


In [6]:
# Second query on the test table, restricted to the file-handle column; the resulting query object
# is what syn.downloadTableColumns receives below to fetch the referenced files.
query_ldopa_table_test = syn.tableQuery("SELECT 'dataFileHandleId' FROM syn10701954")

In [7]:
# Sanity check: (rows, columns) of the file-handle query result.
query_ldopa_table_test.asDataFrame().shape

(2160, 1)

In [8]:
# Download (or reuse from the local Synapse cache) the file behind every handle in the
# 'dataFileHandleId' column. The result is used below as a mapping whose keys are file-handle ids
# and whose values are local file paths.
tsv_files_test  = syn.downloadTableColumns(query_ldopa_table_test, "dataFileHandleId")

Downloading 0 files, 2160 cached locally


In [9]:
# Lay out the {file-handle id -> local file path} mapping of the test download as a two-column
# table. The ids are converted to int, presumably so that they match the integer
# 'dataFileHandleId' column of the metadata table in the merge that follows.
fileMapDF = pd.DataFrame({
    'dataFileHandleId': [int(handle_id) for handle_id in tsv_files_test.keys()],
    'fileName': list(tsv_files_test.values()),
})
fileMapDF.head()

Unnamed: 0,dataFileHandleId,fileName
0,17287630,/home/pataki/.synapseCache/630/17287630/drnkg_...
1,17287634,/home/pataki/.synapseCache/634/17287634/drnkg_...
2,17287639,/home/pataki/.synapseCache/639/17287639/fldng_...
3,17287642,/home/pataki/.synapseCache/642/17287642/fldng_...
4,17287647,/home/pataki/.synapseCache/647/17287647/ftnl1_...


In [10]:
# Attach the local file path of every downloaded recording to its metadata row.
# A 'fileName' column left over from an earlier run of this cell is dropped first, so re-running
# the cell gives the same result instead of producing 'fileName_x' / 'fileName_y' columns.
# The inner join keeps only rows whose file handle appears in both tables.
testDF = pd.merge(
    fileMapDF,
    testDF.drop(columns=['fileName'], errors='ignore'),
    on='dataFileHandleId',
    how='inner',
)
testDF.head()

Unnamed: 0,dataFileHandleId,fileName,device,patient,session,site,task,visit,deviceSide,tremorScore,dyskinesiaScore,bradykinesiaScore
0,17287630,/home/pataki/.synapseCache/630/17287630/drnkg_...,GENEActiv,14_BOS,1,Boston,drnkg,1,Right,Score,NotApplicable,Score
1,17287634,/home/pataki/.synapseCache/634/17287634/drnkg_...,Pebble,14_BOS,1,Boston,drnkg,1,Left,Score,NotApplicable,Score
2,17287639,/home/pataki/.synapseCache/639/17287639/fldng_...,GENEActiv,14_BOS,1,Boston,fldng,1,Right,Score,NotApplicable,Score
3,17287642,/home/pataki/.synapseCache/642/17287642/fldng_...,Pebble,14_BOS,1,Boston,fldng,1,Left,Score,NotApplicable,Score
4,17287647,/home/pataki/.synapseCache/647/17287647/ftnl1_...,GENEActiv,14_BOS,1,Boston,ftnl1,1,Right,NotApplicable,Score,NotApplicable


In [11]:
# Write the test metadata (one row per recording, including its local file path) as a
# tab-separated file into the metaDB folder below baseDIR, without the DataFrame index.
# os.path.join keeps the path correct whether or not baseDIR ends with a path separator.
testDF.to_csv(os.path.join(baseDIR, 'metaDB', 'testMetaDF.tsv'), sep='\t', index=False)

## Download the train data 

In [12]:
# Fetch the complete metadata table of the training set from Synapse and load it into a DataFrame.
trainDataQuery = syn.tableQuery("SELECT * FROM syn10495809")
trainDF = trainDataQuery.asDataFrame()

# Preview the first rows.
trainDF.head(n=5)

(5168, 11)


Unnamed: 0,dataFileHandleId,device,patient,session,site,task,visit,deviceSide,tremorScore,dyskinesiaScore,bradykinesiaScore
25819_22,17286028,GENEActiv,13_BOS,1,Boston,drnkg,1,Right,1.0,,1.0
25820_22,17286030,Pebble,13_BOS,1,Boston,drnkg,1,Left,1.0,,1.0
25821_22,17286034,GENEActiv,13_BOS,1,Boston,fldng,1,Right,2.0,,1.0
25822_22,17286036,Pebble,13_BOS,1,Boston,fldng,1,Left,1.0,,1.0
25823_22,17286039,GENEActiv,13_BOS,1,Boston,ftnl1,1,Right,,0.0,


In [13]:
# Second query on the training table, restricted to the file-handle column; the resulting query
# object is what syn.downloadTableColumns receives below to fetch the referenced files.
query_ldopa_table_train = syn.tableQuery("SELECT 'dataFileHandleId' FROM syn10495809")

In [14]:
# Sanity check: (rows, columns) of the file-handle query result.
query_ldopa_table_train.asDataFrame().shape

(5168, 1)

In [15]:
# Download (or reuse from the local Synapse cache) the file behind every handle in the
# 'dataFileHandleId' column. The result is used below as a mapping whose keys are file-handle ids
# and whose values are local file paths.
tsv_files_train  = syn.downloadTableColumns(query_ldopa_table_train, "dataFileHandleId")

Downloading 0 files, 5168 cached locally


In [16]:
# Lay out the {file-handle id -> local file path} mapping of the training download as a two-column
# table. The ids are converted to int, presumably so that they match the integer
# 'dataFileHandleId' column of the metadata table in the merge that follows.
fileMapDF = pd.DataFrame({
    'dataFileHandleId': [int(handle_id) for handle_id in tsv_files_train.keys()],
    'fileName': list(tsv_files_train.values()),
})
fileMapDF.head()

Unnamed: 0,dataFileHandleId,fileName
0,17286028,/home/pataki/.synapseCache/28/17286028/drnkg_G...
1,17286030,/home/pataki/.synapseCache/30/17286030/drnkg_P...
2,17286034,/home/pataki/.synapseCache/34/17286034/fldng_G...
3,17286036,/home/pataki/.synapseCache/36/17286036/fldng_P...
4,17286039,/home/pataki/.synapseCache/39/17286039/ftnl1_G...


In [17]:
# Attach the local file path of every downloaded recording to its metadata row.
# A 'fileName' column left over from an earlier run of this cell is dropped first, so re-running
# the cell gives the same result instead of producing 'fileName_x' / 'fileName_y' columns.
# The inner join keeps only rows whose file handle appears in both tables.
trainDF = pd.merge(
    fileMapDF,
    trainDF.drop(columns=['fileName'], errors='ignore'),
    on='dataFileHandleId',
    how='inner',
)
trainDF.head()

Unnamed: 0,dataFileHandleId,fileName,device,patient,session,site,task,visit,deviceSide,tremorScore,dyskinesiaScore,bradykinesiaScore
0,17286028,/home/pataki/.synapseCache/28/17286028/drnkg_G...,GENEActiv,13_BOS,1,Boston,drnkg,1,Right,1.0,,1.0
1,17286030,/home/pataki/.synapseCache/30/17286030/drnkg_P...,Pebble,13_BOS,1,Boston,drnkg,1,Left,1.0,,1.0
2,17286034,/home/pataki/.synapseCache/34/17286034/fldng_G...,GENEActiv,13_BOS,1,Boston,fldng,1,Right,2.0,,1.0
3,17286036,/home/pataki/.synapseCache/36/17286036/fldng_P...,Pebble,13_BOS,1,Boston,fldng,1,Left,1.0,,1.0
4,17286039,/home/pataki/.synapseCache/39/17286039/ftnl1_G...,GENEActiv,13_BOS,1,Boston,ftnl1,1,Right,,0.0,


In [18]:
# Write the training metadata (one row per recording, including its local file path) as a
# tab-separated file into the metaDB folder below baseDIR, without the DataFrame index.
# os.path.join keeps the path correct whether or not baseDIR ends with a path separator.
trainDF.to_csv(os.path.join(baseDIR, 'metaDB', 'trainMetaDF.tsv'), sep='\t', index=False)