#  PVT Data Prep (Alertness Checker)

In [1]:
import datetime as dt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell
import synapseclient
from synapseclient import Activity, Schema, Table, as_table_columns

# Create the Synapse client object that every later cell uses to fetch and store data.
syn = synapseclient.Synapse()
# login() is called without arguments, so no credentials are written in this notebook.
# NOTE(review): presumably they come from a local Synapse config/credential cache — confirm.
syn.login()


UPGRADE AVAILABLE

A more recent version of the Synapse Client (2.0.0) is available. Your version (1.9.4) can be upgraded by typing:
    pip install --upgrade synapseclient

Python Synapse Client version 2.0.0 release notes

https://python-docs.synapse.org/build/html/news.html



Welcome, Sean Deering!



### Get Raw PVT Data

In [2]:
# Synapse ID of the raw PVT export; reused further down as the provenance input.
pvt_raw_id = 'syn7117882'

# Resolve the entity first, then parse the CSV found at its local path.
pvt_raw_entity = syn.get(pvt_raw_id)
pvt_raw = pd.read_csv(pvt_raw_entity.path)

# Display a single row as a quick look at the wide, one-column-per-stimulus layout.
pvt_raw.head(1)

Unnamed: 0,participantId,result.VigilanceResponseResult,result.StimuliArray[0].InterStimuliTime,result.StimuliArray[0].WrongTaps,result.StimuliArray[0].ReactionTime,result.StimuliArray[0].StimulusNo,result.StimuliArray[1].InterStimuliTime,result.StimuliArray[1].WrongTaps,result.StimuliArray[1].ReactionTime,result.StimuliArray[1].StimulusNo,...,result.StimuliArray[45].ReactionTime,result.StimuliArray[45].StimulusNo,result.StimuliArray[46].InterStimuliTime,result.StimuliArray[46].WrongTaps,result.StimuliArray[46].ReactionTime,result.StimuliArray[46].StimulusNo,endDate,startDate,item,timestamp
0,93ceb9bc-5e6f-495d-8d2f-786bc694fc4f,251.77747,2.000023,0,397.026002,1,2.394613,0.0,370.995045,2.0,...,464.231968,46.0,3.894904,0.0,489.142001,47.0,2016-05-08T18:08:22-07:00,2016-05-08T18:08:22-07:00,VigilanceResponseResult,2016-05-08T18:08:22-07:00


### Process PVT Data

In [3]:
# Reshape wide -> long: one row per (record, 'result.StimuliArray[i].<feature>' column).
id_cols = ['participantId', 'endDate', 'startDate', 'item', 'timestamp']
stimuli_cols = pvt_raw.columns[pvt_raw.columns.str.startswith('result.StimuliArray')]
pvt = pvt_raw.melt(id_vars=id_cols,
                   value_vars=stimuli_cols,
                   var_name='feature',
                   value_name='value')

# Drop columns that are not carried into the curated table.
# `columns=` is used because passing `axis` positionally was removed in pandas 2.0.
pvt = pvt.drop(columns=['item', 'startDate', 'endDate'])

# Extract the StimuliArray index (the number between the square brackets).
# Raw strings keep the regex escapes (\[ \d \]) from being treated as string escapes.
pvt['block'] = pvt.feature.str.extract(r'.*\[(\d+)\].*', expand=False)

# Strip the 'result.StimuliArray[i].' prefix so only the feature name remains.
pvt['feature'] = pvt.feature.replace(to_replace=r'result.*\]\.', value='', regex=True)

# Reshape long -> tidy: one row per (participant, timestamp, block),
# with one column per feature.
pvt = (pvt.set_index(['participantId', 'timestamp', 'block', 'feature'])
          .unstack(level=-1)
          .reset_index())

# Flatten the two-level column index left behind by unstack():
# the former index columns look like ('participantId', '') and the feature
# columns look like ('value', 'ReactionTime'). Keep the feature name when
# there is one, otherwise the top-level name. This avoids hard-coding how
# many id/feature columns exist.
new_colnames = [feature_name or top_name for top_name, feature_name in pvt.columns]
pvt.columns = new_colnames

# Remove the block column: it is redundant with StimulusNo
# (in the sample row shown, StimuliArray[0] carries StimulusNo 1).
pvt = pvt.drop(columns=['block'])

# Rename columns for the released table.
pvt = pvt.rename(columns={"StimulusNo": "StimulusNum",
                          "InterStimuliTime": "InterStimulusInterval"})

# Delete rows where StimulusNum is NaN. The explicit copy makes the
# column assignments below operate on an independent frame rather than a
# filtered view (avoids SettingWithCopyWarning).
pvt = pvt[pvt.StimulusNum.notnull()].copy()

# Change dtype of StimulusNum to int (safe now that the NaN rows are gone).
pvt['StimulusNum'] = pvt['StimulusNum'].astype(int)

# Round to 4 decimal places.
pvt['InterStimulusInterval'] = pvt['InterStimulusInterval'].round(decimals=4)
pvt['ReactionTime'] = pvt['ReactionTime'].round(decimals=4)

# Replace empty / whitespace-only string cells with NaN.
pvt = pvt.replace(r'^\s*$', np.nan, regex=True)

## Filter out underage participants

In [4]:
# Tab-separated list of participants to exclude, fetched from Synapse.
underage_file = syn.get('syn21905452')
underage_participants = pd.read_csv(underage_file.path, sep="\t")

# Keep only rows whose participantId is absent from that list.
is_underage = pvt.participantId.isin(underage_participants.participantId)
pvt = pvt[~is_underage]

### Remove test accounts

In [5]:
# Excel sheet of test accounts, fetched from Synapse.
test_accounts_file = syn.get('syn21958537')
test_accounts = pd.read_excel(test_accounts_file.path)

# Keep only rows whose participantId is not one of the test accounts.
is_test_account = pvt.participantId.isin(test_accounts.participantId)
pvt = pvt[~is_test_account]

## Create Internal & External Copies of the Data

In [6]:
# Download the per-participant sharing information from Synapse.
sharing_file = syn.get('syn21557215')
sharing_info = pd.read_excel(sharing_file.path)

# Rows whose `sharing` value is 'all_qualified_researchers'.
broad_sharing_mask = sharing_info['sharing'] == 'all_qualified_researchers'
healthCodes_with_broadsharing = sharing_info[broad_sharing_mask]

# The external copy is the subset of pvt whose participantId appears in the
# 'participant id' column of those rows (note the space in that column name).
in_external_release = pvt.participantId.isin(healthCodes_with_broadsharing['participant id'])
EXTERNAL_PVT_DATA = pvt[in_external_release]
EXTERNAL_PVT_DATA.shape

(809025, 6)

### External - Upload to Synapse

In [7]:
# Number of distinct participants in the external copy.
# Only the last expression of a cell is displayed, so this value is not shown.
external_participant_ids = EXTERNAL_PVT_DATA.participantId.unique()
len(external_participant_ids)

# Synapse project that receives the external table.
SH_EXTERNAL_PROJECT = 'syn18492837'

# Derive the table columns from the DataFrame, then describe the table.
external_columns = as_table_columns(EXTERNAL_PVT_DATA)
table_schema_external = Schema(
    name='Alertness Checker - Psychomotor Vigilance Task',
    columns=external_columns,
    parent=SH_EXTERNAL_PROJECT,
)

# Store schema and rows together; keep the result for the provenance step.
external_table = Table(table_schema_external, EXTERNAL_PVT_DATA)
pvt_synTable_external = syn.store(external_table)
pvt_synTable_external

<synapseclient.table.CsvFileTable at 0x1109bf090>

### Internal - Upload to Synapse

In [9]:
# Synapse project that receives the internal table.
SH_INTERNAL_PROJECT = 'syn7066726'

# The internal table is built from the full pvt frame (no sharing filter applied).
internal_columns = as_table_columns(pvt)
table_schema_internal = Schema(
    name='Alertness Checker - Psychomotor Vigilance Task Internal',
    columns=internal_columns,
    parent=SH_INTERNAL_PROJECT,
)

# Store schema and rows together; keep the result for the provenance step.
internal_table = Table(table_schema_internal, pvt)
pvt_synTable_internal = syn.store(internal_table)

## Attach provenance


In [10]:
# Describe how the tables were produced: the raw PVT entity is the input and
# this notebook's GitHub URL is the executed code.
provenance_fields = dict(
    name= 'Psychomotor Vigilance Task data curation',
    description='Process and convert raw data to table format',
    used=pvt_raw_id,
    executed='https://github.com/apratap/SleepHealth_Data_Release/blob/master/Create_Alertness_Checker_PVT.ipynb',
)
activity = Activity(**provenance_fields)

# Attach the activity to the internal table first ...
syn.setProvenance(pvt_synTable_internal, activity)

# ... then to the external table; as the last expression, its result is displayed.
syn.setProvenance(pvt_synTable_external, activity)

{u'createdBy': u'3334346',
 u'createdOn': u'2020-04-16T20:16:44.674Z',
 u'description': u'Process and convert raw data to table format',
 u'etag': u'0e8784cb-d09b-4b1d-85da-da640c82f249',
 u'id': u'10233224',
 u'modifiedBy': u'3334346',
 u'modifiedOn': u'2020-04-16T20:16:44.674Z',
 u'name': u'Psychomotor Vigilance Task data curation',
 u'used': [{u'concreteType': u'org.sagebionetworks.repo.model.provenance.UsedEntity',
   u'reference': {u'targetId': u'syn7117882', u'targetVersionNumber': 753},
   u'wasExecuted': False},
  {u'concreteType': u'org.sagebionetworks.repo.model.provenance.UsedURL',
   u'name': u'https://github.com/apratap/SleepHealth_Data_Release/blob/master/Create_Alertness_Checker_PVT.ipynb',
   u'url': u'https://github.com/apratap/SleepHealth_Data_Release/blob/master/Create_Alertness_Checker_PVT.ipynb',
   u'wasExecuted': True}]}