# Sleep Assessment Data Prep

In [2]:
import datetime as dt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell
import synapseclient
from synapseclient import Activity, Schema, Table, as_table_columns

# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = 'all'
# Create the Synapse client and log in. No credentials are passed here, so they
# presumably come from a cached session or local config file -- TODO confirm.
syn = synapseclient.Synapse()
syn.login()


UPGRADE AVAILABLE

A more recent version of the Synapse Client (2.0.0) is available. Your version (1.9.4) can be upgraded by typing:
    pip install --upgrade synapseclient

Python Synapse Client version 2.0.0 release notes

https://python-docs.synapse.org/build/html/news.html



Welcome, Sean Deering!



### Get Raw Sleep Assessment Data

In [3]:
# Synapse ID of the raw Sleep Assessment data; reused later as the provenance input.
sleepassessment_raw_id = 'syn7117910'

# Fetch the entity from Synapse, then parse the downloaded file as CSV.
raw_entity = syn.get(sleepassessment_raw_id)
sleepassessment = pd.read_csv(raw_entity.path)


### Process Sleep Assessment Data

In [5]:
#remove brackets
def remove_brackets(col):
    """Return ``col`` cast to strings with every '[' and ']' character removed.

    Parameters
    ----------
    col : pandas.Series
        Column to clean. Values are cast with ``astype(str)`` before the
        brackets are stripped.

    Returns
    -------
    pandas.Series
        Series with the same index as ``col`` and bracket-free text values.

    Notes
    -----
    NOTE(review): because of the ``astype(str)`` cast, missing values may be
    turned into text such as ``'nan'`` depending on the pandas version --
    confirm that this is the intended representation in the released tables.
    """
    as_text = col.astype(str)
    # regex=False: the brackets are literal characters. '[' on its own is not a
    # valid regular expression, so the match mode is pinned explicitly instead
    # of relying on the pandas default for ``regex``.
    without_closing = as_text.str.replace(']', '', regex=False)
    return without_closing.str.replace('[', '', regex=False)
# Columns whose values are passed through remove_brackets. Each column is listed
# once; the original cell cleaned `discomfort_in_sleep` twice, which had no
# additional effect.
BRACKETED_COLUMNS = [
    'alcohol',
    'concentrating_problem_one',
    'concentrating_problem_two',
    'discomfort_in_sleep',
    'exercise',
    'fatigue_limit',
    'feel_tired_frequency',
    'felt_alert',
    'had_problem',
    'hard_times',
    'medication_by_doctor',
    'poor_sleep_problems',
    'sleep_aids',
    'sleep_problem',
    'think_clearly',
    'tired_easily',
    'told_by_doctor',
    'told_by_doctor_specify',
    'told_to_doctor',
    'trouble_staying_awake',
]

# Replace each listed column with its bracket-free string version.
for column_name in BRACKETED_COLUMNS:
    sleepassessment[column_name] = remove_brackets(sleepassessment[column_name])

### Filter based on age

In [6]:
# The list of underage participants is stored on Synapse as a tab-separated file.
underage_file = syn.get('syn21905452')
underage_participants = pd.read_csv(underage_file.path, sep="\t")

# Keep only the rows whose participantId is NOT in that list.
is_underage = sleepassessment.participantId.isin(underage_participants.participantId)
sleepassessment = sleepassessment[~is_underage]

### Create Internal & External Copies of the Data

In [8]:
# Download the sharing-preference spreadsheet from Synapse and load it.
sharing_file = syn.get('syn21557215')
sharing_info = pd.read_excel(sharing_file.path)

# Rows whose `sharing` value is 'all_qualified_researchers'.
has_broad_sharing = sharing_info.sharing == 'all_qualified_researchers'
healthCodes_with_broadsharing = sharing_info[has_broad_sharing]

# The external copy keeps only rows whose participantId appears in the
# 'participant id' column of the broad-sharing rows.
in_external_release = sleepassessment.participantId.isin(healthCodes_with_broadsharing['participant id'])
EXTERNAL_SLEEPASSESSMENT_DATA = sleepassessment[in_external_release]
EXTERNAL_SLEEPASSESSMENT_DATA.shape

(2318, 22)

### External - Upload to Synapse

In [9]:
# Number of distinct participantId values in the external copy (displayed).
len(EXTERNAL_SLEEPASSESSMENT_DATA.participantId.unique())

# Synapse project that receives the external table.
SH_EXTERNAL_PROJECT = 'syn18492837'

# Derive the column definitions from the DataFrame, then build the schema.
external_columns = as_table_columns(EXTERNAL_SLEEPASSESSMENT_DATA)
table_schema_external = Schema(
    name='Sleep Assessment',
    columns=external_columns,
    parent=SH_EXTERNAL_PROJECT
)

# Store the table in Synapse and display the returned object.
external_table = Table(table_schema_external, EXTERNAL_SLEEPASSESSMENT_DATA)
sleepassessment_synTable_external = syn.store(external_table)
sleepassessment_synTable_external

2221

<synapseclient.table.CsvFileTable at 0x10a719610>

### Internal - Upload to Synapse

In [10]:
# Synapse project that receives the internal table.
SH_INTERNAL_PROJECT = 'syn7066726'

# The internal table is built from `sleepassessment` as it stands at this
# point, without the sharing-based subset used for the external copy.
internal_columns = as_table_columns(sleepassessment)
table_schema_internal = Schema(
    name='Sleep Assessment Internal',
    columns=internal_columns,
    parent=SH_INTERNAL_PROJECT
)

# Store the table in Synapse.
internal_table = Table(table_schema_internal, sleepassessment)
sleepassessment_synTable_internal = syn.store(internal_table)

### Set Provenance

In [11]:
# A single Activity describes how both tables were produced: the raw data
# entity is recorded as the input and this notebook's URL as the executed code.
# NOTE(review): the saved cell output records a different `executed` URL than
# the one set here -- re-run the cell so output and code agree.
activity_name = 'Sleep Assessment data curation'
activity_description = 'Process and convert raw data to table format'
notebook_url = 'https://github.com/apratap/SleepHealth_Data_Release/blob/master/Create_Sleep_Assessment.ipynb'
activity = Activity(
    name=activity_name,
    description=activity_description,
    used=sleepassessment_raw_id,
    executed=notebook_url
)

# Internal table
syn.setProvenance(sleepassessment_synTable_internal, activity)

# External table
syn.setProvenance(sleepassessment_synTable_external, activity)

{u'createdBy': u'3334346',
 u'createdOn': u'2020-02-27T00:45:15.562Z',
 u'description': u'Process and convert raw data to table format',
 u'etag': u'63b5a440-bdf7-4153-aed3-02a83fccacbd',
 u'id': u'10191763',
 u'modifiedBy': u'3334346',
 u'modifiedOn': u'2020-02-27T00:45:15.562Z',
 u'name': u'Sleep Assessment data curation',
 u'used': [{u'concreteType': u'org.sagebionetworks.repo.model.provenance.UsedEntity',
   u'reference': {u'targetId': u'syn7117910', u'targetVersionNumber': 770},
   u'wasExecuted': False},
  {u'concreteType': u'org.sagebionetworks.repo.model.provenance.UsedURL',
   u'name': u'https://github.com/deerings/SLEEPHEALTHv2-Data-Release/Create_Sleep_Assessment.ipynb',
   u'url': u'https://github.com/deerings/SLEEPHEALTHv2-Data-Release/Create_Sleep_Assessment.ipynb',
   u'wasExecuted': True}]}

{u'createdBy': u'3334346',
 u'createdOn': u'2020-02-27T00:45:18.055Z',
 u'description': u'Process and convert raw data to table format',
 u'etag': u'fd6b49fc-ebde-4024-9f16-d715da3f5502',
 u'id': u'10191764',
 u'modifiedBy': u'3334346',
 u'modifiedOn': u'2020-02-27T00:45:18.055Z',
 u'name': u'Sleep Assessment data curation',
 u'used': [{u'concreteType': u'org.sagebionetworks.repo.model.provenance.UsedEntity',
   u'reference': {u'targetId': u'syn7117910', u'targetVersionNumber': 770},
   u'wasExecuted': False},
  {u'concreteType': u'org.sagebionetworks.repo.model.provenance.UsedURL',
   u'name': u'https://github.com/deerings/SLEEPHEALTHv2-Data-Release/Create_Sleep_Assessment.ipynb',
   u'url': u'https://github.com/deerings/SLEEPHEALTHv2-Data-Release/Create_Sleep_Assessment.ipynb',
   u'wasExecuted': True}]}