#  Sleep Quality Checker Data Prep

In [7]:
import datetime as dt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell
import synapseclient
from synapseclient import Activity, Schema, Table, as_table_columns

# Echo the value of every top-level expression in a cell, not only the last
# one (later cells rely on this to show several results at once).
InteractiveShell.ast_node_interactivity = 'all'
# Create the Synapse client used by every download/upload cell below.
syn = synapseclient.Synapse()
# login() is called with no arguments, so credentials must come from outside
# the notebook (presumably the local Synapse config/cache -- TODO confirm).
syn.login()


UPGRADE AVAILABLE

A more recent version of the Synapse Client (2.0.0) is available. Your version (1.9.4) can be upgraded by typing:
    pip install --upgrade synapseclient

Python Synapse Client version 2.0.0 release notes

https://python-docs.synapse.org/build/html/news.html



Welcome, Sean Deering!



### Get Raw Sleep Quality Checker Data

In [8]:
# Synapse ID of the raw Sleep Quality Checker file; reused later when
# recording provenance.
sleepquality_checker_raw_id = 'syn7117932'

# Fetch the entity from Synapse, then parse the file at its local path as CSV.
raw_sleepquality_entity = syn.get(sleepquality_checker_raw_id)
sleepquality_checker = pd.read_csv(raw_sleepquality_entity.path)

### Process Sleep Quality Checker Data

In [9]:
# Two clean-up steps applied as a single chain:
#   1. rename the raw score column to the shorter 'sq_score';
#   2. turn cells that are empty or contain only whitespace into NaN.
sleepquality_checker = (
    sleepquality_checker
    .rename(columns={'sleepQualityResult': 'sq_score'})
    .replace(r'^\s*$', np.nan, regex=True)
)

### Filter based on age

In [10]:
# Tab-separated list of participants to exclude on the basis of age.
underage_participants = pd.read_csv(syn.get('syn21905452').path, sep="\t")

# Keep only the rows whose participantId is NOT in the underage list.
is_underage = sleepquality_checker['participantId'].isin(
    underage_participants['participantId']
)
sleepquality_checker = sleepquality_checker.loc[~is_underage]

### Remove test accounts

In [11]:
# Spreadsheet listing the participantIds that belong to test accounts.
test_accounts = pd.read_excel(syn.get('syn21958537').path)

# Drop every row recorded under one of those test accounts.
is_test_account = sleepquality_checker['participantId'].isin(
    test_accounts['participantId']
)
sleepquality_checker = sleepquality_checker.loc[~is_test_account]

### Create Internal & External Copies of the Data

In [12]:
# Download the per-participant sharing settings.
sharing_info = pd.read_excel(syn.get('syn21557215').path)

# Participants whose sharing setting is 'all_qualified_researchers'.
shares_broadly = sharing_info['sharing'] == 'all_qualified_researchers'
healthCodes_with_broadsharing = sharing_info.loc[shares_broadly]

# External copy: only rows from broadly-sharing participants. Note the sharing
# sheet names its ID column 'participant id' (with a space).
in_external_release = sleepquality_checker['participantId'].isin(
    healthCodes_with_broadsharing['participant id']
)
EXTERNAL_SLEEPQUALITY_CHECKER_DATA = sleepquality_checker.loc[in_external_release]
EXTERNAL_SLEEPQUALITY_CHECKER_DATA.shape

(42275, 3)

### External - Upload to Synapse

In [13]:
# Sanity check: number of distinct participants in the external copy.
len(EXTERNAL_SLEEPQUALITY_CHECKER_DATA['participantId'].unique())

# Synapse project that receives the external table.
SH_EXTERNAL_PROJECT = 'syn18492837'

# Derive the column definitions from the DataFrame and declare the schema.
external_columns = as_table_columns(EXTERNAL_SLEEPQUALITY_CHECKER_DATA)
table_schema_external = Schema(
    name='Sleep Quality Checker',
    columns=external_columns,
    parent=SH_EXTERNAL_PROJECT,
)

# Store the schema together with the rows; keep the result for provenance.
external_table = Table(table_schema_external, EXTERNAL_SLEEPQUALITY_CHECKER_DATA)
sleepquality_checker_synTable_external = syn.store(external_table)
sleepquality_checker_synTable_external

4460

<synapseclient.table.CsvFileTable at 0x114c21d10>

### Internal - Upload to Synapse

In [None]:
# Synapse project that receives the internal table.
SH_INTERNAL_PROJECT = 'syn7066726'

# The internal table is built from the full filtered frame, not the
# external subset.
internal_columns = as_table_columns(sleepquality_checker)
table_schema_internal = Schema(
    name='Sleep Quality Checker Internal',
    columns=internal_columns,
    parent=SH_INTERNAL_PROJECT,
)

# Store the schema together with the rows; keep the result for provenance.
internal_table = Table(table_schema_internal, sleepquality_checker)
sleepquality_checker_synTable_internal = syn.store(internal_table)

### Set Provenance

In [None]:
# One Activity record shared by both tables: it names the raw input entity
# and the URL of the notebook given as the executed code.
activity = Activity(
    name= 'Sleep Quality Checker data curation',
    description='Process and convert raw data to table format',
    used=sleepquality_checker_raw_id,
    executed='https://github.com/apratap/SleepHealth_Data_Release/blob/master/Create_Sleep_Quality_Checker.ipynb',
)

# Attach the activity to the internal table first ...
syn.setProvenance(sleepquality_checker_synTable_internal, activity)

# ... and then to the external table.
syn.setProvenance(sleepquality_checker_synTable_external, activity)