# MHS Final Data Prep

In [None]:
import datetime as dt
import itertools
import pandas as pd

import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell
import synapseclient
from synapseclient import Activity, File, Schema, Table, as_table_columns

# Create the Synapse client and log in; every later cell uses `syn`.
syn = synapseclient.Synapse()
syn.login()

# Echo the value of every top-level expression in a cell, not only the last.
InteractiveShell.ast_node_interactivity = 'all'

### Prep V2 data for public release

In [None]:
# Synapse id of the V2 source file.
v2sid = 'syn9974018'

# Fetch the entity through the Synapse client and read the downloaded CSV,
# parsing `createdAt` as datetimes.
v2_path = syn.get(v2sid).path
v2r = pd.read_csv(v2_path, parse_dates=['createdAt'])

v2r.head()

In [None]:
# The V2 file uses the full question text as column headers; map each
# question to a short `screen_N` name.
screen_question_names = {
    'Did a doctor ever prescribe a medication called Lithium to you?': 'screen_1',
    'Were you prescribed any medication for having a period of being so excited or irritable that you got into trouble or your family or friends worried about it?': 'screen_2',
    'Did a doctor ever say you were manic-depressive or had bipolar disorder?': 'screen_3',
    'Did a doctor ever say that you have schizophrenia or a schizoaffective disorder or psychosis?': 'screen_4',
}
v2r = v2r.rename(columns=screen_question_names)

v2r.head()

In [None]:
# Plot the distribution of the `day` column.
v2r['day'].hist()

### Drop the meaningless `day` column and upload to Synapse

In [None]:
# Remove the `day` column before writing the release file.
v2r = v2r.drop(columns='day')

# Write the release CSV to the working directory.
name = 'V2_MentalHealthScreen.csv'
v2r.to_csv(name, index=None)

# Store the file in Synapse, then attach a provenance record that points back
# to the source entity and to the notebook that produced the file.
stored_file = syn.store(File(name=name, path=name, parent='syn10848316'))
release_activity = Activity(
    name='Public Release',
    description='Prep data for public release',
    used=[v2sid],
    executed=[
        {
            'name': 'IPython Notebook',
            'url': 'https://github.com/apratap/BRIGHTEN-Data-Release/blob/master/Create_MHS_datafiles.ipynb',
        },
    ],
)

# The result is bound and immediately deleted: as an assignment it is not
# echoed as cell output, and nothing is kept afterwards.
t = syn.setProvenance(stored_file, activity=release_activity)
del t

### Prep V1 and V2 for release

In [None]:
# Synapse ids of the V1 and V2 source files. Note that `v2sid` is rebound here.
v1sid = 'syn12204723'
v2sid = 'syn17023315'

# Download each file and read it, parsing its timestamp column as datetimes
# (`timestamp` for V1, `createdAt` for V2).
v1r = pd.read_csv(syn.get(v1sid).path, parse_dates=['timestamp'])
v2r = pd.read_csv(syn.get(v2sid).path, parse_dates=['createdAt'])

v1r.head()
v2r.head()

#### Prep V1 data

In [None]:
# Columns removed from the V1 frame, and the new names for the id and
# timestamp columns.
v1_dropped = ['userid', 'timestampUTC']
v1_renamed = {'brightenid': 'participant_id', 'timestamp': 'dt_response'}

v1 = v1r.drop(columns=v1_dropped)
v1 = v1.rename(columns=v1_renamed)

def fx(x):
    """Encode a 'Yes'/'No' answer as an integer.

    Returns:
        1 when `x` equals 'Yes', 0 when `x` equals 'No'.

    Raises:
        ValueError: if `x` is any other value, so an unexpected encoding
            fails loudly instead of being silently recoded.
    """
    if x == 'Yes':
        return 1
    if x == 'No':
        return 0
    raise ValueError(f'encoding not mapped: {x}')

# Recode each screening answer column with `fx`.
for screen_col in ('screen_1', 'screen_2', 'screen_3', 'screen_4'):
    v1[screen_col] = v1[screen_col].apply(fx)

v1.head()

#### Prep V2 data

In [None]:
# New names for the id and timestamp columns, and the columns kept for
# release, in output order.
v2_renamed = {'username': 'participant_id', 'createdAt': 'dt_response'}
v2_kept = ['participant_id', 'dt_response', 'screen_1', 'screen_2', 'screen_3', 'screen_4']

v2 = v2r.rename(columns=v2_renamed).loc[:, v2_kept]

# Recode each screening answer column with `fx`.
for screen_col in ('screen_1', 'screen_2', 'screen_3', 'screen_4'):
    v2[screen_col] = v2[screen_col].apply(fx)

v2.head()

### Combine

In [None]:
# Stack the V1 and V2 rows into a single frame.
# - sort=False keeps the columns in the order they are encountered.
# - ignore_index=True gives the result a fresh 0..n-1 index; without it the
#   row labels of `v1` and `v2` are carried over and collide, which makes any
#   later label-based access on `combined` ambiguous.
combined = pd.concat([v1, v2], sort=False, ignore_index=True)
combined.head()

### Upload to Synapse

In [None]:
# Table schema: column definitions derived from `combined`, created under
# parent `syn10848316`.
mhs_schema = Schema(
    name='Mental Health Screen',
    columns=as_table_columns(combined),
    parent='syn10848316',
)

# Store the schema together with the rows of `combined`.
final = syn.store(Table(mhs_schema, combined))

In [None]:
# Provenance record: the two source entities used, and the notebook that ran.
combine_activity = Activity(
    name='Combine V1 and V2 data',
    description='Process and combine the data collected during study 1 and study 2',
    used=[v1sid, v2sid],
    executed=[
        {
            'name': 'IPython Notebook',
            'url': 'https://github.com/apratap/BRIGHTEN-Data-Release/blob/master/Create_MHS_datafiles.ipynb',
        },
    ],
)

# NOTE(review): 'syn17023316' is hard-coded; presumably it is the id of the
# table stored in the previous cell. Confirm before re-running.
# `final` is rebound to the value returned by setProvenance.
final = syn.setProvenance('syn17023316', activity=combine_activity)