# Mental Health Survey Final Data Prep

In [None]:
import datetime as dt
import itertools
import pandas as pd

import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell
import synapseclient
from synapseclient import Activity, File, Schema, Table, as_table_columns

# Have IPython echo the value of every top-level expression in a cell rather
# than only the last one, so cells ending in several `.head()` calls show all
# of them.
InteractiveShell.ast_node_interactivity = 'all'
# Create the Synapse client used by every later cell. No credentials are
# passed here, so login() has to find them in the local Synapse configuration.
syn = synapseclient.Synapse()
syn.login()

def isnum(x):
    """Return True when *x* can be converted to a float, otherwise False.

    Anything ``float()`` accepts counts as numeric: ints, floats, bools and
    numeric strings such as ``'3.5'`` or ``'nan'``. ``None``, unparsable
    strings and unsupported types (lists, dicts, ...) yield False instead of
    raising.
    """
    try:
        float(x)
    except (TypeError, ValueError):
        # ValueError: a string that does not parse, e.g. 'abc'.
        # TypeError: a type float() cannot accept at all, e.g. None or a list.
        return False
    return True

In [None]:
# Synapse ids of the raw survey exports for study 1 (V1) and study 2 (V2).
v1sid = 'syn12181333'
v2sid = 'syn12181343'

# Fetch each entity through the Synapse client and read its local CSV copy,
# parsing the `timestamp` column as datetimes. V1 is read first, then V2.
v1r, v2r = (
    pd.read_csv(syn.get(sid).path, parse_dates=['timestamp'])
    for sid in (v1sid, v2sid)
)

# Preview the first rows of both raw frames.
v1r.head()
v2r.head()

In [None]:
# Upload the raw V2 survey data to the public release portal as a CSV file.
name = 'V2_MentalHealthSvc.csv'

# Write straight from the raw frame; nothing is modified, so no copy is
# needed. index=False keeps the DataFrame index out of the published file
# (to_csv documents `index` as a bool).
v2r.to_csv(name, index=False)

# Store the file under parent syn10848316, then attach provenance linking it
# to its source entity (v2sid) and to the notebook that produced it.
# v2p ends up bound to whatever setProvenance returns.
v2p = syn.setProvenance(
    syn.store(File(name=name, path=name, parent='syn10848316')),
    activity=Activity(
        name='Public Release',
        description='Prepare data for public release',
        used=[v2sid],
        executed=[
            dict(
                name='IPython Notebook',
                url='https://github.com/apratap/BRIGHTEN-Data-Release/blob/master/Create_ImpactMHS_datafiles.ipynb'
            )
        ]
    )
)

In [None]:
# Synapse id of the raw V2 survey export. Same value as assigned in the
# loading cell; presumably repeated so later cells can run on their own.
v2sid = 'syn12181343'

In [None]:
# Build the V1 working frame in two steps: first discard the columns that are
# not carried forward, then rename the id and timestamp columns to the names
# used in the combined table.
v1 = v1r.drop(columns=['userid', 'timestampUTC'])
v1 = v1.rename(
    columns={'brightenid': 'participant_id', 'timestamp': 'dt_response'}
)

# convert the response columns to appropriate indicators
def fx(x):
    """Encode a survey answer as an integer indicator.

    'No' becomes 0 and 'Yes' becomes 1. Any other value (different casing,
    missing values, other types) becomes -1. Matching is exact.
    """
    for answer, code in (('No', 0), ('Yes', 1)):
        if x == answer:
            return code
    return -1
    
# Recode the five answer columns (mhs_1 .. mhs_5) of the V1 frame in place,
# passing every value through fx.
for i in range(1, 6):
    col = f'mhs_{i}'
    v1[col] = v1[col].apply(fx)

# Preview the recoded V1 frame.
v1.head()

In [None]:
# Build the V2 working frame: same data as the raw export, with the id and
# timestamp columns renamed to the names used in the combined table.
v2 = v2r.rename(
    columns=dict(userid='participant_id', timestamp='dt_response')
)

# convert the response columns to appropriate indicators
def fx(x):
    """Encode a survey answer as an integer indicator.

    Returns 0 for 'No', 1 for 'Yes', and -1 for every other value
    (including missing values). Matching is exact.
    """
    for answer, code in (('No', 0), ('Yes', 1)):
        if x == answer:
            return code
    return -1
    
# Recode the five answer columns (mhs_1 .. mhs_5) of the V2 frame in place,
# passing every value through fx.
for i in range(1, 6):
    col = f'mhs_{i}'
    v2[col] = v2[col].apply(fx)

# Preview the recoded V2 frame.
v2.head()

### Combine the DataFrames

In [None]:
# Stack the V1 rows followed by the V2 rows into one frame. sort=False asks
# pandas not to sort the column axis when the two frames' columns differ.
frames = [v1, v2]
combined = pd.concat(frames, sort=False)

# Preview the combined frame.
combined.head()

### Set provenance and upload to Synapse

In [None]:
# Describe the destination table: its name, a column definition derived from
# the combined DataFrame, and the parent it is created under.
schema = Schema(
    name='IMPACT MHS',
    columns=as_table_columns(combined),
    parent='syn10848316',
)

# Upload the combined rows under that schema and keep the stored result.
final = syn.store(Table(schema, combined))

In [None]:
# Provenance record for the combined table: which source entities were used
# and which notebook produced the result.
notebook_ref = {
    'name': 'IPython Notebook',
    'url': 'https://github.com/apratap/BRIGHTEN-Data-Release/blob/master/Create_ImpactMHS_datafiles.ipynb',
}
combine_activity = Activity(
    name='Combine V1 and V2 data',
    description='Process and combine the data collected during study 1 and study 2',
    used=[v1sid, v2sid],
    executed=[notebook_ref],
)

# NOTE(review): the target id is hard-coded; presumably it is the table
# created by the preceding store cell. Confirm before re-running.
final = syn.setProvenance('syn17022660', activity=combine_activity)