# Alcohol Use Final Data Prep

In [None]:
import datetime as dt
import pandas as pd

from IPython.core.interactiveshell import InteractiveShell
import synapseclient
from synapseclient import Activity, Schema, Table, as_table_columns

# Have IPython display the result of every expression statement in a cell,
# not only the last one (later cells call .head() on two frames in a row).
InteractiveShell.ast_node_interactivity = 'all'
# Create the Synapse client and log in. No credentials are passed here, so
# they presumably come from the local Synapse config/cache -- TODO confirm.
syn = synapseclient.Synapse()
syn.login()

In [None]:
# Synapse IDs of the raw V1 and V2 survey files; they are referenced again
# when the provenance of the combined table is recorded.
v1sid = 'syn10250481'
v2sid = 'syn9974011'

# Fetch each entity and load the file at its `.path`, parsing the response
# timestamp column of each file as datetimes.
v1_path = syn.get(v1sid).path
v1 = pd.read_excel(v1_path, parse_dates=['response_local'])
v2_path = syn.get(v2sid).path
v2 = pd.read_csv(v2_path, parse_dates=['createdAt'])

# Preview both frames.
v1.head()
v2.head()

### Process V1 data

In [None]:
# Drop the unneeded columns and rename the remaining ones to the output
# names (participant_id, dt_response, alc_1..alc_3).
v1 = v1.drop(columns=[
    'sent_time_local', 'sent_time_utc', 'response_utc', 'response_id', 'user_id', 'audit'
]).rename(columns={
    'brightenid': 'participant_id',
    'audit1': 'alc_1',
    'audit2': 'alc_2',
    'audit3': 'alc_3',
    'response_local': 'dt_response'
})

# Add the question sum. Column-wise `+` produces the same per-row value as
# adding the three fields row by row (a missing answer still yields a missing
# sum), without a Python-level loop over itertuples().
v1['alc_sum'] = v1['alc_1'] + v1['alc_2'] + v1['alc_3']

v1.head()

### Process V2 data

In [None]:
# Drop the unused `day` column and rename the survey-question columns and
# identifiers to the output names (participant_id, dt_response, alc_1..alc_3).
v2 = v2.drop(columns=['day'])\
     .rename(columns={
        'How often did you have a drink containing alcohol in the past year?':'alc_1',
        'How many drinks did you have on a typical day when you were drinking in the past year?':'alc_2',
        'How often did you have six or more drinks on one occasion in the past year?':'alc_3',
        'username':'participant_id',
        'createdAt': 'dt_response'
})

# Add the question sum. Column-wise `+` produces the same per-row value as
# adding the three fields row by row (a missing answer still yields a missing
# sum), without a Python-level loop over itertuples().
v2['alc_sum'] = v2['alc_1'] + v2['alc_2'] + v2['alc_3']

v2.head()

### Combine the DataFrames

In [None]:
# Final column order for the combined table.
column_order = ['participant_id', 'dt_response', 'alc_1', 'alc_2', 'alc_3', 'alc_sum']

# Stack the V1 rows on top of the V2 rows (without sorting the columns), then
# select the columns in `column_order`.
stacked = pd.concat([v1, v2], sort=False)
combined = stacked.loc[:, column_order]

combined.head()

### Set provenance and upload to Synapse

In [None]:
# Describe the destination table: its columns are derived from the combined
# DataFrame and it is created under the given parent entity.
alc_schema = Schema(
    name='Alcohol Consumption Survey',
    columns=as_table_columns(combined),
    parent='syn10848316',
)

# Store the schema together with the combined rows in Synapse.
alc_final = syn.store(Table(alc_schema, combined))

In [None]:
# Provenance record: the two raw Synapse entities that were used and the
# notebook that was executed to produce the combined table.
combine_activity = Activity(
    name='Combine V1 and V2 data',
    description='Process and combine the data collected during study 1 and study 2',
    used=[v1sid, v2sid],
    executed=[
        {
            'name': 'IPython Notebook',
            'url': 'https://github.com/apratap/BRIGHTEN-Data-Release/blob/master/Create_Alcohol_datafiles.ipynb',
        }
    ],
)

# NOTE(review): the variable name `phq2_final` looks carried over from a
# different survey's notebook; it holds the result of setProvenance for the
# entity below. Also presumably 'syn17021280' is the table stored above --
# confirm before re-running against a different parent.
phq2_final = syn.setProvenance('syn17021280', activity=combine_activity)