# PHQ2 Final Data Prep

In [None]:
%matplotlib inline
import datetime as dt
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell
import synapseclient
from synapseclient import Activity, Schema, Table, as_table_columns
from tqdm import tqdm

# Register tqdm's progress_apply / progress_map helpers on pandas objects.
tqdm.pandas()

# Have the notebook echo the value of every bare expression in a cell,
# not only the final one.
InteractiveShell.ast_node_interactivity = 'all'

# Open the Synapse session that every later download/upload goes through.
syn = synapseclient.Synapse()
syn.login()

### Get the data

In [None]:
# Synapse ids of the raw PHQ-2 exports for study 1 (V1) and study 2 (V2).
v1_raw_id = 'syn10250486'
v2_raw_id = 'syn9974012'

# V1 is read as an Excel workbook.
v1_entity = syn.get(v1_raw_id)
v1r = pd.read_excel(v1_entity.path)

# V2 is read as a CSV; its createdAt column is parsed as datetimes.
v2_entity = syn.get(v2_raw_id)
v2r = pd.read_csv(v2_entity.path, parse_dates=['createdAt'])

# Preview both raw frames.
v1r.head()
v2r.head()

### Process V1 data

In [None]:
# Columns of the V1 export that are discarded before combining.
v1_unused_columns = [
    'sent_time_local', 'sent_time_utc', 'response_utc', 'response_id', 'user_id', 'start', 'phq2',
]

# Map the V1 identifier and question-text headers onto the shared names.
v1_column_names = {
    'brightenid': 'participant_id',
    'Feeling down, depressed, or hopeless.': 'phq2_1',
    'Little interest or pleasure in doing things.': 'phq2_2',
}

v1r = v1r.drop(columns=v1_unused_columns)
v1r = v1r.rename(columns=v1_column_names)

# helper used to add yesterday's date
def dx(x):
    """Return the calendar date one day before the timestamp *x*.

    Parameters
    ----------
    x : datetime-like
        A ``datetime.datetime``, ``datetime.date`` or ``pandas.Timestamp``.
        Missing values (``None``, ``NaN``, ``pandas.NaT``) are accepted.

    Returns
    -------
    datetime.date
        The date of the day preceding *x*, with any time-of-day discarded.
        ``pandas.NaT`` is returned when *x* is missing.
    """
    # A missing timestamp has NaN year/month/day fields, which dt.date()
    # rejects with a TypeError; pass the missing value through instead.
    if pd.isnull(x):
        return pd.NaT

    t = x - dt.timedelta(days=1)
    return dt.date(year=t.year, month=t.month, day=t.day)

# The questions ask about "yesterday", so record the date before the response.
v1r['dt_yesterday'] = v1r.response_local.apply(dx)

# PHQ-2 total: column-wise sum of the two items (vectorized instead of a
# Python-level loop over itertuples()).
v1r['phq2_sum'] = v1r.phq2_1 + v1r.phq2_2

### Process V2 Data

In [None]:
# Map the V2 question-text headers, identifier and timestamp onto the same
# column names used for V1.
# NOTE(review): the V2 text mapped to phq2_2 ("Irritable or Anxious?") is not
# the same question as the V1 text mapped to phq2_2 ("Little interest or
# pleasure in doing things.") - confirm the two are meant to be pooled.
v2r = v2r.rename(columns={
    'YESTERDAY, were you bothered by any of the following problems? Feeling down, depressed, or hopeless.':'phq2_1',
    'YESTERDAY, were you bothered by any of the following problems? Irritable or Anxious?':'phq2_2',
    'username':'participant_id',
    'createdAt': 'response_local'
})

# The questions ask about "yesterday", so record the date before the response.
v2r['dt_yesterday'] = v2r.response_local.apply(dx)

# PHQ-2 total: column-wise sum of the two items (vectorized instead of a
# Python-level loop over itertuples()).
v2r['phq2_sum'] = v2r.phq2_1 + v2r.phq2_2

### Combine the DataFrames

In [None]:
# Columns kept in the combined table, in output order.
output_columns = [
    'participant_id', 'response_local', 'dt_yesterday', 'day', 'phq2_1', 'phq2_2', 'phq2_sum',
]

# Stack V1 on top of V2 (column order left unsorted), keep only the shared
# columns, then give the response timestamp its final name.
stacked = pd.concat([v1r, v2r], sort=False)
combined = stacked.loc[:, output_columns]
combined = combined.rename(columns={'response_local':'dt_response'})

combined.head()

In [None]:
# Distribution of the first PHQ-2 item: histogram plus raw counts per value.
combined['phq2_1'].hist(bins=5, rwidth=0.8)
combined['phq2_1'].value_counts()

In [None]:
# Distribution of the second PHQ-2 item: histogram plus raw counts per value.
combined['phq2_2'].hist(bins=5, rwidth=0.8)
combined['phq2_2'].value_counts()

### Add week into study

In [None]:
# Synapse table queried for each participant's study start date.
metasid = 'syn17023349'
metadata = syn.tableQuery(
    f'SELECT participant_id, startdate FROM {metasid}'
).asDataFrame(convert_to_datetime=True)
metadata.startdate = pd.to_datetime(metadata.startdate)

# add in the participants start date as a new column; this is a left join,
# so responses from participants absent from the metadata get a NaT startdate
combined = pd.merge(combined, metadata, on='participant_id', how='left')

# whole days between midnight of the response day and the start date
response_day = pd.to_datetime(combined.dt_response).dt.normalize()
elapsed_days = (response_day - combined.startdate).dt.days

# 1-based week number: floor(days / 7) + 1. Rows with no start date stay NaN
# instead of being forced through an integer cast, which has no valid result
# for NaN; the column is only made integer when every row has a value.
week = np.floor(elapsed_days / 7) + 1
if week.notnull().all():
    week = week.astype('int64')
combined['week'] = week

# remove the start date
combined = combined.drop(columns=['startdate'], errors='ignore')

# reorder the columns so 'week' sits third; built from the column names
# rather than by position so no column is lost if 'week' is not the last one
# (e.g. when this cell is run a second time)
other_cols = [c for c in combined.columns if c != 'week']
combined = combined.reindex(columns=other_cols[:2] + ['week'] + other_cols[2:])

combined.head()

### Localize timestamps

In [None]:
# Label each response timestamp as UTC and store it as its string form.
# NOTE(review): this column started out as 'response_local'; tz_localize only
# attaches the UTC zone to the existing clock time, it does not convert it -
# confirm that is the intended meaning.
def _utc_label(stamp):
    return str(stamp.tz_localize('UTC'))


combined['dt_response'] = list(map(_utc_label, combined.dt_response))

combined.head()

### Set provenance and upload to Synapse

In [None]:
# Select everything currently in the target table and hand the result to
# syn.delete (presumably clearing the old rows before the fresh upload below
# - confirm against the synapseclient documentation).
existing_rows = syn.tableQuery('select * from syn17020855')
t = syn.delete(existing_rows)

In [None]:
# Describe the table: column definitions are derived from the combined frame.
phq2_schema = Schema(
    name='PHQ-2',
    columns=as_table_columns(combined),
    parent='syn10848316',
)

# Pair the schema with the data and upload it.
phq2_final = syn.store(Table(phq2_schema, combined))

In [None]:
# Provenance record: the two raw exports and the metadata table are the
# inputs, and this notebook is the code that was executed.
notebook_reference = dict(
    name='IPython Notebook',
    url='https://github.com/apratap/BRIGHTEN-Data-Release/blob/master/Create_PHQ2_datafiles.ipynb'
)
combine_activity = Activity(
    name='Combine V1 and V2 data',
    description='Process and combine the data collected during study 1 and study 2',
    used=[v1_raw_id, v2_raw_id, metasid],
    executed=[notebook_reference]
)

# Attach the record to the table; phq2_final is rebound to the return value.
phq2_final = syn.setProvenance('syn17020855', activity=combine_activity)