In [None]:
%matplotlib inline

import datetime as dt
import itertools as it
import numpy as np

from IPython.core.interactiveshell import InteractiveShell
import matplotlib.pyplot as plt
import pandas as pd
import synapseclient
from synapseclient import Activity, Schema, Table, as_table_columns
from tqdm import tqdm

# Display the value of every top-level expression in a cell, not just the last one.
InteractiveShell.ast_node_interactivity = 'all'
# Create the Synapse client and authenticate. login() receives no arguments here, so the
# credentials must come from outside the notebook (presumably a cached config — TODO confirm).
syn = synapseclient.Synapse()
syn.login()

# Register tqdm's pandas integration, which provides the `progress_apply` used further down.
tqdm.pandas()

In [None]:
# Synapse ID of the V1 source file (an Excel workbook, given the read_excel call below)
v1sid = 'syn10250483'

# fetch the entity first, then read the file it points at;
# `response_utc` is parsed as a datetime column while loading
v1_entity = syn.get(v1sid)
v1r = pd.read_excel(v1_entity.path, parse_dates=['response_utc'])

v1r.head()

In [None]:
# columns of the raw sheet that are not carried forward
unused_columns = [
    'user_id', 'sent_time_local', 'sent_time_utc', 'response_local', 'response_id', 'start'
]

# raw column name -> name used in the released table
column_names = {
    'brightenid': 'participant_id',
    'response_utc': 'dt_response',
    'otherapps': 'apps',
}

# final column selection, in output order
kept_columns = ['participant_id', 'dt_response', 'week', 'apps']

v1 = (
    v1r
    .drop(columns=unused_columns)
    .rename(columns=column_names)
    .loc[:, kept_columns]
)

v1.head()

In [None]:
def isnum(x):
    """Report whether ``x`` can be converted to a float.

    Parameters
    ----------
    x : object
        Value to test.

    Returns
    -------
    bool
        True when ``float(x)`` succeeds. That covers real numbers, NaN and
        numeric strings such as ``'3.5'``. False for None and for any value
        that ``float()`` rejects.
    """
    if x is None:
        return False
    try:
        float(x)
    except (TypeError, ValueError):
        # ValueError: text that is not a number
        # TypeError: a type float() does not accept at all (list, dict, ...)
        return False
    return True

# find all the unique app reasons and print one per line, in order of first appearance;
# cells that isnum() accepts (e.g. NaN) are skipped because they cannot be split on '|'
app_reasons = dict.fromkeys(
    it.chain.from_iterable(
        answer.split('|') for answer in v1.apps if not isnum(answer)
    )
)
for reason in app_reasons:
    print(reason)

In [None]:
def fx(x, key):
    """Return 1 if ``key`` occurs in the app answer ``x``, otherwise 0.

    Parameters
    ----------
    x : object
        One cell of the apps column. Only non-numeric strings can match.
    key : str
        Substring to look for.

    Returns
    -------
    int
        1 when ``x`` is a non-numeric string containing ``key``; 0 for
        everything else, including None, NaN and numeric values.
    """
    # the isinstance check comes first so that None and other non-string cells
    # yield 0 instead of raising inside isnum() or on the substring test
    return int(isinstance(x, str) and not isnum(x) and key in x)

# indicator column -> keyword looked up in the free-text `apps` answer
app_keywords = {
    'app_al': 'Alcohol',
    'app_ct': 'Concentration',
    'app_ef': 'Exercise / fitness',
    'app_md': 'Medical',
    'app_mo': 'Mood',
    'app_pm': 'Pain management',
    'app_rx': 'Relaxation',
    'app_sl': 'Sleep',
    'app_wm': 'Weight management',
}

# throw in the indicator variables, one 0/1 column per keyword
for column, keyword in app_keywords.items():
    v1[column] = v1.apps.apply(fx, args=(keyword,))

# the raw answer is no longer needed once the indicators exist
v1 = v1.drop(columns=['apps'])
v1.head()

### Add the week in the study

In [None]:
metasid = 'syn17023349'
metadata = syn.tableQuery(
    f'SELECT participant_id, startdate FROM {metasid}'
).asDataFrame(convert_to_datetime=True)
metadata.startdate = pd.to_datetime(metadata.startdate)

# add in the participants start date as a new column; the left join keeps every response,
# so a participant that is absent from the metadata gets a missing (NaT) start date
v1 = pd.merge(v1, metadata, on='participant_id', how='left')

# whole days from the start date to the calendar day of the response (time of day dropped)
days_in_study = (v1.dt_response.dt.normalize() - v1.startdate).dt.days

# 1-based week number: days 0-6 -> week 1, days 7-13 -> week 2, ...
week = np.floor(days_in_study / 7) + 1

# an integer column cannot hold NaN, so cast only when every response has a week;
# otherwise keep floats, with NaN marking the responses whose week is unknown
v1['week'] = week if week.isna().any() else week.astype(np.int16)

# remove the start date
v1 = v1.drop(columns=['startdate'], errors='ignore')

### Localize timestamps

In [None]:
# localize timestamps: tag each response time as UTC and store its string form
v1['dt_response'] = list(
    map(lambda stamp: str(stamp.tz_localize('UTC')), v1['dt_response'])
)

v1.head()

### Upload to Synapse

In [None]:
# select every row currently in the target table ...
existing_rows = syn.tableQuery('select * from syn17025058')

# ... and hand the query result to delete()
t = syn.delete(existing_rows)

In [None]:
# table definition: the column specs are derived from the dataframe itself
schema = Schema(
    name='Other Mobile Apps Used',
    columns=as_table_columns(v1),
    parent='syn10848316',
)

# pair the schema with the rows and store both in Synapse
final = syn.store(Table(schema, v1))

In [None]:
# reference to the notebook recorded as the executed code
notebook_ref = dict(
    name='IPython Notebook',
    url='https://github.com/apratap/BRIGHTEN-Data-Release/blob/master/Create_AdditionalApplications_datafiles.ipynb'
)

# provenance record: the input file plus the notebook reference
activity = Activity(
    name='Combine V1 and V2 data',
    description='Process and combine the data collected during study 1 and study 2',
    used=[v1sid],
    executed=[notebook_ref],
)

# attach the record to the table
final = syn.setProvenance('syn17025058', activity=activity)