# Applications Final Data Prep

In [None]:
import datetime as dt
import itertools
import pandas as pd

from IPython.core.interactiveshell import InteractiveShell
import synapseclient
from synapseclient import Activity, Schema, Table, as_table_columns

# Have IPython display the value of every expression statement in a cell, not
# only the last one; the cells below end with several `.head()` calls.
InteractiveShell.ast_node_interactivity = 'all'
# Create the Synapse client and log in. No credentials are passed here, so
# they presumably come from a local Synapse config/cache -- TODO confirm.
syn = synapseclient.Synapse()
syn.login()

def isnum(x):
    """Return True if ``x`` can be converted to a float, False otherwise.

    ``None`` is never numeric. Numeric strings such as ``'3.5'`` count as
    numeric, and so does NaN (``float(nan)`` succeeds).

    Values that ``float`` rejects with either ``ValueError`` (e.g. ``'abc'``)
    or ``TypeError`` (e.g. a list or dict) are reported as non-numeric instead
    of letting the exception escape.
    """
    if x is None:
        return False
    try:
        float(x)
    except (TypeError, ValueError):
        # ValueError: unparsable string; TypeError: unsupported type
        return False
    return True

In [None]:
# Synapse IDs of the source files for study 1 (V1) and study 2 (V2)
v1sid = 'syn12181328'
v2sid = 'syn9974014'

# fetch each entity and read its local file as CSV, parsing each study's
# timestamp column as datetimes
v1 = pd.read_csv(
    syn.get(v1sid).path,
    parse_dates=['timestamp'],
)
v2 = pd.read_csv(
    syn.get(v2sid).path,
    parse_dates=['createdAt'],
)

# preview both frames
v1.head()
v2.head()

### V1 Data Prep

In [None]:
# drop the unneeded columns and rename the rest to the shared column names
v1 = v1.drop(columns=['timestampUTC', 'userid']).rename(columns={
    'brightenid': 'participant_id',
    'timestamp': 'dt_response',
    'otherapps_1': 'apps',
})

# convert to lowercase for lookups; anything isnum() accepts (numbers, numeric
# strings, NaN) is replaced by the placeholder 'none'
v1['apps'] = v1['apps'].apply(lambda x: 'none' if isnum(x) else x.lower())

# print the unique application reasons ('|'-separated in V1) in order of first
# appearance; a Series is used because pd.unique on a plain list is deprecated
v1_reasons = pd.Series(
    list(itertools.chain.from_iterable(resp.split('|') for resp in v1['apps'])),
    dtype=object,
).unique()
for reason in v1_reasons:
    print(reason)

# add 0/1 indicators for different app usage: (new column, keyword looked for
# in the lowercased response).
# NOTE(review): this is a plain substring test, so a keyword also matches any
# longer option text that contains it -- check against the list printed above.
v1_keywords = (
    ('weight_management', 'weight management'),
    ('medical', 'medical'),
    ('exercise_and_fitness', 'exercise / fitness'),
    ('sleep', 'sleep'),
    ('pain_management', 'pain management'),
    ('relaxation', 'relaxation'),
    ('alcohol', 'alcohol'),
    ('mood', 'mood'),
    ('concentration', 'concentration'),
)
for column, keyword in v1_keywords:
    # kw=keyword binds the current keyword to this lambda
    v1[column] = v1['apps'].apply(lambda resp, kw=keyword: int(kw in resp))

# add a flag to easily distinguish which study the row originated from
v1['study'] = 'V1'

v1.head()

### V2 Data Prep

In [None]:
# drop the unneeded columns and rename the rest to the shared column names
v2 = v2.drop(columns=['day']).rename(columns={
    'username': 'participant_id',
    'createdAt': 'dt_response',
    'Why did you download this app?': 'apps',
})

# convert to lowercase for lookups; anything isnum() accepts (numbers, numeric
# strings, NaN) is replaced by the placeholder 'none'
v2['apps'] = v2['apps'].apply(lambda x: 'none' if isnum(x) else x.lower())

# print the unique download reasons (','-separated in V2) in order of first
# appearance; a Series is used because pd.unique on a plain list is deprecated
v2_reasons = pd.Series(
    list(itertools.chain.from_iterable(resp.split(',') for resp in v2['apps'])),
    dtype=object,
).unique()
for reason in v2_reasons:
    print(reason)

# add 0/1 indicators for different app usage: (new column, keyword looked for
# in the lowercased response).
# NOTE(review): this is a plain substring test, so short keywords such as
# 'fun' or 'other' also match any longer text that contains them -- check
# against the list printed above.
v2_keywords = (
    ('fun', 'fun'),
    ('mental_health', 'my mental health'),
    ('mood', 'my mood'),
    ('managing_daily_issues', 'managing daily issues'),
    ('improve_work', 'improve work'),
    ('brain_health', 'brain health'),
    ('improve_relationships', 'improve relationships'),
    ('other', 'other'),
)
for column, keyword in v2_keywords:
    # kw=keyword binds the current keyword to this lambda
    v2[column] = v2['apps'].apply(lambda resp, kw=keyword: int(kw in resp))

# add the study flag
v2['study'] = 'V2'
v2.head()

### Combine the DataFrames

In [None]:
# Stack the two studies. An indicator column that exists in only one study is
# NaN for the other study's rows; fillna(0) turns those into 0.
# NOTE(review): fillna(0) is applied to every column, so a missing
# participant_id or dt_response would also become 0 -- confirm that is wanted.
combined = pd.concat([v1, v2], sort=False).fillna(0)

# short column codes for the indicator columns
app_column_codes = {
    'alcohol': 'app_a',
    'brain_health': 'app_bh',
    'concentration': 'app_c',
    'exercise_and_fitness': 'app_ef',
    'fun': 'app_f',
    'improve_relationships': 'app_ir',
    'improve_work': 'app_iw',
    'managing_daily_issues': 'app_mdi',
    'medical': 'app_m',
    'mental_health': 'app_mh',
    'other': 'app_o',
    'pain_management': 'app_pm',
    'relaxation': 'app_r',
    'sleep': 'app_s',
    'weight_management': 'app_wm',
}

# columns kept in the final table, in output order
# NOTE(review): the 'mood' indicator computed for both studies has no code in
# the mapping above and is not listed here, so it is dropped -- confirm intended.
output_columns = [
    'participant_id', 'dt_response', 'study',
    'app_a', 'app_bh', 'app_c', 'app_ef', 'app_f', 'app_ir', 'app_iw', 'app_mdi',
    'app_m', 'app_mh', 'app_o', 'app_pm', 'app_r', 'app_s', 'app_wm',
]

combined = combined.rename(columns=app_column_codes)
# the raw free-text answers are not kept
combined = combined.drop(columns='apps')
combined = combined.loc[:, output_columns]

combined.head()

### Set provenance and upload to Synapse

In [None]:
# table definition: column definitions derived from the combined frame,
# created under the given parent
app_schema = Schema(
    name='Application Use Survey',
    columns=as_table_columns(combined),
    parent='syn10848316',
)

# pair the schema with the rows and store the table in Synapse
app_table = Table(app_schema, combined)
app_final = syn.store(app_table)

In [None]:
# provenance record: what was done, which inputs were used, which code ran
combine_activity = Activity(
    name='Combine V1 and V2 data',
    description='Process and combine the data collected during study 1 and study 2',
    used=[v1sid, v2sid],
    executed=[
        {
            'name': 'IPython Notebook',
            'url': 'https://github.com/apratap/BRIGHTEN-Data-Release/blob/master/Create_AppUsage_datafiles.ipynb',
        },
    ],
)

# NOTE(review): the target entity ID is hard-coded rather than taken from
# `app_final`, and the result is bound to `phq2_final` although this notebook
# handles the application-use survey -- confirm both are as intended.
phq2_final = syn.setProvenance('syn17021581', activity=combine_activity)