# Final Mood DataFile Prep

In [None]:
import datetime as dt
import itertools
import pandas as pd

import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell
import synapseclient
from synapseclient import Activity, File, Schema, Table, as_table_columns

# Echo the value of every top-level expression in a cell, not just the last one.
InteractiveShell.ast_node_interactivity = 'all'
# Synapse client shared by all cells below.
syn = synapseclient.Synapse()
# Called without arguments. NOTE(review): presumably relies on cached or configured credentials — confirm.
syn.login()

def isnum(x):
    """Return True when ``x`` can be converted by ``float()``, otherwise False.

    ``None`` is never numeric. Values whose type ``float()`` rejects outright
    (for example a list or dict) are reported as non-numeric instead of
    raising. Strings such as ``'nan'`` and ``'inf'`` are accepted by
    ``float()`` and therefore count as numeric.
    """
    if x is None:
        return False
    try:
        float(x)
    except (TypeError, ValueError):
        # ValueError: the text is not a number (e.g. 'abc').
        # TypeError: the type itself cannot be converted (e.g. [1, 2]).
        return False
    return True

### Prep V2 data for public release

In [None]:
# Synapse ID of the V2 source file.
v2sid = 'syn9974019'

# Fetch the file and parse `createdAt` as datetimes while reading.
v2_path = syn.get(v2sid).path
v2r = pd.read_csv(v2_path, parse_dates=['createdAt'])

# Labels are assigned by position, so the file must hold exactly these four
# columns in this order.
v2r.columns = ['username', 'mood_1', 'day', 'timestamp']
# Put the columns in the order wanted for the output.
v2r = v2r[['username', 'timestamp', 'day', 'mood_1']]

v2r.head()

This survey must have been distributed and completed within the first week. It is unclear how the question "Since starting this... how has your mood changed?" applies.

In [None]:
# Histogram of the `day` column, labelled so the figure can be read on its own.
ax = v2r['day'].hist()
# The trailing semicolon keeps the return value of `set` out of the cell output.
ax.set(title='V2 responses by day', xlabel='day', ylabel='number of responses');

All mood categories are represented.

In [None]:
# Histogram of the `mood_1` responses in seven bins, labelled so the figure can be read on its own.
ax = v2r['mood_1'].hist(bins=7)
# The trailing semicolon keeps the return value of `set` out of the cell output.
ax.set(title='V2 mood_1 responses', xlabel='mood_1', ylabel='number of responses');

In [None]:
# Correlation matrix restricted to the `day` and `mood_1` columns.
day_and_mood = v2r.reindex(columns=['day', 'mood_1'])
day_and_mood.corr()

### Upload to Synapse

In [None]:
# One name serves as the local CSV path and as the Synapse entity name.
name = 'V2_MoodAssessment.csv'
# `index` is documented as a bool; False leaves the row index out of the file.
v2r.to_csv(name, index=False)

# NOTE(review): `parent` is an empty string — presumably a placeholder for the
# destination Synapse container; confirm the intended ID before running.
stored_file = syn.store(File(name=name, path=name, parent=''))

# Provenance: the source entity this file was derived from and the notebook
# that produced it.
release_activity = Activity(
    name='Public Release',
    description='Prepare data for public release',
    used=[v2sid],
    executed=[
        dict(
            name='IPython Notebook',
            url='https://github.com/apratap/BRIGHTEN-Data-Release/blob/master/Create_Mood_datafiles.ipynb'
        )
    ]
)

# Bind and delete the result so nothing is echoed: this notebook displays the
# value of every bare expression.
t = syn.setProvenance(stored_file, activity=release_activity)
del t

### Prepare Data

In [None]:
# Synapse IDs of the V1 and V2 mood files.
v1sid = 'syn12181335'
v2sid = 'syn17023109'

# Both files are read the same way: `timestamp` is parsed as datetimes.
timestamp_opts = {'parse_dates': ['timestamp']}
v1r = pd.read_csv(syn.get(v1sid).path, **timestamp_opts)
v2r = pd.read_csv(syn.get(v2sid).path, **timestamp_opts)

# Preview both frames.
v1r.head()
v2r.head()

### Process V1 Data

In [4]:
# Count how often each mood label occurs in the V1 data.
v1r['mood_1'].value_counts()

A little better        510
Almost the same        491
No change              288
Somewhat better        182
Moderately better      110
Better                  41
A great deal better     14
Name: mood_1, dtype: int64

In [None]:
# Give the ID and timestamp columns their output names, then discard the
# columns that are not carried forward.
v1 = (
    v1r
    .rename(columns={'brightenid': 'participant_id', 'timestamp': 'dt_response'})
    .drop(columns=['userid', 'timestampUTC'])
)

# encode the mood response to match v2
def fx(x):
    """Translate a V1 mood label into its integer code, from 1 to 7.

    The labels are listed below in coding order: 'No change' is 1 and
    'A great deal better' is 7.

    Raises:
        ValueError: if ``x`` is not equal to any of the seven labels.
    """
    labels_in_code_order = (
        'No change',
        'Almost the same',
        'A little better',
        'Somewhat better',
        'Moderately better',
        'Better',
        'A great deal better',
    )
    # The code is the label's 1-based position in the tuple.
    for code, label in enumerate(labels_in_code_order, start=1):
        if x == label:
            return code
    raise ValueError(f'encoding not mapped: {x}')
    
# Replace each text label in `mood_1` with the integer code returned by fx.
v1['mood_1'] = v1['mood_1'].apply(fx)

### Process V2 Data

In [None]:
# Give the ID and timestamp columns their output names, then remove `day`.
v2 = (
    v2r
    .rename(columns={'username': 'participant_id', 'timestamp': 'dt_response'})
    .drop(columns=['day'])
)

### Combine

In [None]:
# Stack the V1 rows on top of the V2 rows without sorting the columns.
# ignore_index=True gives the result a fresh 0..n-1 index; without it each
# input keeps its own row labels, so labels repeat in the combined frame.
combined = pd.concat([v1, v2], sort=False, ignore_index=True)
combined.head()

### Set provenance and upload to Synapse

In [None]:
# Describe the table: its name, columns derived from the combined frame, and
# the Synapse container it belongs to.
mood_schema = Schema(
    name='Mood Assessment',
    columns=as_table_columns(combined),
    parent='syn10848316',
)

# Store the combined rows under that schema.
final = syn.store(Table(mood_schema, combined))

In [None]:
# Attach provenance to the stored table: the two source files that were used
# and the notebook that was executed.
# NOTE(review): the target is a hard-coded Synapse ID, not the entity stored
# in the previous cell — confirm it is the 'Mood Assessment' table.
final = syn.setProvenance(
    'syn17023313',
    activity=Activity(
        name='Combine V1 and V2 data',
        description='Process and combine the data collected during study 1 and study 2',
        used=[v1sid, v2sid],
        executed=[
            dict(
                name='IPython Notebook',
                # Link to the Mood notebook, matching the URL recorded for the
                # V2 public-release upload earlier in this notebook.
                url='https://github.com/apratap/BRIGHTEN-Data-Release/blob/master/Create_Mood_datafiles.ipynb'
            )
        ]
    )
)