In [None]:
# default_exp gbe.ema_drinking

# EMA drinking

In [None]:
# Notebook-level switch for refreshing data.
# NOTE(review): no visible cell reads this flag; the loaders below pass update=True directly — confirm intended use.
update_all_data = False

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
%reload_ext rpy2.ipython
import seaborn as sns
from matplotlib import pyplot as plt
# Matplotlib rc overrides: hide the right and top axes spines
custom_params = {"axes.spines.right": False, "axes.spines.top": False}
# Apply a white seaborn theme with enlarged fonts and the overrides above
sns.set_theme(style="white", font_scale = 1.3, rc=custom_params)
from IPython.core.display import HTML


In [None]:
#export
%load_ext autoreload
%autoreload 2
from trr265.gbe.ist.data_provider import ISTDataProvider
from trr265.gbe.wm.data_provider import WMDataProvider
from trr265.gbe.sst.data_provider import SSTDataProvider
from trr265.gbe.rtt.data_provider import RTTDataProvider

import trr265.gbe.ist.scoring as ist_scoring 
import trr265.gbe.wm.scoring as wm_scoring 
import trr265.gbe.sst.scoring as sst_scoring 
import trr265.gbe.rtt.scoring as rtt_scoring 

import pandas as pd

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Loading the data
In this notebook, all tests and examples are run on the initial baseline dataset.

### Global dataset

In [None]:
import numpy as np
def get_ba_data(path='/Users/hilmarzech/Projects/trr265/trr265/data/raw/TRR265_PID78_20220623/BA_S01_t0.dta'):
    '''Read the baseline Stata export and derive centre-prefixed movisens IDs.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the baseline ``.dta`` file. Defaults to the export that
        used to be hard-coded, so ``get_ba_data()`` behaves as before.

    Returns
    -------
    pandas.DataFrame
        The rows with ``t0_dashboard_aud == 1`` plus the added columns
        ``center`` ('b', 'd' or 'm') and ``mov_id``, ``mov_id_old``,
        ``mov_id_old_2`` (centre letter + number zero-padded to three
        characters, NaN when the centre or the number is missing).
    '''
    df = pd.read_stata(path, convert_categoricals=False).query('t0_dashboard_aud==1')
    # Propagate the centre code to all rows of a participant (forward, then backward fill)
    df['center'] = df.groupby('participant_id').t0_bx_center.transform(lambda x: x.ffill().bfill())
    df['center'] = df.center.replace({1:'b',2:'d',3:'m'})
    # Creating new movisens IDs (adding center prefix to movisens IDs)
    for old_id in ['t0_bx_movisens','t0_bx_movisens_old','t0_bx_movisens_old_2']:
        new_id = old_id.replace('t0_bx_','').replace('movisens','mov_id')
        numbers = df.groupby('participant_id')[old_id].transform(lambda x: x.ffill().bfill())
        # Remove only the trailing ".0" of a float rendering. Stripping every '0'
        # would turn an integer-typed 100 into "1" and hence into the wrong ID "001".
        numbers = numbers.astype('str').str.replace(r'\.0+$', '', regex=True).str.zfill(3)
        ids = df.center + numbers
        # Missing centre -> NaN, missing number -> e.g. 'bnan'; both become NaN.
        # Done on the Series itself because a column-level fillna(inplace=True)
        # is a chained assignment that does not write back under Copy-on-Write.
        df[new_id] = ids.mask(ids.str.contains('nan', na=True))
    return df
# Baseline rows with t0_dashboard_aud == 1; reused by the participant counts below
gba = get_ba_data()

In [None]:
#gba = pd.read_stata('/Users/hilmarzech/Projects/trr265/trr265/data/raw/TRR265_PID78_20220623/BA_S01_t0.dta',convert_categoricals=False).query('t0_dashboard_aud==1')

### Participants with movisens id

In [None]:
# Size of the baseline table and number of rows with a non-missing t0_bx_movisens
display(HTML(
    'Of the %d AUD participants in the global dataset, %d have a specified movisens ID.' % (
        len(gba),
        gba['t0_bx_movisens'].notna().sum(),
    )
))

In [None]:
# Data provider rooted at the local project data directory; its loaders are reused by the cells below
dp = ISTDataProvider('/Users/hilmarzech/Projects/trr265/trr265/data/')

# Replacing old with new movisens IDs
# NOTE(review): presumably get_duplicate_mov_ids() returns an {old_id: new_id} mapping — confirm.
# Only mov_id is remapped here; mov_id_old and mov_id_old_2 are left untouched.
gba['mov_id'] = gba.mov_id.replace(dp.get_duplicate_mov_ids())

In [None]:
# Movisens data limited to the time frame of the baseline (gba) export.
# The cutoff is 5 days after the export date to allow for the lag between redcap and movisens start.
mov_data = (
    dp.get_mov_data(update=True)
    .query('starting_date<20220628')
)

Preparing mov_data
Participants: b186, b240, b295, b313, b317, b331, b353, b357, d161, d188, d207, m158, m257, m283, m304, m338 have no associated redcap IDs and are excluded from the following analyses.


Participants in global dataset but not in movisens data.

### Participants with movisens data

In [None]:
# Movisens IDs known from the baseline data that never show up in the movisens data
display(HTML(
    '''Of the redcap participants with a movisense ID, %d do not have any movisens data.'''
    % len(set(gba['mov_id'].dropna()) - set(mov_data['participant']))
))

### Participants with drinking data

In [None]:
# Two-day data (used below as the drinking data), limited to the time frame of the baseline (gba) export.
# The cutoff is 5 days after the export date to allow for the lag between redcap and movisens start.
two_day = (
    dp.get_two_day_data(update=True)
    .query('starting_date<20220628')
)

Preparing two_day_data


In [None]:
# Movisens IDs known from the baseline data that never show up in the two-day data
display(HTML(
    '''Of the redcap participants with movisense ID, %d do not have any drinking data.'''
    % len(set(gba['mov_id'].dropna()) - set(two_day['participant']))
))

### Participants with raw GBE data

In [None]:
# Keep the participant/session keys and the four game columns of the raw GBE data
gbe = dp.get_gbe_data().loc[:, [
    'participant',
    'session_number',
    'FruitTapGame',
    'WorkingMemoryGame',
    'CardGame',
    'RewardAndHappinessGame',
]]

#### Working memory

In [None]:
from trr265.gbe.wm.data_provider import WMDataProvider
# Raw working memory data
dp = WMDataProvider('/Users/hilmarzech/Projects/trr265/trr265/data/')
df = dp.get_wm_data()
# The part of gbe_index before the first underscore is compared against the movisens IDs
display(HTML(
    '''Of the redcap participants with a movisense ID, %d do not have any working memory data.'''
    % len(set(gba['mov_id'].dropna()) - {index.split('_')[0] for index in df.gbe_index})
))

#### Information sampling 

In [None]:
from trr265.gbe.ist.data_provider import ISTDataProvider
# Raw information sampling data
dp = ISTDataProvider('/Users/hilmarzech/Projects/trr265/trr265/data/')
df = dp.get_ist_data()
# The part of gbe_index before the first underscore is compared against the movisens IDs
display(HTML(
    '''Of the redcap participants with a movisense ID, %d do not have any information sampling data.'''
    % len(set(gba['mov_id'].dropna()) - {index.split('_')[0] for index in df.gbe_index})
))

#### SST

In [None]:
from trr265.gbe.sst.data_provider import SSTDataProvider
# Raw SST data
dp = SSTDataProvider('/Users/hilmarzech/Projects/trr265/trr265/data/')
df = dp.get_sst_data()
# The part of gbe_index before the first underscore is compared against the movisens IDs
display(HTML(
    '''Of the redcap participants with a movisense ID, %d do not have any SST data.'''
    % len(set(gba['mov_id'].dropna()) - {index.split('_')[0] for index in df.gbe_index})
))

#### RTT

In [None]:
from trr265.gbe.rtt.data_provider import RTTDataProvider
# Raw RTT data
dp = RTTDataProvider('/Users/hilmarzech/Projects/trr265/trr265/data/')
df = dp.get_rtt_data()
# The part of gbe_index before the first underscore is compared against the movisens IDs
display(HTML(
    '''Of the redcap participants with a movisense ID, %d do not have any RTT data.'''
    % len(set(gba['mov_id'].dropna()) - {index.split('_')[0] for index in df.gbe_index})
))

### Participants with processed task data

### Factor scores
(based on baseline data)

In [None]:
# Precomputed factor scores from the interim data folder (path is relative to the working directory).
# NOTE(review): read_pickle can execute arbitrary code while unpickling — only load files from a trusted source.
factor_scores = pd.read_pickle('../data/interim/factor_scores.pkl')

In [None]:
# Number of distinct participant values in the factor scores (a missing value counts as one entry)
factor_scores['participant'].nunique(dropna=False)

547