In [None]:
#export
%load_ext autoreload
%autoreload 2
from trr265.gbe.ist.data_provider import ISTDataProvider
from trr265.gbe.wm.data_provider import WMDataProvider
from trr265.gbe.sst.data_provider import SSTDataProvider
from trr265.gbe.rtt.data_provider import RTTDataProvider

import trr265.gbe.ist.scoring as ist_scoring 
import trr265.gbe.wm.scoring as wm_scoring 
import trr265.gbe.sst.scoring as sst_scoring 
import trr265.gbe.rtt.scoring as rtt_scoring 

import pandas as pd

In [None]:
# Load the raw IST data and attach the REDCap session metadata.
# NOTE(review): absolute, machine-specific data path — consider a shared, configurable data directory.
dp = ISTDataProvider('/Users/hilmarzech/Projects/trr265/trr265/data/')
df = (
    dp.get_ist_data()
    .merge(
        dp.get_gbe_data(columns=['participant', 'session_number', 'is_initial', 'is_baseline']),
        left_on='gbe_index',
        right_index=True,
        how='left',
    )
    # Keep initial sessions only (drops replication and EMA data)
    .query("is_initial")
)
# First element of the scoring result is the score table; give its single column a short name
ist = ist_scoring.get_oversampling_predicted_joint(df)[0]
ist.columns = ['ist_oversampling']
ist

Unnamed: 0_level_0,ist_oversampling
gbe_index,Unnamed: 1_level_1
b001_001,0.190418
b001_002,0.190418
b001_003,0.190418
b001_007,0.365325
b001_009,0.474641
...,...
m271_011,1.016484
m271_012,0.644808
m271_013,0.863441
m271_014,0.579218


In [None]:
# Load the raw working-memory data and attach the REDCap session metadata.
# NOTE(review): absolute, machine-specific data path — consider a shared, configurable data directory.
dp = WMDataProvider('/Users/hilmarzech/Projects/trr265/trr265/data/')
df = (
    dp.get_wm_data()
    .merge(
        dp.get_gbe_data(columns=['participant', 'session_number', 'is_initial', 'is_baseline']),
        left_on='gbe_index',
        right_index=True,
        how='left',
    )
    # Keep initial sessions only (drops replication and EMA data)
    .query("is_initial")
    # Drop sessions that were recorded with the old version of the task
    .pipe(dp.filter_old_app_sessions)
    # Drop sessions in which a level two trial was failed
    .pipe(dp.filter_level_two_failures)
)
# First element of the scoring result is the score table; shorten its column names
wm = wm_scoring.get_perc_correct_predicted_sep_trial(df)[0].rename(
    columns={
        'perc_predicted_sep_trial_no_distractor_1': 'wm_no_1',
        'perc_predicted_sep_trial_no_distractor_2': 'wm_no_2',
        'perc_predicted_sep_trial_encoding_distractor': 'wm_encoding',
        'perc_predicted_sep_trial_delayed_distractor': 'wm_delayed',
    }
)

9 participants used an old version of the task in some of their sessions.  30 sessions (1.09%) were removed from the dataset.
31 sessions (1.14%) were removed because participants failed a level two trial.


In [None]:
# Load the raw RTT data and attach the REDCap session metadata.
# NOTE(review): absolute, machine-specific data path — consider a shared, configurable data directory.
dp = RTTDataProvider('/Users/hilmarzech/Projects/trr265/trr265/data/')
df = (
    dp.get_rtt_data()
    .merge(
        dp.get_gbe_data(columns=['participant', 'session_number', 'is_initial', 'is_baseline']),
        left_on='gbe_index',
        right_index=True,
        how='left',
    )
    # Keep initial sessions only (drops replication and EMA data)
    .query("is_initial")
)
# First element of the scoring result is the score table; shorten its column names
rtt = rtt_scoring.get_perc_gamble_predicted_joint(df)[0].rename(
    columns={
        'perc_gamble_joint_win': 'rtt_win',
        'perc_gamble_joint_loss': 'rtt_loss',
        'perc_gamble_joint_mixed': 'rtt_mixed',
    }
)

In [None]:
# Load the raw SST data and attach the REDCap session metadata.
# NOTE(review): absolute, machine-specific data path — consider a shared, configurable data directory.
dp = SSTDataProvider('/Users/hilmarzech/Projects/trr265/trr265/data/')
df = (
    dp.get_sst_data()
    .merge(
        dp.get_gbe_data(columns=['participant', 'session_number', 'is_initial', 'is_baseline']),
        left_on='gbe_index',
        right_index=True,
        how='left',
    )
    # Keep initial sessions only (drops replication and EMA data)
    .query("is_initial")
)
# First element of the scoring result is the score table; give its single column a short name
sst = sst_scoring.get_ssrt_predicted_joint(df)[0]
sst.columns = ['ssrt']

In [None]:
# Put one score per task side by side, aligned on the shared index; sessions
# missing from a task end up as NaN in that task's column. The index is then
# turned back into a regular column.
tasks = pd.concat(
    [
        wm.loc[:, ['wm_no_1']],
        sst,
        rtt.loc[:, ['rtt_win']],
        ist,
    ],
    axis='columns',
).reset_index()

In [None]:
# gbe_index has the form '<participant>_<session>' (e.g. 'b001_007'):
# the part after the underscore is the session number, the part before it the participant id.
tasks['session'] = tasks['gbe_index'].str.split('_').str[1].astype(int)
tasks['participant'] = tasks['gbe_index'].str.split('_').str[0]

tasks

Unnamed: 0,gbe_index,wm_no_1,ssrt,rtt_win,ist_oversampling,session,participant
0,b001_001,0.975715,335.870776,0.532304,0.190418,1,b001
1,b001_002,0.911146,337.786765,0.750818,0.190418,2,b001
2,b001_003,0.911146,333.320828,0.240045,0.190418,3,b001
3,b001_007,0.975715,305.844152,0.240045,0.365325,7,b001
4,b001_009,0.975715,367.613820,0.240045,0.474641,9,b001
...,...,...,...,...,...,...,...
2790,m063_007,,,,-0.839476,7,m063
2791,m090_009,,,,-0.580886,9,m090
2792,m092_011,,,,-1.227142,11,m092
2793,m102_003,,,,-0.770376,3,m102


In [None]:
# Number of rows (one per gbe_index) in the combined task table
tasks.shape[0]

2795

In [None]:
# Total number of missing values per column. Summing the per-session counts over
# all sessions equals counting over the whole table, so no grouping is needed;
# 'session' itself is left out, as it was when it served as the grouping key.
tasks.drop(columns='session').isna().sum()

gbe_index             0
wm_no_1             114
ssrt                 41
rtt_win              51
ist_oversampling     78
dtype: int64

In [None]:
# Number of missing values per column, broken down by session number
tasks.set_index('session').isna().groupby(level='session').sum()

Unnamed: 0_level_0,gbe_index,wm_no_1,ssrt,rtt_win,ist_oversampling
session,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0,15,4,5,13
2,0,12,2,3,10
3,0,12,5,4,10
4,0,4,0,2,6
5,0,15,3,5,6
6,0,6,1,2,3
7,0,10,4,5,10
8,0,8,4,5,2
9,0,8,8,5,6
10,0,9,3,9,6


In [None]:
# Display the combined task table (one row per gbe_index) for inspection
tasks

Unnamed: 0,gbe_index,wm_no_1,ssrt,rtt_win,ist_oversampling,session,participant
0,b001_001,0.975715,335.870776,0.532304,0.190418,1,b001
1,b001_002,0.911146,337.786765,0.750818,0.190418,2,b001
2,b001_003,0.911146,333.320828,0.240045,0.190418,3,b001
3,b001_007,0.975715,305.844152,0.240045,0.365325,7,b001
4,b001_009,0.975715,367.613820,0.240045,0.474641,9,b001
...,...,...,...,...,...,...,...
2790,m063_007,,,,-0.839476,7,m063
2791,m090_009,,,,-0.580886,9,m090
2792,m092_011,,,,-1.227142,11,m092
2793,m102_003,,,,-0.770376,3,m102


In [None]:
# For each column, count the participants that have a non-missing value in
# exactly `sessions` of the sessions numbered 1..sessions.
sessions = 8
(
    tasks.loc[tasks['session'] <= sessions]
    .groupby('participant')
    .count()  # non-missing values per participant and column
    .eq(sessions)
    .sum()
)

gbe_index           110
wm_no_1              95
ssrt                103
rtt_win              99
ist_oversampling     95
session             110
dtype: int64