In [161]:
import pandas as pd
import numpy as np
from dotenv import dotenv_values

from tqdm import tqdm

CONFIG = dotenv_values("./.env")

# Load data

In [146]:
df = pd.read_parquet(CONFIG['SIM_DIR'])
df.head()

Unnamed: 0,class,param,mean,sem,runs,arr_lam_0,base_duration_0,wait_effect_0,modality_effect_0,modality_policy_0,arr_lam_1,base_duration_1,wait_effect_1,modality_effect_1,modality_policy_1,arr_lam_2,base_duration_2,wait_effect_2,modality_effect_2,modality_policy_2
0,-1,age_out,0.240353,0.000263,5,3.333,7,0.0,-0.5,0.0,3.333,10,0.0,0.0,0.0,3.333,13,0.0,0.5,0.0
1,-1,wait_time,40.344478,0.054084,5,3.333,7,0.0,-0.5,0.0,3.333,10,0.0,0.0,0.0,3.333,13,0.0,0.5,0.0
2,0,age_out,0.232908,0.003589,5,3.333,7,0.0,-0.5,0.0,3.333,10,0.0,0.0,0.0,3.333,13,0.0,0.5,0.0
3,0,wait_time,39.754351,0.620999,5,3.333,7,0.0,-0.5,0.0,3.333,10,0.0,0.0,0.0,3.333,13,0.0,0.5,0.0
4,1,age_out,0.241941,0.003397,5,3.333,7,0.0,-0.5,0.0,3.333,10,0.0,0.0,0.0,3.333,13,0.0,0.5,0.0


In [147]:
print(f"Dataframe has {df.shape[0]} rows.")

Dataframe has 36000 rows.


Show the range of age out proportions and wait times

In [148]:
for outcome in ['age_out', 'wait_time']:
    print(f"Range of {outcome}: {df.loc[(df['param']==outcome) & (df['class']==-1), 'mean'].min():.3f} - {df.loc[(df['param']==outcome) & (df['class']==-1), 'mean'].max():.3f}")

Range of age_out: 0.238 - 0.310
Range of wait_time: 39.918 - 49.911


Show the wait effects

In [149]:
df[['wait_effect_0', 'wait_effect_1', 'wait_effect_2']].drop_duplicates()

Unnamed: 0,wait_effect_0,wait_effect_1,wait_effect_2
0,0.0,0.0,0.0


Show the different modality effects

In [150]:
df[['modality_effect_0', 'modality_effect_1', 'modality_effect_2']].drop_duplicates()

Unnamed: 0,modality_effect_0,modality_effect_1,modality_effect_2
0,-0.5,0.0,0.5
0,0.0,-0.5,0.5
0,0.0,0.5,-0.5
0,-0.5,0.5,0.0
0,0.5,-0.5,0.0
0,0.5,0.0,-0.5


Find the best modality policy based on total ageout proportion OR total wait time (indicated by class=-1) for a given modality effect parameterization.

In [151]:
modality_effect_params = [-1, 0, 1]
wait_effect_params = [-1, 0, 1]
outcome = 'wait_time'

temp = df.loc[(df['wait_effect_0']==wait_effect_params[0]) & (df['wait_effect_1']==wait_effect_params[1]) & (df['wait_effect_2']==wait_effect_params[2]) &
       (df['modality_effect_0']==modality_effect_params[0]) & (df['modality_effect_1']==modality_effect_params[1]) & (df['modality_effect_2']==modality_effect_params[2]) &
       (df['class']==-1) & (df['param']==outcome)]
temp.loc[(temp['mean']==temp['mean'].min())]

Unnamed: 0,class,param,mean,sem,runs,arr_lam_0,base_duration_0,wait_effect_0,modality_effect_0,modality_policy_0,arr_lam_1,base_duration_1,wait_effect_1,modality_effect_1,modality_policy_1,arr_lam_2,base_duration_2,wait_effect_2,modality_effect_2,modality_policy_2


Check under what conditions is not delivering all in-person treatment the best choice.

In [156]:
modality_effect_params = df[['modality_effect_0', 'modality_effect_1', 'modality_effect_2']].drop_duplicates()
wait_effect_params = df[['wait_effect_0', 'wait_effect_1', 'wait_effect_2']].drop_duplicates()
outcome = 'wait_time'

best_pols = pd.DataFrame()
for i, wait_row in tqdm(wait_effect_params.iterrows()):
    for j, mod_row in modality_effect_params.iterrows():
        temp = df.loc[(df['wait_effect_0']==wait_row['wait_effect_0']) & (df['wait_effect_1']==wait_row['wait_effect_1']) & (df['wait_effect_2']==wait_row['wait_effect_2']) &
                (df['modality_effect_0']==mod_row['modality_effect_0']) & (df['modality_effect_1']==mod_row['modality_effect_1']) & (df['modality_effect_2']==mod_row['modality_effect_2']) &
                (df['class']==-1) & (df['param']==outcome)]
        min_row = temp.loc[(temp['mean']==temp['mean'].min())]
        best_pols = pd.concat([best_pols, min_row], axis=0)

1it [00:00, 49.12it/s]


Check for occurences where all modality policies are not the same

In [157]:
from itertools import combinations
for i in combinations(['modality_policy_0', 'modality_policy_1', 'modality_policy_2'], 2):
    print(i)

temp = best_pols.loc[(best_pols['modality_policy_0'] != best_pols['modality_policy_2']) |
                     (best_pols['modality_policy_0'] != best_pols['modality_policy_1']) |
                     (best_pols['modality_policy_1'] != best_pols['modality_policy_2']),
              ['mean', 'wait_effect_0', 'wait_effect_1', 'wait_effect_2',
               'modality_effect_0', 'modality_effect_1', 'modality_effect_2',
               'modality_policy_0', 'modality_policy_1', 'modality_policy_2']]
temp

('modality_policy_0', 'modality_policy_1')
('modality_policy_0', 'modality_policy_2')
('modality_policy_1', 'modality_policy_2')


Unnamed: 0,mean,wait_effect_0,wait_effect_1,wait_effect_2,modality_effect_0,modality_effect_1,modality_effect_2,modality_policy_0,modality_policy_1,modality_policy_2
1,40.073169,0.0,0.0,0.0,-0.5,0.0,0.5,0.75,0.0,0.0
1,40.462171,0.0,0.0,0.0,0.0,0.5,-0.5,0.0,0.0,0.5
1,39.917983,0.0,0.0,0.0,-0.5,0.5,0.0,0.5,0.0,0.0
1,40.419139,0.0,0.0,0.0,0.5,-0.5,0.0,0.0,0.25,0.0


In [158]:
best_pols

Unnamed: 0,class,param,mean,sem,runs,arr_lam_0,base_duration_0,wait_effect_0,modality_effect_0,modality_policy_0,arr_lam_1,base_duration_1,wait_effect_1,modality_effect_1,modality_policy_1,arr_lam_2,base_duration_2,wait_effect_2,modality_effect_2,modality_policy_2
1,-1,wait_time,40.073169,0.28537,5,3.333,7,0.0,-0.5,0.75,3.333,10,0.0,0.0,0.0,3.333,13,0.0,0.5,0.0
1,-1,wait_time,40.121872,0.081099,5,3.333,7,0.0,0.0,0.0,3.333,10,0.0,-0.5,0.0,3.333,13,0.0,0.5,0.0
1,-1,wait_time,40.462171,0.245453,5,3.333,7,0.0,0.0,0.0,3.333,10,0.0,0.5,0.0,3.333,13,0.0,-0.5,0.5
1,-1,wait_time,39.917983,0.196354,5,3.333,7,0.0,-0.5,0.5,3.333,10,0.0,0.5,0.0,3.333,13,0.0,0.0,0.0
1,-1,wait_time,40.419139,0.22763,5,3.333,7,0.0,0.5,0.0,3.333,10,0.0,-0.5,0.25,3.333,13,0.0,0.0,0.0
1,-1,wait_time,40.352402,0.11544,5,3.333,7,0.0,0.5,0.0,3.333,10,0.0,0.0,0.0,3.333,13,0.0,-0.5,0.0


In [159]:
from itertools import permutations

count = 0
for i in permutations([-1, -0.5, 0, 0.5, 1], 3):
    count += 1
print(count)

60


In [162]:
best_pols.to_csv(CONFIG['BATCH_ANALYSIS_DIR'] + "best_pols_05.csv", index=False)