In [1]:
import numpy as np
import pandas as pd
import os

In [2]:
# Directory that holds the simulation result CSVs read below
# (relative path, resolved against the notebook's working directory).
filepath = '../../simulations_data'

In [3]:
# Load the two simulation result tables found under `filepath`.
# `full_df_bin` is the frame every analysis cell below works from.
# NOTE(review): `full_df_nan` is loaded but not referenced by any cell visible
# in this part of the notebook -- confirm whether it is still needed.
full_df_bin = pd.read_csv(f'{filepath}/all_sims_binary_outcomes.csv')
full_df_nan = pd.read_csv(f'{filepath}/all_sims_nan.csv')

In [4]:
# Columns removed from the results frame before the remaining columns are
# cast to float and averaged per 'Sim' in the analysis cells.
drop_cols = [
    'Iter',
    'Covs',
    'T Setting',
    'T Drop Setting',
    'DGP',
    'Binary Dose',
    'Observed',
    'Expert',
    'Inaction',
    'Random',
    'Full Dosing',
]

## Section 6 Results

In [5]:
# Section 6 subset: rows with T Setting 'b' generated by the 'informed' DGP.
t_setting = 'b'
dgp = 'informed'

keep_rows = (full_df_bin['T Setting'] == t_setting) & (full_df_bin['DGP'] == dgp)

# Average every remaining column within each 'Sim'. drop() removes all of
# drop_cols (and raises if one is missing), so no second column filter is needed.
this_df = full_df_bin[keep_rows].drop(columns=drop_cols).astype('float').groupby('Sim').mean()
this_df = this_df[[c for c in this_df.columns if 'R3' not in c]]  # drop oracle reward methods

# 0-based rank of each method within each simulation setup (0 = smallest mean).
# A method whose mean is NaN for a setup gets a NaN rank for that setup.
rankings = {m: [] for m in this_df.columns}
for _, sim_means in this_df.iterrows():
    ranked_methods = sim_means.sort_values().dropna().index
    # One dict lookup per method instead of a list.index() scan per method.
    position = {method: rank for rank, method in enumerate(ranked_methods)}
    for method, ranks in rankings.items():
        ranks.append(position.get(method, np.nan))

rankings = pd.DataFrame(rankings)
for m in rankings.columns:  # replace all multi methods with the binary version when the dose is binary
    if 'Multi' in m:
        # The counterpart column is named either without ' Multi' or with
        # ' (Python)' in its place. Checking explicitly (rather than a bare
        # except) keeps unrelated errors visible; a missing counterpart still
        # raises KeyError as before.
        binary_name = m.replace(' Multi', '')
        if binary_name not in rankings.columns:
            binary_name = m.replace(' Multi', ' (Python)')
        rankings[m] = rankings[m].fillna(rankings[binary_name])

In [7]:
# Number of simulation setups that were ranked.
print('Total number of simulation setups:', rankings.shape[0], '', sep='\n')

# Setups in which each method held rank 0; the five largest counts.
top_counts = rankings.le(0).sum().sort_values(ascending=False).head(5)
print('Top performing method counts', top_counts, '', '', sep='\n')

# Largest gap between each method and the per-setup minimum; five smallest gaps.
gap_to_best = this_df.sub(this_df.min(axis=1), axis=0).max(axis=0).sort_values().head(5)
print('Max percentage points from top performing method across simulation setups WITHOUT ORACLE METHODS:',
      gap_to_best, '', sep='\n')

Total number of simulation setups:
8

Top performing method counts
Our Method             8
CQL R1                 0
Linear Inf Multi R2    0
RF Inf Multi R1        0
RF Inf Multi R2        0
dtype: int64


Max percentage points from top performing method across simulation setups WITHOUT ORACLE METHODS:
Our Method    0.00000
CQL R1        0.03600
CQL R2        0.04015
CRR R2        0.06540
BCQ R1        0.07170
dtype: float64



## Appendix Results

In [8]:
# Appendix subset: rows with T Setting 'b' and T Drop Setting 'b'.
t_setting = 'b'
t_drop_setting = 'b'

keep_rows = (full_df_bin['T Setting'] == t_setting) & (full_df_bin['T Drop Setting'] == t_drop_setting)

# Average every remaining column within each 'Sim'. drop() removes all of
# drop_cols (and raises if one is missing), so no second column filter is needed.
this_df = full_df_bin[keep_rows].drop(columns=drop_cols).astype('float').groupby('Sim').mean()
this_df = this_df[[c for c in this_df.columns if 'R3' not in c]]  # drop oracle reward methods

# 0-based rank of each method within each simulation setup (0 = smallest mean).
# A method whose mean is NaN for a setup gets a NaN rank for that setup.
rankings = {m: [] for m in this_df.columns}
for _, sim_means in this_df.iterrows():
    ranked_methods = sim_means.sort_values().dropna().index
    # One dict lookup per method instead of a list.index() scan per method.
    position = {method: rank for rank, method in enumerate(ranked_methods)}
    for method, ranks in rankings.items():
        ranks.append(position.get(method, np.nan))

rankings = pd.DataFrame(rankings)
for m in rankings.columns:
    if 'Multi' in m:
        # Fill missing Multi ranks from the counterpart column, named either
        # without ' Multi' or with ' (Python)' in its place. An explicit
        # membership check replaces the bare except so unrelated errors are
        # not swallowed; a missing counterpart still raises KeyError.
        binary_name = m.replace(' Multi', '')
        if binary_name not in rankings.columns:
            binary_name = m.replace(' Multi', ' (Python)')
        rankings[m] = rankings[m].fillna(rankings[binary_name])

In [9]:
# Number of simulation setups that were ranked.
print('Total number of simulation setups:', rankings.shape[0], '', sep='\n')

# Setups in which each method held rank 0; the five largest counts.
top_counts = rankings.le(0).sum().sort_values(ascending=False).head(5)
print('Top performing method counts', top_counts, '', sep='\n')

# Setups in which each method held one of ranks 0-3; the five largest counts.
top4_counts = rankings.le(3).sum().sort_values(ascending=False).head(5)
print('In top 4 performing method counts', top4_counts, '', sep='\n')

# Largest gap between each method and the per-setup minimum; five smallest gaps.
gap_to_best = this_df.sub(this_df.min(axis=1), axis=0).max(axis=0).sort_values().head(5)
print('Max percentage points from top performing method across simulation setups WITHOUT ORACLE METHODS:',
      gap_to_best, '', sep='\n')

Total number of simulation setups:
8

Top performing method counts
Our Method             5
Linear Inf R1          2
Linear Inf Multi R1    2
SAC R1                 1
SAC R2                 0
dtype: int64

In top 4 performing method counts
Our Method                    8
Linear Inf R1                 6
Linear Inf Multi R1           4
Linear Inf Multi R2           3
Linear Q-learning (Python)    3
dtype: int64

Max percentage points from top performing method across simulation setups WITHOUT ORACLE METHODS:
Our Method       0.04495
SAC R1           0.10650
Linear Inf R1    0.20215
CQL R1           0.26160
BCQ R1           0.27580
dtype: float64



In [10]:
# Same subset as the preceding appendix cell (T Setting 'b', T Drop Setting 'b'),
# but the 'R3' (oracle reward) columns are deliberately kept this time.
t_setting = 'b'
t_drop_setting = 'b'

keep_rows = full_df_bin['T Setting'].eq(t_setting) & full_df_bin['T Drop Setting'].eq(t_drop_setting)

# Per-'Sim' mean of every column that survives the drop.
this_df = (
    full_df_bin.loc[keep_rows]
    .drop(columns=drop_cols)
    .astype('float')
    .groupby('Sim')
    .mean()
)

print('Max percentage points from top performing method across simulation setups WITH ORACLE METHODS:')
# Trailing expression: the cell displays the five smallest worst-case gaps.
(
    this_df
    .sub(this_df.min(axis=1), axis=0)
    .max(axis=0)
    .sort_values()
    .head(5)
)

Max percentage points from top performing method across simulation setups WITH ORACLE METHODS:


Our Method             0.11730
SAC R3                 0.15085
Linear Inf Multi R3    0.18620
Linear Inf R1          0.21685
SAC R1                 0.22035
dtype: float64

In [11]:
# All simulation setups: no row filter is applied to full_df_bin.
# Average every remaining column within each 'Sim'. drop() removes all of
# drop_cols (and raises if one is missing), so no second column filter is needed.
this_df = full_df_bin.drop(columns=drop_cols).astype('float').groupby('Sim').mean()
this_df = this_df[[c for c in this_df.columns if 'R3' not in c]]  # drop oracle reward methods

# 0-based rank of each method within each simulation setup (0 = smallest mean).
# A method whose mean is NaN for a setup gets a NaN rank for that setup.
rankings = {m: [] for m in this_df.columns}
for _, sim_means in this_df.iterrows():
    ranked_methods = sim_means.sort_values().dropna().index
    # One dict lookup per method instead of a list.index() scan per method.
    position = {method: rank for rank, method in enumerate(ranked_methods)}
    for method, ranks in rankings.items():
        ranks.append(position.get(method, np.nan))

rankings = pd.DataFrame(rankings)
for m in rankings.columns:
    if 'Multi' in m:
        # Fill missing Multi ranks from the counterpart column, named either
        # without ' Multi' or with ' (Python)' in its place. An explicit
        # membership check replaces the bare except so unrelated errors are
        # not swallowed; a missing counterpart still raises KeyError.
        binary_name = m.replace(' Multi', '')
        if binary_name not in rankings.columns:
            binary_name = m.replace(' Multi', ' (Python)')
        rankings[m] = rankings[m].fillna(rankings[binary_name])

In [12]:
# Number of simulation setups that were ranked.
print('Total number of simulation setups:', rankings.shape[0], '', sep='\n')

# Setups in which each method held rank 0; the five largest counts.
top_counts = rankings.le(0).sum().sort_values(ascending=False).head(5)
print('Top performing method counts', top_counts, '', sep='\n')

# Setups in which each method held one of ranks 0-3; the five largest counts.
top4_counts = rankings.le(3).sum().sort_values(ascending=False).head(5)
print('In top 4 performing method counts', top4_counts, '', sep='\n')

# Largest gap between each method and the per-setup minimum; five smallest gaps.
gap_to_best = this_df.sub(this_df.min(axis=1), axis=0).max(axis=0).sort_values().head(5)
print('Max percentage points from top performing method across simulation setups WITHOUT ORACLE METHODS:',
      gap_to_best, '', sep='\n')

Total number of simulation setups:
32

Top performing method counts
Our Method                17
Linear Inf R1              6
Linear Inf Multi R1        5
Linear BOWL R1             3
RF Q-learning (Python)     2
dtype: int64

In top 4 performing method counts
Our Method                 29
Linear Inf R1              17
Linear Q-learning Multi    11
SV Q-learning Multi        11
Linear Inf Multi R2         9
dtype: int64

Max percentage points from top performing method across simulation setups WITHOUT ORACLE METHODS:
Our Method             0.100600
CRR R2                 0.368176
Linear Inf Multi R2    0.379050
CRR R1                 0.384400
CQL R1                 0.386500
dtype: float64



In [13]:
# All simulation setups, with the 'R3' (oracle reward) columns deliberately kept.
# Per-'Sim' mean of every column that survives the drop.
this_df = (
    full_df_bin
    .drop(columns=drop_cols)
    .astype('float')
    .groupby('Sim')
    .mean()
)

print('Max percentage points from top performing method across simulation setups WITH ORACLE METHODS:')
# Trailing expression: the cell displays the five smallest worst-case gaps.
(
    this_df
    .sub(this_df.min(axis=1), axis=0)
    .max(axis=0)
    .sort_values()
    .head(5)
)

Max percentage points from top performing method across simulation setups WITH ORACLE METHODS:


Our Method             0.150000
Linear Inf Multi R3    0.229050
SV Inf Multi R3        0.299700
CRR R3                 0.360176
CQL R3                 0.378450
dtype: float64

In [14]:
# Subset: every setup generated by the 'informed' DGP.
dgp = 'informed'

keep_rows = (full_df_bin['DGP'] == dgp)

# Average every remaining column within each 'Sim'. drop() removes all of
# drop_cols (and raises if one is missing), so no second column filter is needed.
this_df = full_df_bin[keep_rows].drop(columns=drop_cols).astype('float').groupby('Sim').mean()
this_df = this_df[[c for c in this_df.columns if 'R3' not in c]]  # drop oracle reward methods

# 0-based rank of each method within each simulation setup (0 = smallest mean).
# A method whose mean is NaN for a setup gets a NaN rank for that setup.
rankings = {m: [] for m in this_df.columns}
for _, sim_means in this_df.iterrows():
    ranked_methods = sim_means.sort_values().dropna().index
    # One dict lookup per method instead of a list.index() scan per method.
    position = {method: rank for rank, method in enumerate(ranked_methods)}
    for method, ranks in rankings.items():
        ranks.append(position.get(method, np.nan))

rankings = pd.DataFrame(rankings)
for m in rankings.columns:
    if 'Multi' in m:
        # Fill missing Multi ranks from the counterpart column, named either
        # without ' Multi' or with ' (Python)' in its place. An explicit
        # membership check replaces the bare except so unrelated errors are
        # not swallowed; a missing counterpart still raises KeyError.
        binary_name = m.replace(' Multi', '')
        if binary_name not in rankings.columns:
            binary_name = m.replace(' Multi', ' (Python)')
        rankings[m] = rankings[m].fillna(rankings[binary_name])

In [15]:
# Number of simulation setups that were ranked.
print('Total number of simulation setups:', rankings.shape[0], '', sep='\n')

# Setups in which each method held rank 0; the five largest counts.
top_counts = rankings.le(0).sum().sort_values(ascending=False).head(5)
print('Top performing method counts', top_counts, '', sep='\n')

# Setups in which each method held one of ranks 0-3; the five largest counts.
top4_counts = rankings.le(3).sum().sort_values(ascending=False).head(5)
print('In top 4 performing method counts', top4_counts, '', sep='\n')

# Largest gap between each method and the per-setup minimum; five smallest gaps.
gap_to_best = this_df.sub(this_df.min(axis=1), axis=0).max(axis=0).sort_values().head(5)
print('Max percentage points from top performing method across simulation setups WITHOUT ORACLE METHODS:',
      gap_to_best, '', sep='\n')

Total number of simulation setups:
16

Top performing method counts
Our Method                12
Linear BOWL R1             2
RF Q-learning (Python)     1
SV Q-learning (Python)     1
RF Q-learning Multi        1
dtype: int64

In top 4 performing method counts
Our Method             15
Linear Inf R1           8
Linear Inf Multi R2     6
Linear Inf R2           6
SV Q-learning Multi     5
dtype: int64

Max percentage points from top performing method across simulation setups WITHOUT ORACLE METHODS:
Our Method             0.079550
BCQ R2                 0.281824
SAC R2                 0.299100
BCQ R1                 0.300666
Linear Inf Multi R2    0.305050
dtype: float64



In [16]:
# Subset: every setup generated by the 'random' DGP.
dgp = 'random'

keep_rows = (full_df_bin['DGP'] == dgp)

# Average every remaining column within each 'Sim'. drop() removes all of
# drop_cols (and raises if one is missing), so no second column filter is needed.
this_df = full_df_bin[keep_rows].drop(columns=drop_cols).astype('float').groupby('Sim').mean()
this_df = this_df[[c for c in this_df.columns if 'R3' not in c]]  # drop oracle reward methods

# 0-based rank of each method within each simulation setup (0 = smallest mean).
# A method whose mean is NaN for a setup gets a NaN rank for that setup.
rankings = {m: [] for m in this_df.columns}
for _, sim_means in this_df.iterrows():
    ranked_methods = sim_means.sort_values().dropna().index
    # One dict lookup per method instead of a list.index() scan per method.
    position = {method: rank for rank, method in enumerate(ranked_methods)}
    for method, ranks in rankings.items():
        ranks.append(position.get(method, np.nan))

rankings = pd.DataFrame(rankings)
for m in rankings.columns:
    if 'Multi' in m:
        # Fill missing Multi ranks from the counterpart column, named either
        # without ' Multi' or with ' (Python)' in its place. An explicit
        # membership check replaces the bare except so unrelated errors are
        # not swallowed; a missing counterpart still raises KeyError.
        binary_name = m.replace(' Multi', '')
        if binary_name not in rankings.columns:
            binary_name = m.replace(' Multi', ' (Python)')
        rankings[m] = rankings[m].fillna(rankings[binary_name])

In [17]:
# Number of simulation setups that were ranked.
print('Total number of simulation setups:', rankings.shape[0], '', sep='\n')

# Setups in which each method held rank 0; the five largest counts.
top_counts = rankings.le(0).sum().sort_values(ascending=False).head(5)
print('Top performing method counts', top_counts, '', sep='\n')

# Setups in which each method held one of ranks 0-3; the five largest counts.
top4_counts = rankings.le(3).sum().sort_values(ascending=False).head(5)
print('In top 4 performing method counts', top4_counts, '', sep='\n')

# Largest gap between each method and the per-setup minimum; five smallest gaps.
gap_to_best = this_df.sub(this_df.min(axis=1), axis=0).max(axis=0).sort_values().head(5)
print('Max percentage points from top performing method across simulation setups WITHOUT ORACLE METHODS:',
      gap_to_best, '', sep='\n')

Total number of simulation setups:
16

Top performing method counts
Linear Inf R1              6
Our Method                 5
Linear Inf Multi R1        5
SAC R1                     2
Linear Q-learning Multi    1
dtype: int64

In top 4 performing method counts
Our Method                 14
Linear Inf R1               9
Linear Q-learning Multi     9
RF Inf R1                   7
SV Q-learning Multi         6
dtype: int64

Max percentage points from top performing method across simulation setups WITHOUT ORACLE METHODS:
Our Method       0.10060
Linear Inf R1    0.28600
Linear Inf R2    0.32675
RF Inf R1        0.34015
SV Inf R1        0.34385
dtype: float64

