In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tqdm
import os

from opioid_agent import OpioidAgent, OpioidAgentParams
from opioid_model import opioid_XGBoost_model, opioid_mlp_model
from opioid_env import OpioidPrescribeEnv

import opioid_env
import opioid_experiment
import time
from opioid_fairness_metrics import *
from opioid_simulation import *

import sys
sys.path.insert(0, os.getcwd() + '/../')
import vanilla_monte_carlo

In [2]:
# Data / model paths, currently disabled: nothing in this cell loads the
# simulation data or the trained models.
# simulation_data_dir = "./mimic_data_after_preprocess/simulation_set25.csv"
# simulation_data = pd.read_csv(simulation_data_dir, index_col='subject_id')
# init_params = {'simulation_data': simulation_data}


# xgboost_model_dir = "./mimic-model/models/model_xgboost.pkl"
# xgboost_threshold_dir = "./mimic-model/models/thresholds.pkl"
# xgboost_precision_dir = "./mimic-model/models/precision_xgboost.pkl"
# mlp_model_dir = "./mimic-model/models/model_mlp.pth"
# mlp_threshold_dir = "./mimic-model/models/thresholds.pkl"
# mlp_precision_dir = "./mimic-model/models/precision_xgboost.pkl"

# Candidate values for each swept parameter; the grid built below is their
# Cartesian product (2 * 7 * 1 * 1 * 1 * 4 * 3 = 168 combinations).
potential_model_name = ['xgboost', 'mlp']
potential_thresholds = [0.3, 0.4, 0.45, 0.5, 0.55, 0.6, 0.7]
transition_func_type = None  # defined here but not part of the grid
potential_sample_ratio = [0.01]
potential_fairness_requirement = [0.038]
potential_n_time_steps = [500]
potential_n_hosp_mode = ['expectation', 'same', 'random_1', 'random_2']
potential_n_prescription_mode = ['expectation', 'random_1', 'random_2']

# get all possible combinations of parameters
# Column order of the final array follows the order of this list:
# 0=model, 1=threshold, 2=sample_ratio, 3=fairness_requirement,
# 4=n_time_steps, 5=n_hosp_mode, 6=n_prescription_mode.
all_params = [potential_model_name, potential_thresholds, potential_sample_ratio,
              potential_fairness_requirement, potential_n_time_steps, 
              potential_n_hosp_mode, potential_n_prescription_mode]
# Flatten the meshgrid into one row per combination, one column per parameter.
# NOTE: string and numeric lists are stacked into a single array, so every
# entry is stored as a string (e.g. the threshold is '0.3', not 0.3).
# NOTE: with this transpose/reshape the threshold varies fastest, then the
# model; the two mode columns vary slowest. Later cells pair row i of this
# array with results[i], so this ordering must not be changed.
all_params = np.array(np.meshgrid(*all_params)).T.reshape(-1, len(all_params))

In [3]:
# Expect (168, 7): 168 parameter combinations, 7 parameters per combination.
all_params.shape

(168, 7)

In [4]:
# Load the previously saved simulation results from disk.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only open pickle files that were produced by this project.
import pickle

with open('./simulation_results/full168_w_utility.pkl', 'rb') as f:
    # Bind the loaded object directly; no placeholder list or temporary alias
    # is needed because a failed load raises before `results` would be used.
    results = pickle.load(f)


In [5]:
# Number of loaded result entries; later cells pair results[i] with row i of
# all_params, so this should equal all_params.shape[0].
len(results)

168

In [6]:
# Spot check on configuration 10: two different traces (index 0 and index 7)
# should report the same unfairness value at their first recorded step.
results[10][1][2][0].unfairness_list[0] == results[10][1][2][7].unfairness_list[0]

True

In [None]:
# Build the per-configuration summary table `df_all_params`.
#
# Each row pairs one parameter combination (row i of `all_params`) with
# statistics aggregated over all simulation traces stored in results[i].
# The table is assembled from a list of records and created in one go rather
# than pre-filling zeros and writing cells with chained indexing
# (`df[col][i] = value`): that pattern raises SettingWithCopyWarning and, with
# pandas Copy-on-Write enabled, silently leaves the frame unchanged.

# Metrics whose per-step history is stored on each trace as `<metric>_list`.
_GAP_METRICS = ['tpr', 'fpr', 'precision', 'f1', 'accuracy']


def _summarize_trace(his):
    """Reduce a single simulation trace to scalar statistics.

    Parameters
    ----------
    his : object
        One trace; must expose `unfairness_list` and, for every metric in
        `_GAP_METRICS`, a `<metric>_list` attribute that converts to a 2-D
        array with at least three columns.

    Returns
    -------
    dict
        max / mean / initial unfairness risk, and for every metric the
        max / mean / initial gap plus the mean of column 0 ("utility").
    """
    stats = {
        'max_unfairness_risk': np.max(his.unfairness_list),
        'mean_unfairness_risk': np.mean(his.unfairness_list),
        'initial_unfairness_risk': his.unfairness_list[0],
    }
    for metric in _GAP_METRICS:
        history = np.array(getattr(his, f'{metric}_list'))
        # Gap = |column 1 - column 2| at every step; column 0 is averaged as
        # the utility. Presumably column 0 is the overall value and columns
        # 1/2 are the two compared groups -- TODO confirm against the
        # history class of the simulation.
        gap = np.abs(history[:, 1] - history[:, 2])
        stats[f'max_gap_{metric}'] = np.max(gap)
        stats[f'mean_gap_{metric}'] = np.mean(gap)
        stats[f'initial_gap_{metric}'] = gap[0]
        stats[f'utility_{metric}'] = np.mean(history[:, 0])
    return stats


def _summarize_config(result):
    """Average the per-trace statistics of one configuration into a table row.

    `result[1][2]` is the iterable of traces and `result[1][1]` the trace
    count recorded for the configuration. Keys are inserted in the column
    order of the summary table.
    """
    per_trace = [_summarize_trace(his) for his in result[1][2]]

    def avg(key):
        # Mean over traces; evaluates to NaN (with a NumPy warning) if the
        # configuration has no traces.
        return np.mean([stats[key] for stats in per_trace])

    row = {
        'avg_max_unfairness_risk': avg('max_unfairness_risk'),
        'avg_mean_unfairness_risk': avg('mean_unfairness_risk'),
        'initial_unfairness_risk': avg('initial_unfairness_risk'),
    }
    for metric in _GAP_METRICS:
        row[f'avg_max_gap_{metric}'] = avg(f'max_gap_{metric}')
        row[f'avg_mean_gap_{metric}'] = avg(f'mean_gap_{metric}')
        row[f'initial_gap_{metric}'] = avg(f'initial_gap_{metric}')
    for metric in _GAP_METRICS:
        row[f'avg_utility_{metric}'] = avg(f'utility_{metric}')

    # check if all values in initial_unfairness_list are the same
    # if not, then there is a problem
    initial_values = [stats['initial_unfairness_risk'] for stats in per_trace]
    if not all(x == initial_values[0] for x in initial_values):
        print('error')

    row['n_traces'] = result[1][1]
    # result[1][3] (n_violated in an earlier, disabled version of this cell)
    # is intentionally not exported.
    return row


# Parameter columns 0, 1, 5, 6 of the grid. The grid array has a string dtype,
# so `threshold` holds strings such as '0.3'.
_param_frame = pd.DataFrame(
    columns=['model', 'threshold', 'n_hosp_mode', 'n_prescription_mode'],
    data=all_params[:, [0, 1, 5, 6]],
)
_metric_frame = pd.DataFrame([_summarize_config(result) for result in results])

# Rows are paired purely by position, so a length mismatch would attach
# statistics to the wrong configuration (or leave rows without data).
if len(_metric_frame) != len(_param_frame):
    raise ValueError(
        f'results has {len(_metric_frame)} entries but the parameter grid '
        f'has {len(_param_frame)} rows'
    )

df_all_params = pd.concat([_param_frame, _metric_frame], axis=1)
    

In [9]:
# Display the summary table (pandas truncates long/wide frames in the output).
df_all_params

Unnamed: 0,model,threshold,n_hosp_mode,n_prescription_mode,avg_max_unfairness_risk,avg_mean_unfairness_risk,initial_unfairness_risk,avg_max_gap_tpr,avg_mean_gap_tpr,initial_gap_tpr,...,initial_gap_f1,avg_max_gap_accuracy,avg_mean_gap_accuracy,initial_gap_accuracy,avg_utility_tpr,avg_utility_fpr,avg_utility_precision,avg_utility_f1,avg_utility_accuracy,n_traces
0,xgboost,0.3,expectation,expectation,0.034326,0.018609,0.001172,0.116718,0.083542,0.103341,...,0.041287,0.043808,0.026334,0.000011,0.754151,0.221015,0.081843,0.145820,0.778414,10
1,xgboost,0.4,expectation,expectation,0.034190,0.018479,0.001172,0.129267,0.103000,0.107874,...,0.019108,0.041355,0.021947,0.005875,0.671434,0.145982,0.111901,0.187242,0.849820,10
2,xgboost,0.45,expectation,expectation,0.034062,0.018439,0.001172,0.141259,0.113732,0.113442,...,0.000482,0.035838,0.018203,0.008449,0.632535,0.114534,0.133111,0.213616,0.879651,10
3,xgboost,0.5,expectation,expectation,0.034036,0.018372,0.001172,0.158405,0.130731,0.134033,...,0.023762,0.036618,0.017988,0.008983,0.584133,0.088984,0.155823,0.237894,0.903502,10
4,xgboost,0.55,expectation,expectation,0.034154,0.018201,0.001172,0.179215,0.150646,0.120694,...,0.022847,0.038856,0.017284,0.007927,0.533025,0.065802,0.184935,0.265255,0.924976,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
163,mlp,0.45,random_2,random_2,0.023138,0.013261,0.003897,0.123867,0.090829,0.108003,...,0.031205,0.018791,0.013545,0.015772,0.616028,0.131331,0.119472,0.193420,0.862861,10
164,mlp,0.5,random_2,random_2,0.022309,0.012623,0.003897,0.119917,0.090048,0.102046,...,0.050517,0.018110,0.011895,0.016443,0.558984,0.096373,0.143264,0.219629,0.895704,10
165,mlp,0.55,random_2,random_2,0.022815,0.013169,0.003897,0.116809,0.093436,0.090521,...,0.052024,0.019626,0.016202,0.014044,0.494625,0.059596,0.187137,0.262512,0.930156,15
166,mlp,0.6,random_2,random_2,0.023307,0.013223,0.003897,0.106462,0.081666,0.072650,...,0.057949,0.021937,0.017124,0.012744,0.435977,0.035182,0.247316,0.306963,0.952661,10


In [13]:
# The threshold column holds strings (e.g. '0.3'), not floats, because it is
# taken from the string-typed all_params array; cast before numeric use.
df_all_params.threshold[0]

'0.3'

In [10]:
# Persist the summary table. With the default index=True the integer row
# index is written as the first, unnamed CSV column.
df_all_params.to_csv('./simulation_results/all_params_168_w_utility.csv')

In [11]:
# Expect (168, 28): 4 parameter columns + 24 summary columns per configuration.
df_all_params.shape

(168, 28)