In [1]:
import copy
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.metrics import roc_auc_score, average_precision_score
from sklearn.metrics import make_scorer, accuracy_score
# from pmlb import pmlb   
import time

# Path of the admissions CSV handed to read_file in the data-setup cell.
dataset = 'data/mimic/mimic4_admissions.csv'
# Forwarded to read_file as `sample_size`; np.inf presumably means "use every row" -- TODO confirm in io_mimic.read_file.
sample_size = np.inf
# Seed shared by the train/test split and by NumPy's global RNG before training.
random_state = 42

# train a model to predict admissions

In [2]:
from io_mimic import read_file

##################################################
# setup data
##################################################
# `groups` starts as None so that the note printed in the 3-tuple branch below
# can report the previous value on a fresh kernel instead of raising NameError.
groups = None
tmp = read_file(
    dataset,
    one_hot_encode = 'ohc',
    label='y',
    freetext_features=[ 'chiefcomplaint'],
#     label_features=[
#         'ethnicity',
#         'insurance'
#     ],
    sample_size = sample_size
)
# The result is unpacked as (features, labels) or (features, labels, groups)
# depending on its length; any other length is reported explicitly rather than
# surfacing later as an undefined `features`.
if len(tmp) == 2:
    features, labels = tmp
elif len(tmp) == 3:
    print('Note: changing "groups" from',groups,'to',tmp[2])
    features, labels, groups = tmp
else:
    raise ValueError(
        'read_file returned %d values; expected 2 or 3' % len(tmp)
    )
print('features:')
print(features.head())
print(features.shape)
# generate train/test split (75/25, shuffled, stratified on the label so both
# partitions keep the same outcome rate)
X_train, X_test, y_train, y_test = train_test_split(features, labels,
                                                train_size=0.75,
                                                test_size=0.25,
                                                shuffle=True,
                                                random_state=random_state,
                                                stratify=labels
                                                )

skipping label encoding for chiefcomplaint
One Hot Encoding chiefcomplaint
features:
   temperature  heartrate  resprate  o2sat    sbp   dbp  pain  acuity  \
0          NaN        NaN       NaN    NaN    NaN   NaN   NaN     NaN   
1         97.5       81.0      17.0  100.0  167.0  96.0   0.0     3.0   
2         98.2      111.0      18.0   96.0  100.0  66.0   5.0     2.0   
3         98.9       87.0      20.0   99.0  140.0  83.0   6.0     1.0   
4         97.0       99.0      18.0  100.0  168.0  84.0   0.0     3.0   

   insurance  language  ...  chiefcomplaint_vision  chiefcomplaint_visual  \
0          1         1  ...                      0                      0   
1          1         1  ...                      0                      0   
2          2         1  ...                      0                      0   
3          2         1  ...                      0                      0   
4          0         1  ...                      0                      0   

   chiefcompl

In [3]:
from io_mimic import one_hot_encode_labels
# Build the development dataset: the training features with 'ethnicity' and
# 'insurance' passed through one_hot_encode_labels, plus the outcome column.
# Work on a copy so that neither the helper nor the outcome assignment below
# can write into X_train, which is later used to fit the model (a label column
# inside X_train would leak the target into the features).
df = X_train.copy()
for col in ['ethnicity','insurance']:
    df = one_hot_encode_labels(df, col)
df['binary outcome'] = y_train
df.to_csv('data/mimic/development_dataset.csv',index=False)

In [4]:
# Peek at the last ten column names of the development frame written above.
df.columns[-10:]

Index(['chiefcomplaint_wrist', 'ethnicity_0', 'ethnicity_1', 'ethnicity_2',
       'ethnicity_3', 'ethnicity_4', 'insurance_0', 'insurance_1',
       'insurance_2', 'binary outcome'],
      dtype='object')

# define the estimator

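We define an L1-penalized (lasso) logistic regression model. As preprocessing, numeric features are median-imputed and standardized, the categorical features (`insurance`, `ethnicity`) are one-hot encoded, and all remaining columns are passed through unchanged.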

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.experimental import enable_halving_search_cv # noqa
from sklearn.model_selection import HalvingGridSearchCV
import numpy as np
from tempfile import mkdtemp
cachedir = mkdtemp()

# Classifier: logistic regression with an L1 penalty fitted by the 'saga' solver.
# (A RandomForestClassifier(n_jobs=1) could be swapped in here instead.)
base_model = LogisticRegression(penalty='l1', solver='saga', n_jobs=1)

# Column groups: two label-encoded categoricals are named explicitly; every
# float-typed column of X_train is treated as numeric.
categorical_features = ['insurance', 'ethnicity']
numeric_features = list(X_train.select_dtypes('float').columns)
print('categorical features:',categorical_features)
print('numeric features:',numeric_features)

# Numeric columns: fill missing values with the median, then standardize.
numeric_transformer = make_pipeline(
    SimpleImputer(strategy="median"),
    StandardScaler(),
)
# Categorical columns: dense one-hot encoding; unseen categories are ignored.
categorical_transformer = OneHotEncoder(handle_unknown="ignore", sparse_output=False)

# Columns in neither group are forwarded untouched (remainder='passthrough').
transformers = [
    ("num", numeric_transformer, numeric_features),
    ("cat", categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(
    transformers,
    remainder='passthrough',
    verbose_feature_names_out=False,
)
est = make_pipeline(preprocessor, base_model)

categorical features: ['insurance', 'ethnicity']
numeric features: ['temperature', 'heartrate', 'resprate', 'o2sat', 'sbp', 'dbp', 'pain', 'acuity', 'prev_adm']


In [6]:
# train model
########################################
# configure estimators
########################################
setatts = {
    'random_state':random_state,
}
np.random.seed(random_state)
# `est` is a Pipeline, which has no `random_state` attribute of its own, so a
# plain hasattr/setattr on it never reaches the classifier. Instead, push each
# setting to every (possibly nested) parameter of that name through the
# get_params/set_params API, e.g. 'logisticregression__random_state'.
for k,v in setatts.items():
    matching = {
        name: v
        for name in est.get_params(deep=True)
        if name == k or name.endswith('__' + k)
    }
    if matching:
        est.set_params(**matching)
# print(40*'=','Evaluating ',est,' on ',dataset,40*'=',sep='\n')


print('X_train:',X_train.shape)
print('y_train:',y_train.shape)


##################################################
# Fit models
##################################################
print('training',est)
# perf_counter is monotonic, so the elapsed time cannot be skewed by wall-clock
# adjustments during a long fit.
t0 = time.perf_counter()
# with warnings.catch_warnings():
#     warnings.simplefilter("ignore")
est.fit(X_train, y_train)
train_time = time.perf_counter() - t0
print('Training finished in',train_time,'seconds')

X_train: (120012, 303)
y_train: (120012,)
training Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('num',
                                                  Pipeline(steps=[('simpleimputer',
                                                                   SimpleImputer(strategy='median')),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['temperature', 'heartrate',
                                                   'resprate', 'o2sat', 'sbp',
                                                   'dbp', 'pain', 'acuity',
                                                   'prev_adm']),
                                                 ('cat',
                                                  OneHotEncoder(handle_unknown=



# measure disparities
- make a dataframe of model predictions and demographics for the test set, and save it to CSV
- run `measure_disparity.py` on the saved CSV

In [9]:
# make dataframe
import json

# Demographic columns: any X_test column whose name contains one of these terms.
demographics = [
    name for name in X_test.columns
    if any(term in name for term in ('ethnicity', 'gender', 'insurance'))
]

# Label encodings saved next to the dataset: column name -> {'classes_': [...]}.
with open('data/mimic/mimic4_admissions.csv.label_encodings.json','r') as f:
    enc = json.load(f)

# Replace integer codes by their class names for readability; columns without
# a stored encoding are left as they are.
X_nice = X_test.copy()
for column in demographics:
    print(column)
    if column not in enc:
        continue
    classes = enc[column]['classes_']
    print(classes)
    print(X_nice[column].unique())
    X_nice[column] = X_nice[column].map(lambda code: classes[code])

# One row per test sample: demographics, predicted probability of the positive
# class, predicted label, unit sample weights, and the true outcome.
df = pd.DataFrame(X_nice[demographics]).assign(**{
    'model prediction': est.predict_proba(X_test)[:,1],
    'model label': est.predict(X_test),
    'sample weights': np.ones(len(X_test)),
    'binary outcome': y_test,
})

df.to_csv('lr_model_mimic4_admission.csv', index=False)

insurance
['Medicaid', 'Medicare', 'Other']
[2 0 1]
ethnicity
['AMERICAN INDIAN/ALASKA NATIVE', 'ASIAN', 'BLACK/AFRICAN AMERICAN', 'HISPANIC/LATINO', 'WHITE']
[4 3 2 0 1]
gender
['F', 'M']
[1 0]


In [10]:
# Display the per-sample prediction table that was just written to CSV.
df

Unnamed: 0,insurance,ethnicity,gender,model prediction,model label,sample weights,binary outcome
151568,Other,WHITE,M,5.393343e-01,1,1.0,1
97364,Medicaid,HISPANIC/LATINO,M,2.274621e-01,0,1.0,1
26355,Other,BLACK/AFRICAN AMERICAN,M,3.542206e-13,0,1.0,0
156884,Medicare,AMERICAN INDIAN/ALASKA NATIVE,F,7.755708e-02,0,1.0,0
47625,Medicaid,BLACK/AFRICAN AMERICAN,F,1.186550e-01,0,1.0,1
...,...,...,...,...,...,...,...
56,Medicare,WHITE,M,4.396502e-01,0,1.0,1
130737,Other,WHITE,F,2.345275e-01,0,1.0,0
141554,Other,BLACK/AFRICAN AMERICAN,F,1.981685e-02,0,1.0,0
125366,Other,WHITE,F,3.203458e-01,0,1.0,0


# run measure_disparity.py on model output

In [18]:
# NOTE(review): the recorded run of this cell ended in MultipleInstanceError
# (TerminalIPythonApp vs IPKernelApp), presumably triggered by the --interactive
# flag when the script runs inside a notebook kernel -- confirm in measure_disparity.py.
%run measure_disparity.py --dataset lr_model_mimic4_admission.csv -- --interactive

reading in lr_model_mimic4_admission.csv
demographic columns: ['insurance', 'ethnicity', 'gender']
Overall Performance
	Measures of predictive bias on the whole population.
╭─────────┬─────────┬───────┬───────┬───────────────────┬────────────╮
│   AUROC │   AUPRC │   FPR │   FNR │   Positivity Rate │   Accuracy │
├─────────┼─────────┼───────┼───────┼───────────────────┼────────────┤
│   0.882 │   0.771 │ 0.182 │  0.43 │             0.296 │      0.824 │
╰─────────┴─────────┴───────┴───────┴───────────────────┴────────────╯
Subgroup Fairness Violations
	Measures the deviation in performance for marginal and intersectional groups.
	Note that these deviation are weighted by group prevalence to produce stable estimates when sample sizes are small.
╭─────────────┬───────────────────────────────┬──────────┬─────────────────────┬─────────┬─────────┬───────────────────╮
│   insurance │                     ethnicity │   gender │   Brier Score (MSE) │     FNR │     FPR │   Positivity Rate │
├────

MultipleInstanceError: An incompatible sibling of 'TerminalIPythonApp' is already instantiated as singleton: IPKernelApp

In [12]:
# Call the disparity analysis as a function on the saved predictions CSV and
# keep its return value for inspection.
from measure_disparity import measure_disparity
df_fairness = measure_disparity('lr_model_mimic4_admission.csv')

reading in lr_model_mimic4_admission.csv
demographic columns: ['insurance', 'ethnicity', 'gender']
Overall Performance
	Measures of predictive bias on the whole population.
╭─────────┬─────────┬───────┬───────┬───────────────────┬────────────╮
│   AUROC │   AUPRC │   FPR │   FNR │   Positivity Rate │   Accuracy │
├─────────┼─────────┼───────┼───────┼───────────────────┼────────────┤
│   0.882 │   0.771 │ 0.182 │  0.43 │             0.296 │      0.824 │
╰─────────┴─────────┴───────┴───────┴───────────────────┴────────────╯
Subgroup Fairness Violations
	Measures the deviation in performance for marginal and intersectional groups.
	Note that these deviation are weighted by group prevalence to produce stable estimates when sample sizes are small.
╭─────────────┬───────────────────────────────┬──────────┬─────────────────────┬─────────┬─────────┬───────────────────╮
│   insurance │                     ethnicity │   gender │   Brier Score (MSE) │     FNR │     FPR │   Positivity Rate │
├────

In [14]:
# Display the table returned by measure_disparity.
df_fairness

Unnamed: 0_level_0,Unnamed: 1_level_0,metric,Brier Score (MSE),FNR,FPR,Positivity Rate
insurance,ethnicity,gender,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
any,any,F,-0.005876,0.016905,-0.01218624,-0.022569
any,any,M,0.005876,-0.012249,0.01400051,0.022569
any,AMERICAN INDIAN/ALASKA NATIVE,any,-6e-06,-9.6e-05,-6.480779e-06,9.2e-05
any,ASIAN,any,0.000958,0.000489,0.002092688,0.001933
any,BLACK/AFRICAN AMERICAN,any,-0.012077,0.042662,-0.02237866,-0.041334
any,HISPANIC/LATINO,any,-0.003851,0.018983,-0.008565609,-0.015937
any,WHITE,any,0.014977,-0.022569,0.04123427,0.055247
Medicaid,any,any,-0.00536,0.012227,-0.009386706,-0.016456
Medicaid,AMERICAN INDIAN/ALASKA NATIVE,F,-3.3e-05,0.000126,-6.265684e-05,-0.000103
Medicaid,AMERICAN INDIAN/ALASKA NATIVE,M,-5e-06,-3.2e-05,6.678796e-07,-8e-06
