Author: Emily Wong \
April 23, 2024

# 1. Import libraries, methods, and data

## 1.1. Libraries

In [None]:
import warnings
warnings.filterwarnings('ignore')

# Data wrangling
import pandas as pd
import numpy as np
from numpy.random import uniform, normal, seed

# Machine learning
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score, balanced_accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split, KFold, RepeatedKFold, StratifiedKFold, cross_val_score
from sklearn.model_selection import cross_val_score
import scipy
from scipy.stats import randint
import xgboost as xgb
from imblearn.under_sampling import RandomUnderSampler
import optuna
import joblib

# Visualisation
from sklearn.tree import export_graphviz
from IPython.display import Image
import graphviz
import matplotlib.pyplot as plt
import seaborn as sns # for kernel density plots

# for nested dictionary (calc_weights method)
import collections
def makehash():
    """Return an auto-vivifying nested dictionary.

    Any missing key is filled with another dictionary of the same kind, so
    ``d[a][b] = value`` works without creating ``d[a]`` first.
    """
    nested = collections.defaultdict(makehash)
    return nested

# Fairness
import aif360
import fairlearn
from fairlearn.metrics import demographic_parity_difference, demographic_parity_ratio, equalized_odds_difference, equalized_odds_ratio, false_negative_rate

## 1.2 Reweighing Method

In [None]:
def calc_weights(df, sens_features_name, outcome_name):
    """Calculate reweighing sample weights for every row of ``df``.

    Follows the calculation given in
        F. Kamiran and T. Calders, "Data Preprocessing Techniques for
        Classification without Discrimination," Knowledge and Information
        Systems, 2012.

    The weight of a (group, outcome) cell is
    ``(row_sum * col_sum) / (n * n_cell)``, where the sums come from the
    group-by-outcome contingency table and ``n`` is the number of rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the sensitive-feature and outcome columns.
    sens_features_name : str
        Name of the sensitive-feature (group) column.
    outcome_name : str
        Name of the outcome (label) column.

    Returns
    -------
    list
        One weight per row of ``df``, in row order.

    Raises
    ------
    KeyError
        If a row holds a group/outcome value that is absent from the
        contingency table (e.g. a missing value).
    """
    if len(df) == 0:
        return []

    # combination of label and groups (outputs a table)
    sens_features = df[sens_features_name]
    outcome = df[outcome_name]
    tab = pd.crosstab(index=sens_features, columns=outcome)

    n = len(df)
    row_sums = tab.sum(axis=1)
    col_sums = tab.sum(axis=0)

    # reweighing weights, keyed by the string form of (group, outcome)
    w = {}
    for r in tab.index:
        for c in tab.columns:
            n_combo = tab.loc[r, c]
            if n_combo == 0:
                # empty cell: avoid dividing by zero (no row ever uses this weight)
                n_combo = 1
            w[(str(r), str(c))] = (row_sums.loc[r] * col_sums.loc[c]) / (n * n_combo)

    # Instance weights: both parts of the key go through str() so that
    # non-string group values (ints, bools, ...) find their weight too.
    return [w[(str(group), str(out))] for group, out in zip(sens_features, outcome)]

In [None]:
def _group_metrics(test_set, o, group_col):
    """Return prediction rates and AUC for the rows where ``group_col == 1``."""
    in_group = test_set[group_col] == 1
    labels = test_set.loc[in_group, o]
    preds = test_set.loc[in_group, 'y_pred']
    probs = test_set.loc[in_group, 'y_pred_prob']

    pos_preds = preds[labels == 1]  # predictions for truly positive rows
    neg_preds = preds[labels == 0]  # predictions for truly negative rows

    # AUC is undefined when only one class is present in the group
    if len(labels.unique()) < 2:
        auc = None
    else:
        auc = np.round(roc_auc_score(labels, probs), 3)

    # Means of empty selections are NaN, as with the per-column means used before
    return {'dp': preds.mean(),
            'fn': (pos_preds == 0).mean(),
            'tp': (pos_preds == 1).mean(),
            'fp': (neg_preds == 1).mean(),
            'tn': (neg_preds == 0).mean(),
            'auc': auc}


def model_eval(model, model_label, X_train, Y_train, X_test, Y_test, outcome_label, W=None, verbose=False):
    """Fit ``model`` and report overall and per-race test metrics.

    The model is fitted on the training data (with ``sample_weight=W`` when
    ``W`` is given) and evaluated on the test data. One result row is
    produced per entry of the module-level list ``races``; every "diff"
    value is that group's metric minus the same metric for the rows with
    ``MOM_RACE_White == 1``.

    Two module-level names are read: ``races`` (indicator column names
    present in ``X_test``) and ``i`` (reported as ``Iteration = i + 1``).

    Parameters
    ----------
    model : estimator with ``fit``, ``predict`` and ``predict_proba``
    model_label : str
        Value stored in the 'Model' field of every result row.
    X_train, Y_train : training predictors and labels
    X_test : pandas.DataFrame
        Test predictors; must contain 'MOM_RACE_White' and every column
        named in ``races``.
    Y_test : pandas Series or DataFrame
        Test labels; after concatenation the labels are read from the
        column named ``outcome_label``.
    outcome_label : str
        Name of the outcome column.
    W : array-like, optional
        Sample weights for fitting; also sets 'Reweigh' to 'Yes'.
    verbose : bool
        Print the overall test metrics when true.

    Returns
    -------
    list of dict
        One dictionary per race group.
    """
    o = outcome_label

    # Fit model
    if W is None:
        model.fit(X_train, Y_train)
    else:
        model.fit(X_train, Y_train, sample_weight=W)

    # Model predictions on test
    y_pred = model.predict(X_test)
    y_pred_prob = model.predict_proba(X_test)[:, 1]

    # Test performance metrics
    test_f1 = np.round(f1_score(Y_test, y_pred), 3)
    test_precision = np.round(precision_score(Y_test, y_pred), 3)
    test_recall = np.round(recall_score(Y_test, y_pred), 3)
    test_balanced_acc = np.round(balanced_accuracy_score(Y_test, y_pred), 3)
    test_auc = np.round(roc_auc_score(Y_test, y_pred_prob), 3)

    # Print model performance
    if verbose:
        print('Test AUC:', test_auc)
        print('Test Balanced Accuracy:', test_balanced_acc)
        print('Test F1:', test_f1)
        print('Test Precision:', test_precision)
        print('Test Recall:', test_recall)

    # Prepare for fairness evaluation: align predictions, labels and
    # predictors on a fresh 0..n-1 index so they concatenate row by row
    test_set = pd.concat([pd.DataFrame(y_pred, columns=['y_pred']),
                          pd.DataFrame(y_pred_prob, columns=['y_pred_prob']),
                          Y_test.reset_index(drop=True),
                          X_test.reset_index(drop=True)], axis=1)

    # Reference group for every "diff" column
    white = _group_metrics(test_set, o, 'MOM_RACE_White')

    reweigh_yn = 'No' if W is None else 'Yes'

    results = []
    for r in races:
        grp = _group_metrics(test_set, o, r)

        if white['auc'] is None or grp['auc'] is None:
            auc_diff = None
        else:
            auc_diff = grp['auc'] - white['auc']

        results.append({'Model': model_label,
                        'Reweigh': reweigh_yn,
                        'Iteration': i+1,
                        'Overall Test BA': test_balanced_acc,
                        'Overall Test AUC': test_auc,
                        'Overall Test F1': test_f1,
                        'Overall Test Precision': test_precision,
                        'Overall Test Recall': test_recall,
                        'Race': r,
                        'DP': grp['dp']-white['dp'],
                        'FN': grp['fn'],
                        'FN diff': grp['fn']-white['fn'],
                        'TP': grp['tp'],
                        'TP diff': grp['tp']-white['tp'],
                        'FP': grp['fp'],
                        'FP diff': grp['fp']-white['fp'],
                        'TN': grp['tn'],
                        'TN diff': grp['tn']-white['tn'],
                        'Test AUC': grp['auc'],
                        'Test AUC diff': auc_diff
                       })
    return results

## 1.3. Data

In [None]:
# Load the cleaned data workbook into a DataFrame
all_data = pd.read_excel(io="Eynav cleaned data.xlsx")

In [None]:
# Outcome column, followed by the predictor columns kept for model fitting
outcome = 'EPDS_risk2'
predictor_cols = [
    'MOM_AGE', 'MOM_RACE', 'ETHNIC_GROUP', 'ZIP', 'MARITAL_STATUS', 'FINANCIAL_CLASS',
    'LBW', 'PTB',
    'DELIVERY_METHOD', 'NICU_ADMIT', 'MFCU_ADMIT',
    'PREE', 'GDM', 'GHTN',
    'MOM_BMI', 'MOM_LOS', 'CHILD_LOS',
    'HIST_ANXIETY', 'HIST_DEPRESS', 'HIST_BIPOLAR', 'HIST_PMAD', 'MENTAL_HEALTH_DX_CUTOFF',
    'MED_PSYCH', 'MED_CARDIO',
]
# Predictors first, outcome last
data = all_data[predictor_cols + [outcome]]

## 1.3.3. Curate Data

In [None]:
# Keep only rows without any missing value, then report the remaining shape
data = data.dropna(axis=0, how='any')
print("N:", data.shape)

In [None]:
# binary-class
# value_counts() orders by frequency, not by label; sort by label so that
# count0 / count1 (and the bar labels below) really belong to classes 0 / 1.
count0, count1 = data[outcome].value_counts().sort_index()
print(count0, count1)

# Bar chart of the class distribution
x = ['0','1']
y = [count0, count1]
plt.bar(x, y)

## 1.3.4. Weight Data

In [None]:
# Reweighing weight per row, from the race-by-outcome table of the complete-case data
data['w'] = calc_weights(data, "MOM_RACE", outcome)

## 1.3.5. Get Dummies and Split

In [None]:
# One-hot encode the remaining categorical columns
data = pd.get_dummies(data=data)

In [None]:
# Separate the outcome from the predictors
Y = data[[outcome]]
X = data.drop(columns=[outcome])

# Race indicator columns; the split is stratified jointly on outcome and race
race_columns = ['MOM_RACE_Asian or Native Hawaiian or Other Pacific Islander',
                'MOM_RACE_Black or African American',
                'MOM_RACE_Multiracial',
                'MOM_RACE_Other',
                'MOM_RACE_Unknown',
                'MOM_RACE_White',
                'MOM_RACE_Hispanic White']
race = data[race_columns]
strat_df = pd.concat([Y, race], axis=1)

# 90% training / 10% test
X_train, X_test, y_train, y_test = train_test_split(
    X, Y,
    train_size=0.90, test_size=0.10,
    shuffle=True, stratify=strat_df, random_state=1234,
)
# 'w' holds the sample weights and must not be fed to predict()
X_test = X_test.drop(columns=['w'])

In [None]:
# Carve a validation set out of the training data, again stratified jointly
# on outcome and race
race_columns = ['MOM_RACE_Asian or Native Hawaiian or Other Pacific Islander',
                'MOM_RACE_Black or African American',
                'MOM_RACE_Multiracial',
                'MOM_RACE_Other',
                'MOM_RACE_Unknown',
                'MOM_RACE_White',
                'MOM_RACE_Hispanic White']
race = X_train[race_columns]
strat_df = pd.concat([y_train, race], axis=1)

# 85% training / 15% validation
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train,
    train_size=0.85, test_size=0.15,
    shuffle=True, stratify=strat_df, random_state=0,
)
# 'w' holds the sample weights and must not be fed to predict()
X_val = X_val.drop(columns=['w'])

In [None]:
# binary-class
# value_counts() orders by frequency, not by label; sort_index() pins the
# order so the first number is always class 0 and the second class 1.
count0_train, count1_train = y_train.value_counts().sort_index()
print(count0_train, count1_train)

count0_val, count1_val = y_val.value_counts().sort_index()
print(count0_val, count1_val)

count0_test, count1_test = y_test.value_counts().sort_index()
print(count0_test, count1_test)

# 2. Handle imbalanced data

## 2.2. Simple Under Sampling Majority (PMAD)

In [None]:
# Randomly undersample the training split (fixed seed for reproducibility)
rus = RandomUnderSampler(sampling_strategy="auto", random_state=0)
X_train_rus, y_train_rus = rus.fit_resample(X_train, y_train)

# The weight column travels through the resampler with the predictors;
# split it off so it is not used as a feature
weights_rus = X_train_rus['w']
X_train_rus = X_train_rus.drop(columns=['w'])

# Class balance after resampling
y_train_rus.value_counts()

# 3. Modeling

In [None]:
# Keep the sample weights of the (non-resampled) training split on their own
# and remove them from the predictor matrix
weights_train = X_train['w']
X_train = X_train.drop(columns=['w'])

In [None]:
# Race indicator columns; model_eval reads this module-level list and
# produces one result row per entry.
races = ['MOM_RACE_Asian or Native Hawaiian or Other Pacific Islander',
         'MOM_RACE_Black or African American',
         'MOM_RACE_Hispanic White',
         'MOM_RACE_Multiracial',
         'MOM_RACE_Other',
         'MOM_RACE_Unknown',
         'MOM_RACE_White'
        ]

# NOTE: placeholder only; orig_results is rebound to a concatenated DataFrame
# after all models have been evaluated.
orig_results = []
n_trials = 100 # for finetuning

## XG Boost Classifier

### Finetune XG Boost Classifier without Reweighing

In [None]:
# Training data for tuning: the undersampled training split
x = X_train_rus
y = y_train_rus.values.ravel()

def objective(trial):
    """Optuna objective: validation AUC of an unweighted XGBoost classifier."""
    params = {
        "seed":0,
        "verbosity": 0,
        "objective": "binary:logistic",
        "n_estimators": trial.suggest_int("n_estimators", 1, 100),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 10, log=True),
        "max_depth": trial.suggest_int("max_depth", 1, 5),
        "subsample": trial.suggest_float("subsample", 1e-3, 1),
        "lambda": trial.suggest_int("lambda", 3, 100),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 100)
    }

    model = xgb.XGBClassifier(**params)
    model.fit(x, y, verbose=False)
    # Only the AUC is optimised, so class predictions and balanced accuracy
    # are not computed per trial
    predictions_prob = model.predict_proba(X_val)[:,1]
    return roc_auc_score(y_val, predictions_prob)

#optuna.logging.set_verbosity(optuna.logging.WARNING)
# Seeded TPE sampler so the search is reproducible
sampler = optuna.samplers.TPESampler(seed=0, consider_prior=True, prior_weight=0.5)
study = optuna.create_study(direction='maximize',sampler=sampler)
study.optimize(objective, n_trials=n_trials)

In [None]:
# Rebuild the classifier from the best hyperparameters of the study
best_xgb = xgb.XGBClassifier(seed=0, verbosity=0, objective='binary:logistic', **study.best_params)

# model_eval reports Iteration = i + 1, so -1 labels this run as iteration 0
i = -1
orig_XGB = pd.DataFrame(
    model_eval(
        model=best_xgb,
        model_label='XGB',
        X_train=x,
        Y_train=y,
        X_test=X_test,
        Y_test=y_test,
        outcome_label=outcome,
        verbose=True,
    )
)

In [None]:
# Per-feature gain of the fitted booster, ordered from smallest to largest
gain = best_xgb.get_booster().get_score(importance_type='gain')
gain_sorted = dict(sorted(gain.items(), key=lambda item: item[1]))

features = list(gain_sorted)
values = [gain_sorted[name] for name in features]

# Horizontal bar chart, saved at high resolution
fig, ax = plt.subplots()
ax.barh(features, values)
plt.yticks(fontsize=5.5)
ax.set_xlabel("Gain")
plt.savefig('EPDS XGB Feature Importance (val).png', dpi=600, bbox_inches='tight')

In [None]:
# Persist the tuned, unweighted XGBoost model
joblib.dump(value=best_xgb, filename='best_xgb_epds_no_reweigh_val.pkl')

### Finetune XG Boost Classifier with Reweighing

In [None]:
# Training data for tuning: the undersampled training split and its weights
x = X_train_rus
y = y_train_rus.values.ravel()
w = weights_rus

def objective(trial):
    """Optuna objective: validation AUC of a sample-weighted XGBoost classifier."""
    params = {
        "seed":0,
        "verbosity": 0,
        "objective": "binary:logistic",
        "n_estimators": trial.suggest_int("n_estimators", 1, 100),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 10, log=True),
        "max_depth": trial.suggest_int("max_depth", 1, 5),
        "subsample": trial.suggest_float("subsample", 1e-3, 1),
        # NOTE(review): lower bound is 10 here but 3 in the search without
        # reweighing - confirm the difference is intended
        "lambda": trial.suggest_int("lambda", 10, 100),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 100)
    }

    model = xgb.XGBClassifier(**params)
    model.fit(x, y, sample_weight=w, verbose=False)
    # Only the AUC is optimised, so class predictions and balanced accuracy
    # are not computed per trial
    predictions_prob = model.predict_proba(X_val)[:,1]
    return roc_auc_score(y_val, predictions_prob)

#optuna.logging.set_verbosity(optuna.logging.WARNING)
# Seeded TPE sampler so the search is reproducible
sampler = optuna.samplers.TPESampler(seed=0, consider_prior=True, prior_weight=0.5)
study = optuna.create_study(direction='maximize',sampler=sampler)
study.optimize(objective, n_trials=n_trials)

In [None]:
# Rebuild the classifier from the best hyperparameters of the study
best_xgb2 = xgb.XGBClassifier(seed=0, verbosity=0, objective='binary:logistic', **study.best_params)

# original evaluation
# model_eval reports Iteration = i + 1, so -1 labels this run as iteration 0
i = -1
orig_XGB2 = pd.DataFrame(
    model_eval(
        model=best_xgb2,
        model_label='XGB',
        X_train=x,
        Y_train=y,
        X_test=X_test,
        Y_test=y_test,
        outcome_label=outcome,
        W=w,
        verbose=True,
    )
)

In [None]:
# Per-feature gain of the fitted (reweighed) booster, smallest to largest
gain = best_xgb2.get_booster().get_score(importance_type='gain')
gain_sorted = dict(sorted(gain.items(), key=lambda item: item[1]))

features = list(gain_sorted)
values = [gain_sorted[name] for name in features]

# Horizontal bar chart, saved at high resolution
fig, ax = plt.subplots()
ax.barh(features, values)
plt.yticks(fontsize=5.5)
ax.set_xlabel("Gain")
plt.savefig('EPDS XGB Feature Importance Reweigh.png', dpi=600, bbox_inches='tight')

In [None]:
# Persist the tuned, reweighed XGBoost model
joblib.dump(value=best_xgb2, filename='best_xgb_epds_reweigh_val.pkl')

## Random Forest

### Finetune Random Forest without Reweighing

In [None]:
# Training data for tuning: the undersampled training split
x = X_train_rus
y = y_train_rus.values.ravel()

def objective(trial):
    """Optuna objective: validation AUC of an unweighted random forest."""
    params = {'random_state':0,
             'max_features':'sqrt',
             'ccp_alpha': trial.suggest_float("ccp_alpha", 0, 1),
             'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 50),
             'min_samples_split': trial.suggest_int('min_samples_split', 2, 50),
             'n_estimators': trial.suggest_int('n_estimators', 1, 125),
             'max_depth': trial.suggest_int('max_depth', 1, 5)
             }
    model = RandomForestClassifier(**params)
    model.fit(x, y)
    # Only the AUC is optimised, so class predictions and balanced accuracy
    # are not computed per trial
    predictions_prob = model.predict_proba(X_val)[:,1]
    return roc_auc_score(y_val, predictions_prob)

# Seeded TPE sampler so the search is reproducible
sampler = optuna.samplers.TPESampler(seed=0, consider_prior=True, prior_weight=0.5)
study = optuna.create_study(direction='maximize',sampler=sampler)
study.optimize(objective, n_trials=n_trials)

In [None]:
# Rebuild the forest from the best hyperparameters of the study
best_rf = RandomForestClassifier(max_features='sqrt', random_state=0, **study.best_params)

# original evaluation
# model_eval reports Iteration = i + 1, so -1 labels this run as iteration 0
i = -1
orig_RF = pd.DataFrame(
    model_eval(
        model=best_rf,
        model_label='RF',
        X_train=x,
        Y_train=y,
        X_test=X_test,
        Y_test=y_test,
        outcome_label=outcome,
        verbose=True,
    )
)

In [None]:
from sklearn.inspection import permutation_importance

# Permutation importance of the fitted forest, computed on X_test / y_test
imp_result = permutation_importance(
    best_rf, X_test, y_test, n_repeats=10, random_state=2024, n_jobs=2
)
importance_mean, importance_sd = imp_result.importances_mean, imp_result.importances_std

# Positions of the ten largest mean importances (argpartition leaves them unordered)
ind = np.argpartition(importance_mean, -10)[-10:]
top_feat = X_test.columns[ind]
top_vals, top_std = importance_mean[ind], importance_sd[ind]

# Horizontal bars with one standard deviation as error bars
fig, ax = plt.subplots()
ax.barh(top_feat, top_vals, xerr=top_std)
ax.set_xlabel("Mean accuracy decrease")
plt.savefig('EPDS RF Feature Importance (val).png', dpi=600, bbox_inches='tight')

In [None]:
# Persist the tuned, unweighted random forest
joblib.dump(value=best_rf, filename='best_rf_epds_no_reweigh_val.pkl')

### Finetune Random Forest with Reweighing

In [None]:
# Training data for tuning: the undersampled training split and its weights
x = X_train_rus
y = y_train_rus.values.ravel()
w = weights_rus

def objective(trial):
    """Optuna objective: validation AUC of a sample-weighted random forest."""
    params = {'random_state':0,
             'max_features':'sqrt',
             'ccp_alpha': trial.suggest_float("ccp_alpha", 0, 1),
             'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 50),
             'min_samples_split': trial.suggest_int('min_samples_split', 2, 50),
             'n_estimators': trial.suggest_int('n_estimators', 1, 125),
             'max_depth': trial.suggest_int('max_depth', 1, 5)
             }
    model = RandomForestClassifier(**params)
    model.fit(x, y, sample_weight=w)
    # Only the AUC is optimised, so class predictions and balanced accuracy
    # are not computed per trial
    predictions_prob = model.predict_proba(X_val)[:,1]
    return roc_auc_score(y_val, predictions_prob)

# Seeded TPE sampler so the search is reproducible
sampler = optuna.samplers.TPESampler(seed=0, consider_prior=True, prior_weight=0.5)
study = optuna.create_study(direction='maximize',sampler=sampler)
study.optimize(objective, n_trials=n_trials)

In [None]:
# Rebuild the forest from the best hyperparameters of the study
best_rf2 = RandomForestClassifier(max_features='sqrt', random_state=0, **study.best_params)

# original evaluation
# model_eval reports Iteration = i + 1, so -1 labels this run as iteration 0
i = -1
orig_RF2 = pd.DataFrame(
    model_eval(
        model=best_rf2,
        model_label='RF',
        X_train=x,
        Y_train=y,
        X_test=X_test,
        Y_test=y_test,
        outcome_label=outcome,
        W=w,
        verbose=True,
    )
)

In [None]:
from sklearn.inspection import permutation_importance

# Permutation importance of the fitted (reweighed) forest, computed on X_test / y_test
imp_result = permutation_importance(
    best_rf2, X_test, y_test, n_repeats=10, random_state=2024, n_jobs=2
)
importance_mean, importance_sd = imp_result.importances_mean, imp_result.importances_std

# Positions of the ten largest mean importances (argpartition leaves them unordered)
ind = np.argpartition(importance_mean, -10)[-10:]
top_feat = X_test.columns[ind]
top_vals, top_std = importance_mean[ind], importance_sd[ind]

# Horizontal bars with one standard deviation as error bars
fig, ax = plt.subplots()
ax.barh(top_feat, top_vals, xerr=top_std)
ax.set_xlabel("Mean accuracy decrease")
plt.savefig('EPDS RF Feature Importance Reweigh (val).png', dpi=600, bbox_inches='tight')

In [None]:
# Persist the tuned, reweighed random forest
joblib.dump(value=best_rf2, filename='best_rf_epds_reweigh_val.pkl')

## Logistic Regression

### Finetune Logistic Regression without Reweighing

In [None]:
# Training data for tuning: the undersampled training split
x = X_train_rus
y = y_train_rus.values.ravel()

def objective(trial):
    """Optuna objective: validation AUC of an unweighted L2 logistic regression."""
    # suggest_float replaces the deprecated suggest_loguniform / suggest_uniform
    # and matches the API used by the other tuning cells
    params = {'penalty':'l2',
             'C':trial.suggest_float("C", 1e-2, 1, log=True),
             'tol':trial.suggest_float('tol' , 1e-6 , 1e-3)
             }
    model = LogisticRegression(**params)
    model.fit(x,y)
    # Only the AUC is optimised, so class predictions and balanced accuracy
    # are not computed per trial
    predictions_prob = model.predict_proba(X_val)[:,1]
    return roc_auc_score(y_val, predictions_prob)

# Seeded TPE sampler so the search is reproducible
sampler = optuna.samplers.TPESampler(seed=0, consider_prior=True, prior_weight=0.5)
study = optuna.create_study(direction='maximize',sampler=sampler)
study.optimize(objective, n_trials=n_trials)

In [None]:
# Rebuild the regression from the best hyperparameters of the study
tuned_glm_params = dict(study.best_params)
best_glm = LogisticRegression(penalty='l2', **tuned_glm_params)

# original evaluation
# model_eval reports Iteration = i + 1, so -1 labels this run as iteration 0
i = -1
orig_GLM = pd.DataFrame(
    model_eval(
        model=best_glm,
        model_label='GLM',
        X_train=x,
        Y_train=y,
        X_test=X_test,
        Y_test=y_test,
        outcome_label=outcome,
        verbose=True,
    )
)

In [None]:
# Persist the tuned, unweighted logistic regression
joblib.dump(value=best_glm, filename='best_glm_epds_no_reweigh_val.pkl')

### Finetune Logistic Regression with Reweighing

In [None]:
# Training data for tuning: the undersampled training split and its weights
x = X_train_rus
y = y_train_rus.values.ravel()
w = weights_rus

def objective(trial):
    """Optuna objective: validation AUC of a sample-weighted L2 logistic regression."""
    # suggest_float replaces the deprecated suggest_loguniform / suggest_uniform
    # and matches the API used by the other tuning cells
    params = {'penalty':'l2',
             'C':trial.suggest_float("C", 1e-2, 1, log=True),
             'tol':trial.suggest_float('tol' , 1e-6 , 1e-3)
             }
    model = LogisticRegression(**params)
    model.fit(x, y, sample_weight=w)
    # Only the AUC is optimised, so class predictions and balanced accuracy
    # are not computed per trial
    predictions_prob = model.predict_proba(X_val)[:,1]
    return roc_auc_score(y_val, predictions_prob)

# Seeded TPE sampler so the search is reproducible
sampler = optuna.samplers.TPESampler(seed=0, consider_prior=True, prior_weight=0.5)
study = optuna.create_study(direction='maximize',sampler=sampler)
study.optimize(objective, n_trials=n_trials)

In [None]:
# Rebuild the regression from the best hyperparameters of the study
tuned_glm2_params = dict(study.best_params)
best_glm2 = LogisticRegression(penalty='l2', **tuned_glm2_params)

# original evaluation
# model_eval reports Iteration = i + 1, so -1 labels this run as iteration 0
i = -1
orig_GLM2 = pd.DataFrame(
    model_eval(
        model=best_glm2,
        model_label='GLM',
        X_train=x,
        Y_train=y,
        X_test=X_test,
        Y_test=y_test,
        outcome_label=outcome,
        W=w,
        verbose=True,
    )
)

In [None]:
# Persist the tuned, reweighed logistic regression
joblib.dump(value=best_glm2, filename='best_glm_epds_reweigh_val.pkl')

In [None]:
# Stack the six per-model evaluation tables row-wise and export them
orig_frames = [orig_XGB, orig_XGB2, orig_RF, orig_RF2, orig_GLM, orig_GLM2]
orig_results = pd.concat(orig_frames, axis=0)
orig_results.to_excel("EPDS_orig_results.xlsx")

# Bootstrap Test Results

In [None]:
# Reload the tuned models. The file names must match the '*_epds_*' files
# written by the joblib.dump calls in this notebook; the previous '*_phq9_*'
# names pointed at models tuned for a different outcome.
# NOTE: joblib.load unpickles arbitrary objects - only load files you created yourself.
best_glm = joblib.load('best_glm_epds_no_reweigh_val.pkl')
best_glm2 = joblib.load('best_glm_epds_reweigh_val.pkl')

best_rf = joblib.load('best_rf_epds_no_reweigh_val.pkl')
best_rf2 = joblib.load('best_rf_epds_reweigh_val.pkl')

best_xgb = joblib.load('best_xgb_epds_no_reweigh_val.pkl')
best_xgb2 = joblib.load('best_xgb_epds_reweigh_val.pkl')

In [None]:
outcome = 'EPDS_risk2'

# Models to evaluate, in reporting order: (estimator, label, sample weights or None)
boot_models = [
    (best_xgb, 'XGB', None),
    (best_xgb2, 'XGB', weights_rus),
    (best_rf, 'RF', None),
    (best_rf2, 'RF', weights_rus),
    (best_glm, 'GLM', None),
    (best_glm2, 'GLM', weights_rus),
]

# Loop-invariant inputs, built once
test_set_full = pd.concat([y_test, X_test], axis=1)
y_train_rus_flat = y_train_rus.values.ravel()

# Collect the per-model tables and concatenate once at the end instead of
# growing a DataFrame inside the loop.
boot_frames = []
# NOTE: the loop variable must be named `i` - model_eval reads the global `i`
# to fill its 'Iteration' field.
for i in range(100):
    # Resample the test set with replacement; the seed is the iteration number
    test_set_boot = test_set_full.sample(n=len(X_test), replace=True, random_state=i, ignore_index=True)
    # Dedicated names so the training globals `x` / `y` are not overwritten
    x_boot = test_set_boot.drop([outcome], axis=1)
    y_boot = test_set_boot[outcome]

    for boot_model, boot_label, boot_weights in boot_models:
        boot_frames.append(pd.DataFrame(model_eval(model=boot_model, model_label=boot_label, X_train=X_train_rus, Y_train=y_train_rus_flat, X_test=x_boot, Y_test=y_boot, outcome_label=outcome, W=boot_weights, verbose=False)))

boot_test_results = pd.concat(boot_frames, axis=0)

In [None]:
# Export the bootstrap evaluation table
boot_test_results.to_excel(excel_writer="EPDS_boot_test_results.xlsx")

# Evaluate Models Over Many Splits

In [None]:
outcome = 'EPDS_risk2'

# split into X and y
X = data.drop([outcome], axis=1)
Y = data[[outcome]]

n_repeat = 10
k_fold = 10

# Collect the per-model tables and concatenate once at the end instead of
# growing a DataFrame inside the loop.
fold_frames = []
# NOTE: the outer loop variable must be named `i` - model_eval reads the
# global `i` to fill its 'Iteration' field (all folds of a repeat share it).
for i in range(n_repeat):
    kf = KFold(n_splits=k_fold, random_state=i, shuffle=True)
    for train_index, test_index in kf.split(X):
        # Fold-specific names so the held-out split created earlier
        # (y_train, y_test, y_train_rus, weights_rus) is not overwritten
        x_train_fold = X.iloc[train_index]
        y_train_fold = Y.iloc[train_index]

        # 'w' holds the sample weights and must not be fed to predict()
        x_test_fold = X.iloc[test_index].drop(['w'], axis=1)
        y_test_fold = Y.iloc[test_index]

        rus = RandomUnderSampler(sampling_strategy = "auto", random_state=0)
        x_fold_rus, y_fold_rus = rus.fit_resample(x_train_fold, y_train_fold)
        w_fold_rus = x_fold_rus['w']
        x_fold_rus = x_fold_rus.drop(['w'], axis=1)
        # 1-D labels, as in the other evaluation cells
        y_fold_rus_flat = y_fold_rus.values.ravel()

        # Models in reporting order: (estimator, label, sample weights or None)
        fold_models = [
            (best_xgb, 'XGB', None),
            (best_xgb2, 'XGB', w_fold_rus),
            (best_rf, 'RF', None),
            (best_rf2, 'RF', w_fold_rus),
            (best_glm, 'GLM', None),
            (best_glm2, 'GLM', w_fold_rus),
        ]
        for fold_model, fold_label, fold_weights in fold_models:
            fold_frames.append(pd.DataFrame(model_eval(model=fold_model, model_label=fold_label, X_train=x_fold_rus, Y_train=y_fold_rus_flat, X_test=x_test_fold, Y_test=y_test_fold, outcome_label=outcome, W=fold_weights, verbose=False)))
    print("Iteration",i,"complete")

repeat_results = pd.concat(fold_frames, axis=0)

In [None]:
# Export the repeated K-fold evaluation table
repeat_results.to_excel(excel_writer="EPDS_repeat_results.xlsx")

In [None]:
# save this file and output as html
import os

# Shell command that renders the notebook to HTML through nbconvert
nbconvert_cmd = 'jupyter nbconvert --to html model_finetune_EPDS.ipynb'
os.system(nbconvert_cmd)