In [None]:
# IO
from pathlib import Path
try:
    import cPickle as pickle
except ModuleNotFoundError:
    import pickle

# Utility Libraries
import math
from datetime import datetime
import re
import csv
import itertools

# Data Processing
import pandas as pd
import numpy as np

# Predictive Analytics
import statsmodels.stats.api as sms
from sklearn.feature_selection import VarianceThreshold
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import LeaveOneGroupOut
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from bcpn_pipeline import data, features, models, consts
import shap

# Viz
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib as mpl
from matplotlib.dates import DateFormatter
from matplotlib.cbook import boxplot_stats
import matplotlib.dates as mdates
import matplotlib.transforms as mtrans
import seaborn as sns
# Apply seaborn's "whitegrid" style to the plots created below.
sns.set_style("whitegrid")

import matplotlib.pyplot as plt
# Turn on matplotlib's automatic layout adjustment for every figure.
plt.rcParams.update({'figure.autolayout': True})
# Disabled option: force an opaque white (RGBA 1,1,1,1) figure background.
# plt.rcParams.update({'figure.facecolor': [1.0, 1.0, 1.0, 1.0]})

# Configure autoreloading of modules: with mode 2, edits to imported modules
# are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2


In [None]:
import glob

# Collect every result CSV. glob returns matches in arbitrary (filesystem)
# order, so sort them to keep the row order of the concatenated frames
# reproducible across runs and machines.
all_files = sorted(glob.glob("results/*.csv"))

pred = []  # frames from files whose name does not contain 'auc'
auc = []   # frames from files whose name contains 'auc'

for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    # Classify on the file name only, so a directory component can never
    # decide which bucket a file lands in.
    if 'auc' in Path(filename).name:
        auc.append(df)
    else:
        pred.append(df)

# pd.concat raises an opaque "No objects to concatenate" ValueError on an empty
# list; fail early with a message that says what is actually missing.
if not pred:
    raise FileNotFoundError("No prediction result CSVs found matching 'results/*.csv'")
if not auc:
    raise FileNotFoundError("No AUC result CSVs ('auc' in file name) found matching 'results/*.csv'")

pred_res = pd.concat(pred, axis=0, ignore_index=True)
auc_res = pd.concat(auc, axis=0, ignore_index=True)

In [None]:
# Inspect the concatenated results read from the CSVs whose path does not contain 'auc'.
pred_res

In [None]:
# Rank the prediction results from best to worst test accuracy (ties broken by
# feature set, also descending) and show only the columns needed to compare runs.
summary_cols = ['test_accuracy', 'train_accuracy', 'method', 'featureset']
df = pred_res.sort_values(
    by=['test_accuracy', 'featureset'],
    ascending=[False, False],
)
df.loc[:, summary_cols]

In [None]:
# Inspect the concatenated results read from the CSVs with 'auc' in their path.
auc_res

In [None]:

# ROC curves for the 'study_day' feature set, restricted to rows flagged as
# grid-searched, with one line per modelling method.
# The explicit `== True` comparison is kept on purpose: it yields a clean
# boolean mask even if the column was not parsed as a pure bool dtype
# (presumably it is boolean -- confirm against the result CSVs).
df = auc_res[(auc_res['featureset'] == 'study_day') & (auc_res['gridsearch'] == True)]

# x and y must be passed by keyword: seaborn 0.11 only warns about positional
# use, but seaborn >= 0.12 accepts nothing except `data` positionally and
# raises a TypeError.
ax = sns.lineplot(x='test_mean_fpr', y='test_mean_tpr', hue='method', data=df)
ax.set(xlabel='False Positive Rate (Positive Label: 1)', ylabel='True Positive Rate (Positive Label: 1)')
plt.show()