In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sqlalchemy import create_engine
import os

import utils

from IPython.display import Image
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last
# one. Plotting cells therefore bind results to `_` to keep the output quiet.
InteractiveShell.ast_node_interactivity = "all"

# The database URL comes from the DBURL environment variable; a missing
# variable raises KeyError here, before any engine is created.
dburl = os.environ['DBURL']
# NOTE(review): `engine` is not referenced by the cells visible here --
# presumably `utils` opens its own connection; confirm before removing.
engine = create_engine(dburl)

-----
# Plotting results for best models for "Access in 6 months"
- Overall performance
- Precision-Recall curves (alternatively PPV-Sensitivity Trade-Off)
- Feature importance

In [None]:
# Keys shared by the three lookup tables below, spelled out once so the tables
# cannot drift apart through a typo.
RANDOM_FOREST = 'sklearn.ensemble.RandomForestClassifier'
SCALED_LOGIT = 'triage.component.catwalk.estimators.classifiers.ScaledLogisticRegression'
EXPERT_RULES = 'Expert Rules'

# Classifier -> model group id of the best model for "Access in 6 months".
# Insertion order is the order in which the comparison plot iterates.
best_6months = {
    RANDOM_FOREST: 20854,
    # 'sklearn.tree.DecisionTreeClassifier': 20882,
    SCALED_LOGIT: 20892,
    # 'xgboost.sklearn.XGBClassifier': 20932,
    EXPERT_RULES: 21065,
}

# Classifier -> legend text.
labels = {
    RANDOM_FOREST: 'Random Forest (1000 trees, no max depth)',
    SCALED_LOGIT: 'Logistic Regression (L1 penalty)',
    EXPERT_RULES: 'Expert Baseline',
}

# Classifier -> line colour (RGBA hex). 'baseline' is not a classifier; it is
# the colour used for the "Prior" reference line.
colors = {
    RANDOM_FOREST: '#00A1D5FF',
    'baseline': '#374E55FF',
    # 'sklearn.tree.DecisionTreeClassifier': '#DF8F44FF',
    SCALED_LOGIT: '#79AF97FF',
    EXPERT_RULES: '#B24745FF',
}

In [None]:
# Model inspected in the detail plots: looked up from the random-forest model
# group with the date '2015-01-01'.
# NOTE(review): presumably the date selects the model trained as of that time --
# confirm against utils.get_model_id_at_time.
rf_model_group_id = best_6months['sklearn.ensemble.RandomForestClassifier']
model_id_to_plot = utils.get_model_id_at_time(rf_model_group_id, '2015-01-01')

In [None]:
def year_filter(x):
    """Return True when the first element of ``x`` has a year before 2016.

    Passed as ``filter_x`` to the evaluation helper so that the last, partial
    year of data is left out of the plots.
    """
    return x[0].year < 2016

In [None]:
# Precision at the top 10% of scores, per validation year, for each of the best
# model groups, plus a "Prior" reference line (the same metric at 100%).
metric = 'precision@'
parameter = '10.0_pct'

# Style and context are applied BEFORE the figure is created: seaborn writes
# them into matplotlib's rcParams, and axes pick those up at creation time.
# Setting the context afterwards made the first run of this cell look different
# from every re-run.
sns.set_style("whitegrid")
sns.set_context("poster", font_scale=1, rc={"lines.linewidth": 4, "lines.markersize": 12})
fig, ax = plt.subplots(1, figsize=(24, 10))
_ = ax.set_ylim(0, 1)

# One line per model group, in the insertion order of `best_6months`.
for clf, model_group_id in best_6months.items():
    label = labels[clf]
    x, y = utils.get_model_evaluation(model_group_id, metric, parameter, filter_x=year_filter)
    _ = ax.plot(x, y, label=label, marker='o', linestyle='-', color=colors[clf])

# The reference line is read from the LAST model group in `best_6months`. This
# was previously implicit (the loop variable leaked out of the `for` above);
# it is now an explicit, named choice.
# NOTE(review): presumably precision at 100% is the label base rate and hence
# identical for every model group on the same cohorts -- confirm.
baseline_group_id = list(best_6months.values())[-1]
x_baseline, y_baseline = utils.get_model_evaluation(
    baseline_group_id, metric, '100.0_pct', filter_x=year_filter
)
_ = ax.plot(x_baseline, y_baseline, label='Prior', marker='o', linestyle='-', color=colors['baseline'])

_ = ax.set_ylabel("Positive Predictive Value \n(Top 10% of scores are predicted positive)", fontsize=24)
_ = ax.set_xlabel("Year of Appointment for Validation Cohort", fontsize=24)

_ = ax.legend(bbox_to_anchor=(0., .85, 1., .102), loc='upper center', ncol=2, borderaxespad=0., fontsize=24)

In [None]:
# Precision-recall plot for the single model chosen in `model_id_to_plot`.
# NOTE(review): the string is presumably used as the plot title -- confirm
# against utils.plot_pr_at_k_for_model.
utils.plot_pr_at_k_for_model(model_id_to_plot, "Accessing care at 6 months")

In [None]:
# Feature-importance plot for the model chosen in `model_id_to_plot`.
# NOTE(review): the second argument is the Python builtin `max`, passed as a
# function object. Presumably the helper uses it as an aggregation function;
# if it expects a number of features instead, this is a bug -- confirm against
# utils.plot_feature_imp.
utils.plot_feature_imp(model_id_to_plot, max, 'Access in 6 months')