### Model Evaluation

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

from utils.helpers import get_database_connection
from utils.constants import SQL_READ_PRED_LABELS, SQL_READ_FEATURES_TEST_PRED

from pipeline.evaluation import *

In [None]:
# Handle returned by the project helper; later cells pass it as the first
# argument to get_test_pred_labels and get_features_test_pred.
db_conn = get_database_connection()

In [None]:
# Params
# Identifier of the model under evaluation; forwarded to get_test_pred_labels
# and get_features_test_pred.
model_id = 8

# Per-county k values, forwarded as the joco_k / doco_k keyword arguments
# (presumably top-k list sizes for Johnson and Douglas County -- TODO confirm).
joco_k, doco_k = 75, 40

# Not referenced by any cell visible in this section; kept for cells that may
# read it elsewhere.
threshold = 0.01

# Date string forwarded to get_features_test_pred.
as_of_date = '2021-07-01'

### Confusion Matrices

In [None]:
# Confusion matrix: Both counties
# Load the results for the selected model once; the per-county cells below
# reuse model_results instead of querying again.
model_results = get_test_pred_labels(db_conn, model_id)

if len(model_results) == 0:
    # Nothing came back for this model_id, so there is nothing to plot.
    print("No results available from selected model_id: ", model_id)
else:
    # Both k values are supplied, so neither county is switched off with None.
    cf = get_confusion_matrix(model_results, doco_k=doco_k, joco_k=joco_k)
    plt_cf = plot_confusion_matrix(cf)
    plt_cf.plot()

In [None]:
# Confusion matrix: Johnson County
if len(model_results) == 0:
    # Nothing came back for this model_id, so there is nothing to plot.
    print("No results available from selected model_id: ", model_id)
else:
    # doco_k=None with joco_k set: presumably restricts the matrix to
    # Johnson County -- TODO confirm against get_confusion_matrix.
    cf = get_confusion_matrix(model_results, doco_k=None, joco_k=joco_k)
    plt_cf = plot_confusion_matrix(cf)
    plt_cf.plot()

In [None]:
# Confusion matrix: Douglas County
if len(model_results) == 0:
    # Nothing came back for this model_id, so there is nothing to plot.
    print("No results available from selected model_id: ", model_id)
else:
    # joco_k=None with doco_k set: presumably restricts the matrix to
    # Douglas County -- TODO confirm against get_confusion_matrix.
    cf = get_confusion_matrix(model_results, doco_k=doco_k, joco_k=None)
    plt_cf = plot_confusion_matrix(cf)
    plt_cf.plot()

### Feature-Labels Crosstabs

In [None]:
# Crosstabs: categorical demographics
features_table = 'demographics_cat'

# Pull the rows for the categorical demographics table using the notebook's
# as_of_date, model_id and per-county k values.
features_test_pred = get_features_test_pred(
    db_conn,
    as_of_date,
    model_id,
    features_table,
    doco_k=doco_k,
    joco_k=joco_k,
)

# Replace missing values with the literal 'Missing' before the crosstab cells
# use this frame.
features_test_pred = features_test_pred.fillna('Missing')

In [None]:
# Sex: Both counties
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred, "dem_sex")

# One print call with a newline separator emits the same five lines as
# printing each piece on its own.
divider = '----------------------------------'
print("SEX:", divider, label_crosstab, divider, pred_crosstab, sep="\n")

In [None]:
# Sex: Johnson County
# Keep only the rows whose 'county' value is 'joco'.
joco_mask = features_test_pred['county'] == 'joco'
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred.loc[joco_mask], "dem_sex")

# One print call with a newline separator emits the same five lines as
# printing each piece on its own.
divider = '----------------------------------'
print("Johnson County - SEX:", divider, label_crosstab, divider, pred_crosstab, sep="\n")

In [None]:
# Sex: Douglas County
# Keep only the rows whose 'county' value is 'doco'.
doco_mask = features_test_pred['county'] == 'doco'
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred.loc[doco_mask], "dem_sex")

# One print call with a newline separator emits the same five lines as
# printing each piece on its own.
divider = '----------------------------------'
print("Douglas County - SEX:", divider, label_crosstab, divider, pred_crosstab, sep="\n")

In [None]:
# Race: Both Counties
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred, "dem_race")

# One print call with a newline separator emits the same five lines as
# printing each piece on its own.
divider = '-----------------------------------------------------'
print("RACE:", divider, label_crosstab, divider, pred_crosstab, sep="\n")

In [None]:
# Race: Johnson County
# Keep only the rows whose 'county' value is 'joco'.
joco_mask = features_test_pred['county'] == 'joco'
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred.loc[joco_mask], "dem_race")

# One print call with a newline separator emits the same five lines as
# printing each piece on its own.
divider = '-----------------------------------------------------'
print("Johnson County - RACE:", divider, label_crosstab, divider, pred_crosstab, sep="\n")

In [None]:
# Race: Douglas County
# Keep only the rows whose 'county' value is 'doco'.
doco_mask = features_test_pred['county'] == 'doco'
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred.loc[doco_mask], "dem_race")

# One print call with a newline separator emits the same five lines as
# printing each piece on its own.
divider = '-----------------------------------------------------'
print("Douglas County - RACE:", divider, label_crosstab, divider, pred_crosstab, sep="\n")

In [None]:
# Crosstabs: demographics numeric
features_table = 'demographics_num'

# Pull the rows for the numeric demographics table using the notebook's
# as_of_date, model_id and per-county k values.
features_test_pred = get_features_test_pred(
    db_conn,
    as_of_date,
    model_id,
    features_table,
    doco_k=doco_k,
    joco_k=joco_k,
)

# Derive an age-in-years column from dem_age
# (presumably dem_age is stored in days, hence 365.25 -- TODO confirm).
features_test_pred["dem_age_yrs"] = features_test_pred["dem_age"] / 365.25

In [None]:
# Age splitting buckets
# Bin edges (in years) and one label per interval between consecutive edges.
# NOTE(review): which side of each edge is inclusive is decided inside
# create_crosstabs, which is not shown here -- confirm the labels match, e.g.
# whether an age of exactly 20 (or 20.5) belongs to '<20' or '21-40'.
bins = [
    0.0,
    20.0,
    40.0,
    65.0,
    np.inf,
]
names = ['<20', '21-40', '41-65', '66+']

# Passed as the third argument to create_crosstabs by the age cells.
split_tuple = bins, names

In [None]:
# Age: Both Counties
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred, "dem_age_yrs", split_tuple)

# One print call with a newline separator emits the same five lines as
# printing each piece on its own.
divider = '-----------------------------------------------------'
print("AGE:", divider, label_crosstab, divider, pred_crosstab, sep="\n")

In [None]:
# Age: Johnson County
# Keep only the rows whose 'county' value is 'joco'.
joco_mask = features_test_pred['county'] == 'joco'
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred.loc[joco_mask], "dem_age_yrs", split_tuple)

# One print call with a newline separator emits the same five lines as
# printing each piece on its own.
divider = '-----------------------------------------------------'
print("Johnson County - AGE:", divider, label_crosstab, divider, pred_crosstab, sep="\n")

In [None]:
# Age: Douglas County
# Keep only the rows whose 'county' value is 'doco'.
doco_mask = features_test_pred['county'] == 'doco'
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred.loc[doco_mask], "dem_age_yrs", split_tuple)

# One print call with a newline separator emits the same five lines as
# printing each piece on its own.
divider = '-----------------------------------------------------'
print("Douglas County - AGE:", divider, label_crosstab, divider, pred_crosstab, sep="\n")