# Model Evaluation

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from utils.helpers import get_database_connection
from postmodeling.evaluation import *

In [None]:
# Connection object from the project helper; the crosstab cells pass it to
# get_features_test_pred as the first argument.
db_conn = get_database_connection()

In [None]:
# Params
# Model whose results are examined in the cells below.
model_id = 433

# Per-county k values, passed to the helpers as `doco_k` / `joco_k`.
# Presumably the size of each county's top-k list -- TODO confirm.
doco_k = 40  # Douglas County
joco_k = 75  # Johnson County

# Not referenced by the cells shown in this section; may be used elsewhere.
threshold = 0.01
as_of_date = '2021-09-01'

## Confusion Matrices

In [None]:
# Confusion matrix: Both counties
# `model_results` is fetched once here and reused by the two
# county-specific confusion-matrix cells that follow.
model_results = get_test_pred_labels_from_csv(model_id)

if len(model_results) == 0:
    # Nothing came back for this model, so there is nothing to plot.
    print("No results available from selected model_id: ", model_id)
else:
    # Both counties' k values are supplied together.
    cf = get_confusion_matrix(
        model_results,
        doco_k=doco_k,
        joco_k=joco_k,
    )
    plt_cf = plot_confusion_matrix(cf)
    plt_cf.plot()

In [None]:
# Confusion matrix: Johnson County
# Reuses `model_results` loaded by the "Both counties" cell.
if len(model_results) == 0:
    print("No results available from selected model_id: ", model_id)
else:
    # Only the Johnson County k is supplied; doco_k=None presumably
    # excludes Douglas County -- TODO confirm against get_confusion_matrix.
    cf = get_confusion_matrix(
        model_results,
        doco_k=None,
        joco_k=joco_k,
    )
    plt_cf = plot_confusion_matrix(cf)
    plt_cf.plot()

In [None]:
# Confusion matrix: Douglas County
# Reuses `model_results` loaded by the "Both counties" cell.
if len(model_results) == 0:
    print("No results available from selected model_id: ", model_id)
else:
    # Only the Douglas County k is supplied; joco_k=None presumably
    # excludes Johnson County -- TODO confirm against get_confusion_matrix.
    cf = get_confusion_matrix(
        model_results,
        doco_k=doco_k,
        joco_k=None,
    )
    plt_cf = plot_confusion_matrix(cf)
    plt_cf.plot()

## Feature-Labels Crosstabs

In [None]:
# Crosstabs: categorical demographics
# Pull the 'demographics_cat' features for the selected model; the Sex and
# Race cells below read from `features_test_pred`.
features_table = 'demographics_cat'
features_test_pred = get_features_test_pred(
    db_conn,
    model_id,
    features_table,
    doco_k=doco_k,
    joco_k=joco_k,
)
# Swap missing values for the literal 'Missing' (presumably so they appear
# as their own category in the crosstabs).
features_test_pred = features_test_pred.fillna('Missing')

### Sex

In [None]:
# Sex: Both counties
# create_crosstabs hands back two tables (named here for labels and for
# predictions); each is printed under a separator line.
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred, "dem_sex")
print(
    "SEX:",
    '----------------------------------',
    label_crosstab,
    '----------------------------------',
    pred_crosstab,
    sep='\n',
)

In [None]:
# Sex: Johnson County
# Restrict to rows whose 'county' column equals 'joco' before tabulating.
joco_mask = features_test_pred['county'] == 'joco'
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred.loc[joco_mask], "dem_sex")
print(
    "Johnson County - SEX:",
    '----------------------------------',
    label_crosstab,
    '----------------------------------',
    pred_crosstab,
    sep='\n',
)

In [None]:
# Sex: Douglas County
# Restrict to rows whose 'county' column equals 'doco' before tabulating.
doco_mask = features_test_pred['county'] == 'doco'
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred.loc[doco_mask], "dem_sex")
print(
    "Douglas County - SEX:",
    '----------------------------------',
    label_crosstab,
    '----------------------------------',
    pred_crosstab,
    sep='\n',
)

### Race

In [None]:
# Race: Both Counties
# Tabulate "dem_race" over every row, then print both returned tables.
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred, "dem_race")
print(
    "RACE:",
    '-----------------------------------------------------',
    label_crosstab,
    '-----------------------------------------------------',
    pred_crosstab,
    sep='\n',
)

In [None]:
# Race: Johnson County
# Restrict to rows whose 'county' column equals 'joco' before tabulating.
joco_mask = features_test_pred['county'] == 'joco'
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred.loc[joco_mask], "dem_race")
print(
    "Johnson County - RACE:",
    '-----------------------------------------------------',
    label_crosstab,
    '-----------------------------------------------------',
    pred_crosstab,
    sep='\n',
)

In [None]:
# Race: Douglas County
# Restrict to rows whose 'county' column equals 'doco' before tabulating.
doco_mask = features_test_pred['county'] == 'doco'
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred.loc[doco_mask], "dem_race")
print(
    "Douglas County - RACE:",
    '-----------------------------------------------------',
    label_crosstab,
    '-----------------------------------------------------',
    pred_crosstab,
    sep='\n',
)

### Age

In [None]:
# Crosstabs: numeric demographics
# Replaces `features_test_pred` with the 'demographics_num' features; the
# Age cells below read "dem_age" from it.
features_table = 'demographics_num'
features_test_pred = get_features_test_pred(
    db_conn,
    model_id,
    features_table,
    doco_k=doco_k,
    joco_k=joco_k,
)

In [None]:
# Age splitting buckets
# Five edges delimit four buckets; `names` holds one label per bucket.
# NOTE(review): whether an age of exactly 20 / 40 / 65 falls in the lower or
# upper bucket depends on how create_crosstabs applies the edges -- confirm
# that it matches the labels.
bins = [
    0.0,
    20.0,
    40.0,
    65.0,
    np.inf,
]
names = [
    '(1) <20',
    '(2) 21-40',
    '(3) 41-65',
    '(4) 66+',
]
# Passed as the third argument to create_crosstabs in the Age cells.
split_tuple = (bins, names)

In [None]:
# Age: Both Counties
# "dem_age" is numeric, so the (bins, names) pair is supplied for bucketing.
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred, "dem_age", split_tuple)
print(
    "AGE:",
    '-----------------------------------------------------',
    label_crosstab,
    '-----------------------------------------------------',
    pred_crosstab,
    sep='\n',
)

In [None]:
# Age: Johnson County
# Restrict to rows whose 'county' column equals 'joco' before tabulating.
joco_mask = features_test_pred['county'] == 'joco'
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred.loc[joco_mask], "dem_age", split_tuple)
print(
    "Johnson County - AGE:",
    '-----------------------------------------------------',
    label_crosstab,
    '-----------------------------------------------------',
    pred_crosstab,
    sep='\n',
)

In [None]:
# Age: Douglas County
# Restrict to rows whose 'county' column equals 'doco' before tabulating.
doco_mask = features_test_pred['county'] == 'doco'
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred.loc[doco_mask], "dem_age", split_tuple)
print(
    "Douglas County - AGE:",
    '-----------------------------------------------------',
    label_crosstab,
    '-----------------------------------------------------',
    pred_crosstab,
    sep='\n',
)

### Days since last event

In [None]:
# Crosstabs: numeric client-event features
# Replaces `features_test_pred` with the 'client_events_num' features; the
# cell below reads "event_days_since_last" from it.
features_table = 'client_events_num'
features_test_pred = get_features_test_pred(
    db_conn,
    model_id,
    features_table,
    doco_k=doco_k,
    joco_k=joco_k,
)

In [None]:
# Days-since-last-event splitting buckets
# Eight edges delimit seven buckets; `names` holds one label per bucket,
# numbered so the labels sort in bucket order.
bins = [
    0.0,
    50.0,
    100.0,
    200.0,
    400.0,
    800.0,
    1600.0,
    np.inf,
]
names = [
    '1.<50',
    '2.50-100',
    '3.100-200',
    '4.200-400',
    '5.400-800',
    '6.800-1600',
    '7.1600+',
]
# Passed as the third argument to create_crosstabs in the next cell.
split_tuple = (bins, names)

In [None]:
# Days since last event: Both Counties
# Tabulates "event_days_since_last" over every row, bucketed with the
# (bins, names) pair defined in the previous cell.
label_crosstab, pred_crosstab = create_crosstabs(features_test_pred, "event_days_since_last", split_tuple)
# Header names the feature being tabulated, so the output is not mistaken
# for the Age crosstab printed earlier in the notebook.
print("DAYS SINCE LAST EVENT:")
print('-----------------------------------------------------')
print(label_crosstab)
print('-----------------------------------------------------')
print(pred_crosstab)