In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as datetime
import seaborn as sns
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from utils.helpers import get_database_connection, get_label_from_experiment_table
from utils.constants import PREDICTIONS_DIR
from postmodeling.evaluation import get_test_pred_labels_from_csv
from dateutil.relativedelta import relativedelta


In [None]:
# Helper functions needed by the rest of the notebook

def get_predictions_thresholded_k(test_predictions, doco_k=40, joco_k=75):
    '''
    Keep only the predictions that fall within each county's top-k cutoff.

    Rows whose 'county' is 'joco' are kept when their 'county_k' is <= joco_k,
    and rows whose 'county' is 'doco' are kept when their 'county_k' is <= doco_k.
    Rows for any other county value are dropped. The returned table lists the
    joco rows first, followed by the doco rows, with their original index labels.
    '''
    def rows_within_cutoff(county_name, cutoff):
        # Narrow to one county first, then apply that county's cutoff
        county_rows = test_predictions.loc[test_predictions['county'] == county_name]
        return county_rows.loc[county_rows['county_k'] <= cutoff]

    cutoffs = (('joco', joco_k), ('doco', doco_k))
    return pd.concat([rows_within_cutoff(name, k) for name, k in cutoffs], axis=0)

def get_future_events(table_name, joid_list, event_date, as_of_date):
    '''
    Fetch every row of table_name for the given joids that is dated after as_of_date.

    table_name -- table to select from
    joid_list  -- text placed directly after `in` in the query, e.g. the string '(1, 2, 3)'
    event_date -- name of the date column that is compared against as_of_date
    as_of_date -- cutoff date; only rows with a strictly later date are returned

    All four values are interpolated into the SQL text as-is, and the query is
    run on the notebook-level db_conn connection via pd.read_sql.
    '''

    sql = f'''
    select *
    from {table_name}
    where joid in {joid_list}
    and {event_date} > '{as_of_date}'
    '''
    return pd.read_sql(sql, db_conn)

def get_date_range(start_date, month_jump):
    '''
    Get a list of dates from start_date towards the present, spaced month_jump
    months apart (i.e. to use as bins for a histogram or as axis ticks).

    start_date -- first entry of the returned list (compared against
                  datetime.date.today(), so it should be a date)
    month_jump -- positive whole number of months between consecutive entries

    The list always starts with start_date. Dates keep being appended while the
    last entry is earlier than (today - month_jump months).

    Raises ValueError when month_jump is not positive, because the list would
    otherwise never advance and the loop would not terminate.
    '''
    if month_jump <= 0:
        raise ValueError(f'month_jump must be a positive number of months, got {month_jump}')

    # Evaluate the stopping point once so every comparison uses the same "today"
    cutoff = datetime.date.today() - relativedelta(months=month_jump)

    date_list = [start_date]
    while date_list[-1] < cutoff:
        # Offset from start_date instead of from the previous entry: adding months
        # repeatedly to a clipped date (Jan 31 -> Feb 28 -> Mar 28 ...) would make the
        # day-of-month drift for start dates on the 29th-31st.
        date_list.append(start_date + relativedelta(months=month_jump * len(date_list)))

    return date_list

def plot_ER_amb_events_over_time(data, filter_flag = None):
    '''
    Plot a histogram of event counts over time, starting at the as-of-date.

    data        -- dataframe with an 'event_date' column and, when filter_flag is
                   given, a column of that name
    filter_flag -- optional column name; when set, only rows where that column
                   equals True are plotted. None plots every row.

    Uses the notebook-level as_of_date for the left x-limit, the one-month
    histogram bins and the three-month x ticks. Draws on a new 10x3 figure and
    shows it; nothing is returned.
    '''
    plt.figure(figsize=(10,3))

    if filter_flag is not None:
        # Explicit comparison kept on purpose: the flag columns may not be a strict
        # boolean dtype (presumably object after the merge/fillna upstream - verify)
        data = data[data[filter_flag] == True]
    data['event_date'].hist(bins = get_date_range(as_of_date, 1))
    plt.gca().set_xlim(left = as_of_date)
    plt.xticks(get_date_range(as_of_date, 3))
    # Switch the grid off explicitly; grid(visible=None) only toggles whatever
    # state the axes happened to be in
    plt.grid(False)

    if filter_flag is None:
        flag_title = 'All Ambulance and ER Events'
    else:
        flag_title = filter_flag

    plt.title('Counts of ' + flag_title.replace('_', ' ').title() + ' for False Alarms Post As-Of-Date')
    # Must be called: a bare `plt.show` is just a reference to the function
    plt.show()

def get_all_flagged_events():
    '''
    Stack the post-as-of-date ambulance runs and ER visits into one dataframe.

    Both sources are queried with get_future_events for the notebook-level
    joids_str and as_of_date, and reduced to 'joid', the event date and the
    non_fatal_flags columns. The ER 'admission_date' column is renamed to
    'event_date' so the two sources line up. Ambulance rows come first.

    The returned frame has a fresh 0..n-1 index, and the pre-concat row labels
    are kept in an 'index' column.
    '''
    ambulance_events = get_future_events(
        'semantic.ambulance_runs', joids_str, 'event_date', as_of_date
    )[['joid', 'event_date'] + non_fatal_flags]

    er_events = get_future_events(
        'clean.joco110hsccclientmisc2eadiagnosis', joids_str, 'admission_date', as_of_date
    )[['joid', 'admission_date'] + non_fatal_flags].rename(columns={'admission_date': 'event_date'})

    # reset_index() without drop=True is what produces the 'index' column
    return pd.concat([ambulance_events, er_events], axis=0).reset_index()


Changeable parameters: set `experiment_id` and `modelid` in the cell below to choose which experiment and model are analysed.

In [None]:
#### CHANGE EXPERIMENT ID AND MODEL ID AS NEEDED ####
experiment_id = 25
modelid = 434
#####################################################

# Connection shared by every query in this notebook
db_conn = get_database_connection()

# Flag columns for the non-fatal crisis events; all_flags adds the death flag on top
non_fatal_flags = [
    'suicidal_flag',
    'suicide_attempt_flag',
    'drug_flag',
    'alcohol_flag',
    'other_mental_crisis_flag',
]
all_flags = [*non_fatal_flags, 'death_flag']

In [None]:
# Load the test-set predictions for this model, then keep only the rows that
# fall inside each county's top-k cutoff (default cutoffs of the helper).
test_predictions = get_test_pred_labels_from_csv(modelid, PREDICTIONS_DIR)
thresholded_predictions = get_predictions_thresholded_k(test_predictions)


In [None]:
# Print basic information about this model: what the label includes and the validation window
label = get_label_from_experiment_table(db_conn, experiment_id)

# The as-of-date is read from the first distinct value in the predictions;
# the label window is taken to end six months after it
as_of_date = datetime.datetime.strptime(
    thresholded_predictions['as_of_date'].unique()[0], '%Y-%m-%d'
).date()
label_end_date = as_of_date + relativedelta(months=6)

print(f"LABELS\nlabel includes {', '.join(label).lower()}\n\n")
print(f'VALIDATION\nvalidation period goes from {as_of_date} until {label_end_date}')


In [None]:

# Select the "false alarms": people the model flagged (inside the top-k) whose label
# is False, i.e. they did not have the outcome during the label period
false_predictions_df = thresholded_predictions.loc[thresholded_predictions['label'] == False]
false_predictions_list = list(false_predictions_df['joid'])

# Render the joids as "(id1, id2, ...)" so they can be placed after `in` in the SQL queries
joids_str = f"({', '.join(map(str, false_predictions_list))})"

# Ambulance + ER events for those people; none of these rows is a death, so death_flag is False
all_flagged_events = (
    get_all_flagged_events()
    .drop(columns='index')
    .reset_index(drop=True)
    .assign(death_flag=False)
)

In [None]:
# Get all deaths after the as-of-date from both death tables, then join them with
# the non-death events.
deaths_false_alarms_d = get_future_events('clean.jocodcmexoverdosessuicides', joids_str,'dateofdeath', as_of_date)
deaths_false_alarms_j = get_future_events('clean.jocojcmexoverdosessuicides', joids_str,'dateofdeath', as_of_date)

# Built as a single chain so death_flag is added to a fresh frame rather than assigned
# onto a column subset (which triggers SettingWithCopyWarning). ignore_index=True gives
# the 0..n-1 index directly instead of creating an 'index' column that is then discarded.
all_future_deaths = (
    pd.concat([deaths_false_alarms_d, deaths_false_alarms_j], axis=0, ignore_index=True)
    .rename(columns={'dateofdeath': 'event_date'})
    .loc[:, ['joid', 'event_date']]
    .assign(death_flag=True)
)

# Outer merge keeps every ambulance/ER row and every death row; flag columns that are
# missing on one side come back as NaN and are filled with False.
all_flagged_events = pd.merge(all_flagged_events, all_future_deaths, how = 'outer' , on = ['joid', 'event_date', 'death_flag']).fillna(False)

In [None]:
# All client events after the as-of-date for the false alarms
future_client_events = get_future_events('semantic.client_events', joids_str, 'event_date', as_of_date)

# Summary counts per flag. Note that these count flagged event rows, not people.
summary_counts = all_flagged_events.loc[:, all_flags].sum()
print('SUMMARY: COUNTS OF ALL FLAGS IN FUTURE OF AS OF DATE\n', summary_counts, sep='\n')


In [None]:
# Plot the number of occurrences over time: first for all events together
# (filter_flag=None), then once per individual flag.
for filter_flag in [None, *all_flags]:
    plot_ER_amb_events_over_time(all_flagged_events, filter_flag=filter_flag)


In [None]:
# Collapse the per-event rows into one row per joid: sum each flag per person, then
# left-join onto the false alarms so people with no events are kept with zero counts.
false_alarm_by_joid = (
    false_predictions_df[['joid']]
    .merge(
        all_flagged_events.groupby('joid')[all_flags].sum().reset_index(),
        on='joid',
        how='left',
    )
    .fillna(0)
)

# 'any' is True when the person has at least one flagged event of any kind
columns = [name for name in false_alarm_by_joid.columns if name != 'joid']
false_alarm_by_joid['any'] = false_alarm_by_joid[columns].sum(axis=1).astype(bool)


In [None]:
# Count the people who had at least one flagged event after the as-of-date
sum_with_crises = (false_alarm_by_joid['any'] != False).sum()
total_num_people = len(false_alarm_by_joid)

# Print the per-person summary
print(f'{sum_with_crises} people out of {total_num_people} total "false alarms" had a behavioral crisis in the future')

In [None]:

# Show the full per-joid table; the row display limit is lifted (notebook-wide)
# so the output is not truncated.
print('Counts of each flag for all the "false alarms" in the future of the as-of-date')
pd.options.display.max_rows = None
false_alarm_by_joid
