# Suicide and fatal overdoses data questions

This notebook takes a cursory look at fatal overdose and suicide data. Currently,
the presence of previous drug history and suicidality is determined solely from ambulance
data. A future iteration should aggregate this history more carefully, drawing on
additional data sources.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys
sys.path.append('../src/')

In [None]:
import pandas as pd
import sqlalchemy
import json
from utils.helpers import get_database_connection, get_events
from dateutil.relativedelta import relativedelta
import seaborn as sns
import matplotlib.pyplot as plt
import csv
import os

In [None]:
# Open the database connection (helper imported from utils.helpers); the
# pd.read_sql calls in this notebook all run against it.
db_conn = get_database_connection()

In [None]:
# Build one row per deceased person from the medical examiner tables and attach
# ambulance flags, client-row counts and demographics.
#
# Ambulance source tables used below:
#   - clean.jocomedactincidents                 (CTE `jcamb`)
#   - clean.joco110hsccclientmisc2eaimpression  (CTE `dcamb`)
#
# CTE overview:
#   jcmex / dcmex -> medical examiner rows joined to the client table, tagged
#                    with county 'j' / 'd'; `mex` is their union.
#   jcamb / dcamb -> per-joid ambulance flags; max(flag::int)::bool makes a flag
#                    true when any of the joid's rows has it set. `amb` merges
#                    both sources the same way.
#   num_rows      -> number of rows each joid has in clean.jocojococlient.
#   dem           -> race and sex from clean.jocojcmhcdemographics_dedupe.
# The final select left-joins everything onto `mex`, so people without
# ambulance or demographic rows are kept with NULLs in those columns.
q = f"""
-- MEDICAL EXAMINER DATA
with jcmex as 
(
select 
    j.joid,
    j.suicide,
    j.overdosed,
    j.dateofbirth,
    j.dateofdeath,
    'j' as county
from clean.jocojcmexoverdosessuicides j
join clean.jocojococlient client
on j.joid = client.joid
),
dcmex as 
(
select
    j.joid,
    j.suicide,
    j.overdosed,
    j.dateofbirth,
    j.dateofdeath,
    'd' as county
from clean.jocojococlient client
join clean.jocodcmexoverdosessuicides j
on client.joid = j.joid
),
mex as 
(
select * from jcmex
union
select * from dcmex
),


-- AMBULANCES
jcamb as 
(
select
    client.joid,
    max(ambulance.suicide_attempt_flag::int)::bool as suicide_attempt_flag,
    max(ambulance.suicidal_flag::int)::bool as suicidal_flag,
    max(ambulance.drug_flag::int)::bool as drug_flag
from clean.jocojococlient client
join clean.jocomedactincidents ambulance
on client.joid = ambulance.joid
group by client.joid
),
dcamb as 
(
select
    client.joid,
    max(ambulance.suicide_attempt_flag::int)::bool as suicide_attempt_flag,
    max(ambulance.suicidal_flag::int)::bool as suicidal_flag,
    max(ambulance.drug_flag::int)::bool as drug_flag
from clean.jocojococlient client
join clean.joco110hsccclientmisc2eaimpression ambulance
on client.joid = ambulance.joid
group by client.joid
),
amb as
(
    select joid, 
    max(suicide_attempt_flag::int)::bool as suicide_attempt_flag,
    max(suicidal_flag::int)::bool as suicidal_flag,
    max(drug_flag::int)::bool as drug_flag
    from
    (select * from jcamb
    union
    select * from dcamb) ambs
    group by joid
),


-- NUM ROWS: number of rows each joid has in the clients tables
num_rows as 
(
select
    joid,
    count(*) as num_client_rows
from
    clean.jocojococlient client
group by joid
),

-- Demographic info
dem as
(
select
    joid, 
    race,
    sex
from clean.jocojcmhcdemographics_dedupe
)

select 
    mex.joid,
    mex.dateofbirth,
    mex.suicide,
    mex.overdosed,
    mex.dateofdeath,
    mex.county,
    amb.suicide_attempt_flag,
    amb.suicidal_flag,
    amb.drug_flag,
    num_client_rows,
    dem.race,
    dem.sex
from mex
left join amb
on mex.joid = amb.joid
left join num_rows on mex.joid = num_rows.joid
left join dem on mex.joid = dem.joid
--group by mex.joid, mex.dateofbirth, num_client_rows, suicide, overdosed, dateofdeath
"""

# Scratch variant of the final select, kept for reference:
# select joid, suicidal_flag, drug_flag
# from amb

# Run the query and load the result into the notebook-wide DataFrame `df`.
df = pd.read_sql(q, db_conn)

In [None]:
# (rows, columns) of the combined query result.
df.shape

In [None]:
# Count rows whose county tag is neither 'j' nor 'd'. The query only emits
# those two literals, so anything other than 0 signals a problem.
sum(~df['county'].isin(['j', 'd']))

In [None]:
# Ensure no duplicates: show every row whose joid occurs more than once
# (keep=False marks all copies, not just the repeats), sorted by joid.
# An empty frame means each joid appears exactly once.
df[df['joid'].duplicated(keep=False)].sort_values(by=['joid'])

### Race and sex breakdown of data (aggregated only from mhc currently)

In [None]:
# Demographic data obtained only from the MHC demographics table
# (clean.jocojcmhcdemographics_dedupe); dropna=False also counts people with
# no recorded race.
df['race'].value_counts(dropna=False)

In [None]:
# Use seaborn's "white" style for the demographic plots below.
# NOTE(review): the spine override comes after set_style, presumably so the
# style's own spine settings do not undo it - keep this order.
sns.set_style("white")
# General setup for plots: hide the top and right axes spines.
plt.rc("axes.spines", top=False, right=False)

In [None]:
# Histogram of recorded race with enlarged axis titles and tick labels.
ax = sns.histplot(df['race'])
for set_axis_label, text in ((ax.set_xlabel, 'race'), (ax.set_ylabel, 'count')):
    set_axis_label(text, fontsize=20)
for label in [*ax.get_xticklabels(), *ax.get_yticklabels()]:
    label.set_fontsize(16)

In [None]:
# Demographic data obtained only from the MHC demographics table
# (clean.jocojcmhcdemographics_dedupe); dropna=False also counts people with
# no recorded sex.
df['sex'].value_counts(dropna=False)

In [None]:
# Histogram of recorded sex with enlarged axis titles and tick labels.
ax = sns.histplot(df['sex'])
for set_axis_label, text in ((ax.set_xlabel, 'sex'), (ax.set_ylabel, 'count')):
    set_axis_label(text, fontsize=20)
for label in [*ax.get_xticklabels(), *ax.get_yticklabels()]:
    label.set_fontsize(16)

## Suicide and overdose data

In [None]:
# Disabled helper: writes `values` to csv/<filename>, one value per row.
# It is only referenced by the commented-out list_to_csv(...) export calls in
# the "first-time interaction" cells below; re-enable both together.
# def list_to_csv(filename, values):
#     CSV_DIR = 'csv/'
#     path = os.path.join(CSV_DIR, filename)
#     with open(path, 'w') as f:
#         csv_writer = csv.writer(f)
#         values = [[value] for value in values]
#         csv_writer.writerows(values)

### Suicides where a previous suicide attempt (or suicidality in general) is known

In [None]:
# (rows, columns) of `df` before the per-county breakdowns.
df.shape

In [None]:
# Per county: how many people who died by suicide also carry an ambulance
# suicide-attempt flag or the broader suicidality flag.
for county in ['johnson', 'douglas']:
    # Rows are tagged with the first letter of the county name ('j' / 'd').
    county_letter = county[0]
    county_df = df[df['county'] == county_letter]
    print(len(county_df.index))

    # `== True` treats missing flags (no matching ambulance row) as False.
    is_suicide = county_df['suicide'] == True
    tot_suicides = int(is_suicide.sum())
    print(tot_suicides)

    num_prev_attempts = int((is_suicide & (county_df['suicide_attempt_flag'] == True)).sum())
    # The below includes suicidal ideation, self-harm, etc.
    num_prev_suicidal = int((is_suicide & (county_df['suicidal_flag'] == True)).sum())

    # The share is taken over all suicides in the county, matching the
    # "X out of Y" wording; NaN when the county has no suicides at all.
    share_prev_attempts = num_prev_attempts / tot_suicides if tot_suicides else float('nan')
    share_prev_suicidal = num_prev_suicidal / tot_suicides if tot_suicides else float('nan')

    print(f'{num_prev_attempts} out of {tot_suicides} ({share_prev_attempts:.1%}) {county} county residents that committed suicide had a previous suicide attempt (tracked by ambulance data)')
    print(f'{num_prev_suicidal} out of {tot_suicides} ({share_prev_suicidal:.1%}) {county} county residents that committed suicide were previously suicidal (tracked by ambulance data)')
    print('-'*20)

### Drug overdoses and previous drug history

In [None]:
# Per county: how many fatal overdoses belong to people who also carry the
# ambulance drug flag.
for county in ['johnson', 'douglas']:
    # Rows are tagged with the first letter of the county name ('j' / 'd').
    county_letter = county[0]
    county_df = df[df['county'] == county_letter]

    # `== True` treats missing flags (no matching ambulance row) as False.
    is_fatal_od = county_df['overdosed'] == True
    tot_fatal_ods = int(is_fatal_od.sum())
    num_od_with_drug_history = int((is_fatal_od & (county_df['drug_flag'] == True)).sum())

    # The share is taken over all fatal overdoses in the county, matching the
    # "X out of Y" wording; NaN when the county has no fatal overdoses.
    share_with_drug_history = num_od_with_drug_history / tot_fatal_ods if tot_fatal_ods else float('nan')

    print(f'{num_od_with_drug_history} out of {tot_fatal_ods} ({share_with_drug_history:.1%}) {county} county residents that fatally overdosed had previous drug history (as tracked by ambulance data)')
    print('-'*20)

### First-time interaction with the system is suicide 

In [None]:
# Per county: share of suicides whose joid has exactly one row in the client
# table, i.e. nothing else is recorded about the person.
for county in ['johnson', 'douglas']:
    county_letter = county[0]
    in_county_suicide = (df['county'] == county_letter) & (df['suicide'] == True)
    county_df = df[in_county_suicide]

    has_single_row = county_df['num_client_rows'] == 1
    has_several_rows = county_df['num_client_rows'] > 1
    know_only_suicide = county_df[has_single_row]

    # Export to csv
    # list_to_csv(f'{county}_only_know_suicide.csv', know_only_suicide['joid'].tolist())

    num_only_know_suicide = int(has_single_row.sum())
    num_know_more_than_just_suicide = int(has_several_rows.sum())
    num_suicides = num_only_know_suicide + num_know_more_than_just_suicide
    share_only_know_suicide = num_only_know_suicide / num_suicides
    print(f'In {county} county, {num_only_know_suicide} of {num_suicides} ({share_only_know_suicide:.1%}) total suicides have no prior interaction with the system')

### First-time interaction with the system is drug overdose 

In [None]:
# Per county: share of fatal overdoses whose joid has exactly one row in the
# client table, i.e. nothing else is recorded about the person.
for county in ['johnson', 'douglas']:
    county_letter = county[0]
    in_county_od = (df['county'] == county_letter) & (df['overdosed'] == True)
    county_df = df[in_county_od]

    has_single_row = county_df['num_client_rows'] == 1
    has_several_rows = county_df['num_client_rows'] > 1
    only_know_od_df = county_df[has_single_row]
    num_only_know_od = int(has_single_row.sum())

    # list_to_csv(f'{county}_only_know_od.csv', only_know_od_df['joid'].tolist())

    num_know_more_than_just_od = int(has_several_rows.sum())
    num_ods = num_only_know_od + num_know_more_than_just_od
    share_only_know_od = num_only_know_od / num_ods
    print(f'In {county} county, {num_only_know_od} of {num_ods} ({share_only_know_od:.1%}) fatal overdoses have no prior interaction with the system (as tracked by ambulance data)')

### Suicide and overdoses by age

In [None]:
def get_age(death_date, dob):
    """Return the person's age in completed years at the time of death.

    Args:
        death_date: Date of death; any object exposing ``year``, ``month``
            and ``day`` (e.g. ``datetime.date`` or ``pd.Timestamp``).
        dob: Date of birth, same kind of object.

    Returns:
        The number of full years between ``dob`` and ``death_date``, or
        ``None`` when either date is missing (``None``, ``NaN`` or ``pd.NaT``).
    """
    # pd.isna also catches NaT/NaN, which would otherwise slip past an
    # `is None` test and turn the result into a float nan.
    if pd.isna(death_date) or pd.isna(dob):
        return None
    # One year less when the birthday had not yet come round in the year of death.
    birthday_not_reached = (death_date.month, death_date.day) < (dob.month, dob.day)
    return death_date.year - dob.year - birthday_not_reached

In [None]:
# Age at death for every row, computed pairwise from the death and birth dates.
df['age'] = list(map(get_age, df['dateofdeath'], df['dateofbirth']))

In [None]:
# True when the joid has exactly one client row, i.e. the death is the only
# thing recorded for that person.
df['single_event'] = [num_cl_rows == 1 for num_cl_rows in df['num_client_rows']]

In [None]:
# Switch to seaborn's default theme for the age plots that follow.
# NOTE(review): this presumably replaces the "white" style and the spine
# settings configured earlier in the notebook - confirm that is intended.
sns.set()

#### We explore the age distribution of those who died by suicide (and, separately, by fatal overdose), conditioned on (i) whether the death was their first interaction with the system and (ii) whether the death was an overdose.

In [None]:
# One age histogram per (death type, hue column, county) combination.
county_names = {'j': 'Johnson County', 'd': 'Douglas County'}
for death_type in ['suicide', 'overdosed']:
    has_death_type = df[death_type] == True
    for my_hue in ['single_event', 'overdosed']:
        for county, county_name in county_names.items():
            print(f'Death type: {death_type}')
            if death_type == 'overdosed':
                print('(Not necessarily suicide)')
            data_df = df[(df['county'] == county) & has_death_type]
            sns.histplot(data=data_df, x='age', stat='count', hue=my_hue, binwidth=5)
            plt.title(county_name)
            plt.show()

## Investigate events for each joid that committed suicide

In [None]:
# For each county, count the events of every person who died by suicide, by
# event type, and summarise the per-person counts.
# NOTE: `events_df`, `event_types` and `event_counts_df` are overwritten on
# every pass, so after the loop they describe the last county only ('d').
for county in ['j', 'd']:
    print(f'COUNTY: {county}')
    suicide_joids = df.loc[(df['county'] == county) & (df['suicide'] == True), 'joid'].unique()
    # The joids come from the query result held in `df`, not from user input,
    # so they are inlined as quoted literals.
    # NOTE(review): an empty joid list would render `in ()`, which is expected
    # to be rejected by the database - confirm both counties have suicides.
    suic_joids_str = ', '.join([f"'{joid}'" for joid in suicide_joids])
    q = f"""
    select joid, event_type, count(event_type)
    from semantic.client_events
    where joid in ({suic_joids_str})
    group by joid, event_type
    """
    events_df = pd.read_sql(q, db_conn)
    event_types = events_df['event_type'].unique()

    # Sanity check: the group by must yield at most one row per
    # (joid, event_type); otherwise the lookup below would silently drop rows.
    if events_df.duplicated(subset=['joid', 'event_type']).any():
        raise ValueError('client_events query returned duplicate (joid, event_type) rows')

    # Index the counts once instead of re-filtering events_df for every
    # (joid, event_type) pair.
    count_by_key = {
        (joid, event_type): count
        for joid, event_type, count in zip(events_df['joid'], events_df['event_type'], events_df['count'])
    }

    # One row per joid, one column per event type; 0 where no events exist.
    events_vectors = [
        [count_by_key.get((joid, event_type), 0) for event_type in event_types]
        for joid in suicide_joids
    ]

    event_counts_df = pd.DataFrame(events_vectors, columns=event_types, index=suicide_joids)
    display(event_counts_df.describe())

In [None]:
# Number of people with at least one ARREST event.
# NOTE: event_counts_df is overwritten per county in the loop above, so this
# only covers the last county processed ('d').
sum(event_counts_df['ARREST'] > 0)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Save one histogram of per-person event counts for each event type.
# `event_counts_df` / `event_types` hold the last county processed above.
os.makedirs('plots', exist_ok=True)  # savefig does not create missing folders
for event_type in event_types:
    print(event_type)
    # Give every event type its own figure; without an explicit Axes,
    # histplot keeps drawing on the current one and each saved image would
    # also contain all earlier histograms.
    fig, g = plt.subplots()
    sns.histplot(data=event_counts_df, x=event_type, ax=g)
    fig.savefig(f'plots/suicide_event_counts_{event_type}.png')
    plt.show()
    plt.close(fig)
