In [None]:
import sys
sys.path.append('../src/')
from helpers.helpers import *

# Overview

This notebook gives an overview of the medical examiner data for both Johnson and Douglas counties. The tables are **jocojcmexoverdosessuicides** and **jocodcmexoverdosessuicides**. We look at a few simple plots and summary statistics on age, suicide, and overdose.

In [None]:
def visualize_deaths_across_time(df):
    '''Draw a bar chart with the number of deaths per calendar year.

    Side effect: the year of each death (taken from `df.dateofdeath`) is
    stored on the frame as `df.deathyear`.
    '''

    years_of_death = pd.DatetimeIndex(df.dateofdeath).year
    df.deathyear = years_of_death
    deaths_per_year = df.groupby(df.deathyear).size()
    deaths_per_year.plot.bar()
    
    
def visualize_suicides_across_time(df):
    '''Takes data frame, creates barplot of suicide vs. non-suicide deaths per year.

    The year of death is derived from the `dateofdeath` column inside this
    function, so it does not depend on `visualize_deaths_across_time` having
    been run on the same frame beforehand. `df` itself is not modified.
    '''

    # Name the grouping key explicitly so the x-axis reads 'deathyear'
    deathyear = pd.to_datetime(df['dateofdeath']).dt.year.rename('deathyear')
    b = df.groupby([deathyear, df['suicide']]).size().reset_index(name='n')
    sns.barplot(data=b, x='deathyear', y='n', hue='suicide')
    
    
def visualize_overdoses_across_time(df):
    '''Takes data frame, creates barplot of overdose vs. non-overdose deaths per year.

    The year of death is derived from the `dateofdeath` column inside this
    function, so it does not depend on `visualize_deaths_across_time` having
    been run on the same frame beforehand. `df` itself is not modified.
    '''

    # Name the grouping key explicitly so the x-axis reads 'deathyear'
    deathyear = pd.to_datetime(df['dateofdeath']).dt.year.rename('deathyear')
    b = df.groupby([deathyear, df['overdosed']]).size().reset_index(name='n')
    sns.barplot(data=b, x='deathyear', y='n', hue='overdosed')

    
def visualize_fentanyl_across_time(df):
    '''Takes data frame, creates barplot of fentanyl vs. non-fentanyl deaths per year.

    Reads the fentanyl flag from `df.fentanyl`, which may be a real column or a
    Series stored as a plain attribute on the frame. The year of death is
    derived from the `dateofdeath` column inside this function, so it does not
    depend on `visualize_deaths_across_time` having been run beforehand.
    '''

    # Name the grouping key explicitly so the x-axis reads 'deathyear'
    deathyear = pd.to_datetime(df['dateofdeath']).dt.year.rename('deathyear')
    b = df.groupby([deathyear, df.fentanyl]).size().reset_index(name='n')
    # An unnamed flag Series comes out of reset_index as 'level_1';
    # the rename is a no-op when the flag already carries the name 'fentanyl'
    b = b.rename(columns={'level_1': 'fentanyl'})
    sns.barplot(data=b, x='deathyear', y='n', hue='fentanyl')

In [None]:
# Database handle reused by the pd.read_sql calls below.
# get_database_connection is not defined in this notebook; presumably it is
# supplied by the `helpers.helpers` star import — TODO confirm, and close the
# connection once the analysis is done.
conn = get_database_connection()

# County comparison and summary

In both counties, deaths increased from 2017 until now. The main driver was an increase in overdoses. Johnson county has 657 observations while Douglas county has only 149. 

Douglas county has data on sex; Johnson county does not. Once joined with other tables, we can get all relevant demographic data.

Johnson county includes a column 'typeofdrugs' (relevant for overdoses and poisoning), while Douglas county uses 'causeofdeath1' and 'causeofdeath2' to encode the type of drug. I have left this as is for now, since we probably do not need to go into much detail about the type of drugs.

# Johnson county data

In [None]:
# Read the complete Johnson county medical examiner table and preview it
query = "select * from clean.jocojcmexoverdosessuicides"
df = pd.read_sql(sql=query, con=conn)
df.head()

In [None]:
# Dimensions as (rows, columns); n = 657 rows were recorded when this was written
df.shape

In [None]:
# Frequency of each recorded manner of death
df['mannerofdeath'].value_counts()

In [None]:
# Tally of the suicide flag: 400 suicides over 5 years (2017 - 2022) is not a lot ...
df['suicide'].value_counts()

In [None]:
# Tally of the overdose flag: 299 overdoses over 5 years (2017 - 2022) is not a lot ...
df['overdosed'].value_counts()

In [None]:
# Share of all deaths flagged as BOTH suicide and overdose, i.e. suicides by
# overdose; there is only a small percentage of them.
# `&` is required here: `==` would also count deaths where neither flag is set
# and would therefore measure how often the flags agree, not how often they overlap.
np.round((df.suicide & df.overdosed).mean(), 2)

In [None]:
# Deaths carrying at least one of the two flags. This is not the same as
# n = 657 because the value 'Undetermined' is in fact no overdose or suicide
(df['suicide'] | df['overdosed']).sum()

## Trends across years and age

In [None]:
# Bar chart of the number of deaths per year; the call also stores the year of
# death on the frame as `df.deathyear`
visualize_deaths_across_time(df);

In [None]:
# Yearly counts split by the suicide flag: suicides did not really increase over time
visualize_suicides_across_time(df);

In [None]:
# The same plot split by the overdose flag shows (as expected given the small
# overlap between the two flags) that the rise in deaths is due to a rise in overdoses
visualize_overdoses_across_time(df);

In [None]:
# Fentanyl deaths increased dramatically over the last 5 years
def died_of_fentanyl(x):
    '''Return True if the drug description `x` contains 'FENTANYL', else False.

    `x` may be a string (substring match) or a container such as a list
    (membership match). The match is case-sensitive. Missing values — None, or
    values that do not support `in`, such as float NaN — yield False instead of
    raising.
    '''
    if x is None:
        return False
    try:
        return 'FENTANYL' in x
    except TypeError:
        # Not a string/container (e.g. NaN for a missing entry): treat as no fentanyl
        return False

# Flag each record whose drug description mentions fentanyl. Item assignment is
# used because attribute assignment (`df.fentanyl = ...`) does not create a
# column; `.map` also keeps the flags aligned with the frame's own index.
df['fentanyl'] = df['typeofdrugs'].map(died_of_fentanyl)
visualize_fentanyl_across_time(df);

In [None]:
# Age histograms per overdose flag: deaths affect many age groups, but primarily the young
df.groupby('overdosed')['age'].hist(legend=True);

In [None]:
# Count the records with a reported age of zero: there are two such cases,
# and they are due to an error in the date of birth
(df['age'] == 0).sum()

In [None]:
# Age distributions look much alike for people who overdosed
# and for those who did not
sns.violinplot(x='overdosed', y='age', data=df);

In [None]:
# Looks like the reverse of the overdose plot, but not quite, since the two flags overlap somewhat
sns.violinplot(x='suicide', y='age', data=df);

# Douglas county data

In [None]:
# Read the complete Douglas county medical examiner table and preview it
query = "select * from clean.jocodcmexoverdosessuicides"
dc = pd.read_sql(sql=query, con=conn)
dc.head()

In [None]:
# Dimensions as (rows, columns); n = 149 rows were recorded when this was written
dc.shape

In [None]:
# Number of records per sex
dc['sex'].value_counts()

In [None]:
# Frequency of each recorded manner of death
dc['mannerofdeath'].value_counts()

In [None]:
# Tally of the suicide flag
dc['suicide'].value_counts()

In [None]:
# Tally of the overdose flag
dc['overdosed'].value_counts()

In [None]:
# Proportionally more women overdosed than men
# (the table shows raw counts per sex/overdose combination; compare the
# overdose share within each sex to read off the proportions)
dc.groupby(['sex', 'overdosed']).size()

## Trends across years and age

In [None]:
# Deaths increased quite a bit from 2017 to 2021.
# The call also stores the year of death on the frame as `dc.deathyear`
visualize_deaths_across_time(dc);

In [None]:
# Yearly counts split by the suicide flag: suicides did not really increase over time
visualize_suicides_across_time(dc);

In [None]:
# Yearly counts split by the overdose flag: overdoses rose across years
visualize_overdoses_across_time(dc);

In [None]:
# Fentanyl data is more spotty here in Douglas county (+2 in 2022 when using causeofdeath2).
# Item assignment is used because attribute assignment (`dc.fentanyl = ...`)
# does not create a column; `.map` keeps the flags aligned with the frame's index.
dc['fentanyl'] = dc['causeofdeath1'].map(died_of_fentanyl)
visualize_fentanyl_across_time(dc);

In [None]:
# Most recent date of death on record: the latest 2022 data we have is from the end of February!
dc['dateofdeath'].max()

In [None]:
# Share of all deaths flagged as BOTH suicide and overdose, i.e. suicides by
# overdose; there is only a small percentage of them.
# `&` is required here: `==` would also count deaths where neither flag is set
# and would therefore measure how often the flags agree, not how often they overlap.
np.round((dc.suicide & dc.overdosed).mean(), 2)

In [None]:
# Deaths carrying at least one of the two flags. This equals n = 149
# because the value 'Could not be determined' is coded as overdose
(dc['suicide'] | dc['overdosed']).sum()

In [None]:
# Age by overdose flag: looks like the folks who did not overdose tend to be younger
sns.violinplot(x='overdosed', y='age', data=dc);