In [1]:
import numpy as np
import pandas as pd
import altair as alt
import matplotlib.pyplot as plt

In [2]:
deaths = pd.read_csv("05_cleaned_data/deaths_wa.csv")

In [3]:
deaths["StateName"].value_counts()

Washington    570
Oregon        469
Montana       421
Idaho         366
Wyoming       195
Nevada        190
Name: StateName, dtype: int64

In [4]:
deaths.columns

Index(['County', 'County Code', 'Year', 'Year Code',
       'Drug/Alcohol Induced Cause', 'Drug/Alcohol Induced Cause Code',
       'Deaths', 'State', 'StateFIPS', 'CountyName', 'StateName', 'CountyFIPS',
       'StateAbbr', 'STATE_COUNTY', 'state_abbrev', 'FIP_unique', 'Population',
       'county_test'],
      dtype='object')

In [5]:
deaths["Drug/Alcohol Induced Cause"].unique()

array(['Drug poisonings (overdose) Unintentional (X40-X44)',
       'Drug poisonings (overdose) Suicide (X60-X64)',
       'All other alcohol-induced causes',
       'All other non-drug and non-alcohol causes',
       'Drug poisonings (overdose) Undetermined (Y10-Y14)',
       'All other drug-induced causes',
       'Alcohol poisonings (overdose) (X45, X65, Y15)'], dtype=object)

In [6]:
washington = deaths[deaths["StateName"] == "Washington"]
comp = deaths[deaths["StateName"] != "Washington"]

In [7]:
wa_deaths = washington.copy()
comp_deaths = comp.copy()

In [8]:
overdose_list = ['Drug poisonings (overdose) Unintentional (X40-X44)',
       'Drug poisonings (overdose) Suicide (X60-X64)',
       'Drug poisonings (overdose) Undetermined (Y10-Y14)',
       'Alcohol poisonings (overdose) (X45, X65, Y15)']

In [9]:
wa_deaths["overdose"] = 0
comp_deaths["overdose"] = 0

In [10]:
wa_deaths.loc[wa_deaths["Drug/Alcohol Induced Cause"].isin(overdose_list), "overdose"] = 1
comp_deaths.loc[comp_deaths["Drug/Alcohol Induced Cause"].isin(overdose_list), "overdose"] = 1

In [11]:
wa_deaths["overdose_per_100k"] = wa_deaths["Deaths"] / wa_deaths["Population"] * 100_000
comp_deaths["overdose_per_100k"] = comp_deaths["Deaths"] / comp_deaths["Population"] * 100_000

In [12]:
wa_result = wa_deaths.groupby(["Year", "County"])["overdose_per_100k"].sum().reset_index()
comp_result = comp_deaths.groupby(["Year", "StateName", "CountyName"])["overdose_per_100k"].sum().reset_index()

In [13]:
# wa_result["after_change"] = 0
# comp_result["after_change"] = 0

In [14]:
# wa_result.loc[wa_result["Year"] >= 2012, "after_change"] = 1
# comp_result.loc[comp_result["Year"] >= 2012, "after_change"] = 1

In [15]:
wa_deaths.groupby("Year")["overdose_per_100k"].agg([np.mean, np.std])

Unnamed: 0_level_0,mean,std
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2009.0,441.973398,448.706213
2010.0,422.602752,447.161705
2011.0,438.554818,475.68087
2012.0,409.003466,453.86478
2013.0,429.313541,463.813793
2014.0,408.971871,453.841899
2015.0,425.514825,485.933613


In [16]:
wa_deaths_b4 = wa_deaths[wa_deaths["Year"] < 2012]
wa_deaths_after = wa_deaths[wa_deaths["Year"] >= 2012]

In [17]:
# attempt 1
# washington
# https://altair-viz.github.io/user_guide/transform/aggregate.html

source_data = wa_deaths_b4

plot_wa_b4 = alt.Chart(source_data).mark_point().encode(
    y=alt.Y("mean_overdose:Q", scale=alt.Scale(zero=False)),
    x=alt.X("Year:O", scale=alt.Scale(zero=False))
).transform_aggregate(
    mean_overdose='mean(overdose_per_100k)',
    groupby=["Year"]
)

plot_wa_b4

In [18]:
fit_wa_b4 = plot_wa_b4.transform_regression('Year', 'mean_overdose',method="linear"
).mark_line()

fit_wa_b4

In [19]:
source_data = wa_deaths_after

plot_wa_after = alt.Chart(source_data).mark_point().encode(
    y=alt.Y("mean_overdose:Q", scale=alt.Scale(zero=False)),
    x=alt.X("Year:O", scale=alt.Scale(zero=False))
).transform_aggregate(
    mean_overdose='mean(overdose_per_100k)',
    groupby=["Year"]
)

plot_wa_after

In [20]:
fit_wa_after = plot_wa_after.transform_regression('Year', 'mean_overdose',method="linear"
).mark_line()

fit_wa_after

In [21]:
plot_wa_b4 + fit_wa_b4 + plot_wa_after + fit_wa_after

In [22]:
# comparison states
comp_deaths_b4 = comp_deaths[comp_deaths["Year"] < 2012]
comp_deaths_after = comp_deaths[comp_deaths["Year"] >= 2012]

source_data = comp_deaths_b4

plot_comp_b4 = alt.Chart(source_data).mark_point().encode(
    y=alt.Y("mean_overdose:Q", scale=alt.Scale(zero=False)),
    x=alt.X("Year:O", scale=alt.Scale(zero=False))
).transform_aggregate(
    mean_overdose='mean(overdose_per_100k)',
    groupby=["Year"]
)

plot_comp_b4
fit_comp_b4 = plot_comp_b4.transform_regression('Year', 'mean_overdose',method="linear"
).mark_line()

fit_comp_b4

source_data = comp_deaths_after

plot_comp_after = alt.Chart(source_data).mark_point().encode(
    y=alt.Y("mean_overdose:Q", scale=alt.Scale(zero=False)),
    x=alt.X("Year:O", scale=alt.Scale(zero=False))
).transform_aggregate(
    mean_overdose='mean(overdose_per_100k)',
    groupby=["Year"]
)

plot_comp_after

fit_comp_after = plot_comp_after.transform_regression('Year', 'mean_overdose',method="linear"
).mark_line()

fit_comp_after
plot_comp_b4 + fit_comp_b4 + plot_comp_after + fit_comp_after