# Difference-In-Difference Analysis Of Opioid Overdose Deaths In Florida

In [32]:
# Importing the required libraries

import pandas as pd
import numpy as np
import altair as alt
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [33]:
# Loading the deaths data for Florida and its comparison states
# (Michigan, North Carolina, Pennsylvania).
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# only runs where the file exists at exactly this location.
DEATHS_CSV_PATH = r"C:\Users\annap\Downloads\deaths_fl.csv"
deaths = pd.read_csv(DEATHS_CSV_PATH)

In [34]:
# Previewing the first three rows of the raw data to see which columns are available.
deaths.head(3)

Unnamed: 0,County,County Code,Year,Year Code,Drug/Alcohol Induced Cause,Drug/Alcohol Induced Cause Code,Deaths,State,StateFIPS,CountyName,StateName,CountyFIPS,StateAbbr,STATE_COUNTY,state_abbrev,FIP_unique,Population,county_test
0,Alachua,12001.0,2007.0,2007.0,Drug poisonings (overdose) Unintentional (X40-...,D1,17.0,FL,12,Alachua,Florida,12001,FL,FL | ALACHUA,FL,1200112,242685.0,Alachua
1,Alachua,12001.0,2007.0,2007.0,All other alcohol-induced causes,A9,22.0,FL,12,Alachua,Florida,12001,FL,FL | ALACHUA,FL,1200112,242685.0,Alachua
2,Alachua,12001.0,2007.0,2007.0,All other non-drug and non-alcohol causes,O9,1578.0,FL,12,Alachua,Florida,12001,FL,FL | ALACHUA,FL,1200112,242685.0,Alachua


In [35]:
# Building a trimmed working frame. `DataFrame.drop` returns a new object, so
# the raw `deaths` frame stays untouched and later column assignments do not
# trigger SettingWithCopy warnings.

IRRELEVANT_COLUMNS = [
    "County Code",
    "Year Code",
    "Drug/Alcohol Induced Cause Code",
    "State",
    "CountyName",
    "StateAbbr",
    "STATE_COUNTY",
    "county_test",
]
deaths_copy = deaths.drop(columns=IRRELEVANT_COLUMNS)

In [36]:
# Checking the data after dropping the unwanted attributes.
deaths_copy.head(3)


Unnamed: 0,County,Year,Drug/Alcohol Induced Cause,Deaths,StateFIPS,StateName,CountyFIPS,state_abbrev,FIP_unique,Population
0,Alachua,2007.0,Drug poisonings (overdose) Unintentional (X40-...,17.0,12,Florida,12001,FL,1200112,242685.0
1,Alachua,2007.0,All other alcohol-induced causes,22.0,12,Florida,12001,FL,1200112,242685.0
2,Alachua,2007.0,All other non-drug and non-alcohol causes,1578.0,12,Florida,12001,FL,1200112,242685.0


In [37]:
# Casting the year, death-count and population columns to 64-bit integers and
# giving the identifier columns more conventional, readable names.
INTEGER_COLUMNS = {"Year": "int64", "Deaths": "int64", "Population": "int64"}
READABLE_NAMES = {
    "StateFIPS": "State FIPS",
    "CountyFIPS": "County FIPS",
    "StateName": "State",
    "state_abbrev": "State Abbreviation",
    "FIP_unique": "FIPS_Unique",
}

deaths_copy = deaths_copy.astype(INTEGER_COLUMNS).rename(columns=READABLE_NAMES)


In [38]:
# Checking the data after making the modifications.
deaths_copy.head(3)

Unnamed: 0,County,Year,Drug/Alcohol Induced Cause,Deaths,State FIPS,State,County FIPS,State Abbreviation,FIPS_Unique,Population
0,Alachua,2007,Drug poisonings (overdose) Unintentional (X40-...,17,12,Florida,12001,FL,1200112,242685
1,Alachua,2007,All other alcohol-induced causes,22,12,Florida,12001,FL,1200112,242685
2,Alachua,2007,All other non-drug and non-alcohol causes,1578,12,Florida,12001,FL,1200112,242685


In [39]:
# Listing the distinct states present in the data.
deaths_copy["State"].unique()

array(['Florida', 'Michigan', 'North Carolina', 'Pennsylvania'],
      dtype=object)

In [40]:
# Checking the different causes of deaths that occurred due to drugs or other causes such as alcohol.
deaths_copy["Drug/Alcohol Induced Cause"].unique()


array(['Drug poisonings (overdose) Unintentional (X40-X44)',
       'All other alcohol-induced causes',
       'All other non-drug and non-alcohol causes',
       'Drug poisonings (overdose) Suicide (X60-X64)',
       'All other drug-induced causes',
       'Drug poisonings (overdose) Undetermined (Y10-Y14)',
       'Alcohol poisonings (overdose) (X45, X65, Y15)'], dtype=object)

In [41]:
# Keeping only the Florida rows whose cause is one of the three drug-poisoning
# (overdose) categories: unintentional, undetermined or suicide.

OVERDOSE_CAUSES = [
    "Drug poisonings (overdose) Unintentional (X40-X44)",
    "Drug poisonings (overdose) Undetermined (Y10-Y14)",
    "Drug poisonings (overdose) Suicide (X60-X64)",
]
is_florida = deaths_copy["State"] == "Florida"
is_overdose = deaths_copy["Drug/Alcohol Induced Cause"].isin(OVERDOSE_CAUSES)
florida_deaths = deaths_copy[is_florida & is_overdose]

In [42]:
# Previewing the first five rows of the Florida overdose subset.
florida_deaths.head(5)


Unnamed: 0,County,Year,Drug/Alcohol Induced Cause,Deaths,State FIPS,State,County FIPS,State Abbreviation,FIPS_Unique,Population
0,Alachua,2007,Drug poisonings (overdose) Unintentional (X40-...,17,12,Florida,12001,FL,1200112,242685
5,Alachua,2009,Drug poisonings (overdose) Unintentional (X40-...,15,12,Florida,12001,FL,1200112,246657
8,Alachua,2010,Drug poisonings (overdose) Unintentional (X40-...,15,12,Florida,12001,FL,1200112,247614
11,Alachua,2011,Drug poisonings (overdose) Unintentional (X40-...,14,12,Florida,12001,FL,1200112,249834
14,Alachua,2012,Drug poisonings (overdose) Unintentional (X40-...,14,12,Florida,12001,FL,1200112,251520


In [43]:
# Creating a copy to avoid problems such as SettingWithCopy warning. 
florida_deaths_copy = florida_deaths.copy()

In [44]:
# Expressing each row's death count as a rate per 100,000 residents.
# NOTE(review): the column is labelled "Opioid", but the rows kept by the
# earlier filter are all drug-poisoning (overdose) causes - confirm the label.

florida_death_share = florida_deaths_copy["Deaths"].div(
    florida_deaths_copy["Population"]
)
florida_deaths_copy["Opioid_Overdose_Deaths_Per_100K"] = florida_death_share.mul(
    100_000
)

florida_deaths_copy.head(3)


Unnamed: 0,County,Year,Drug/Alcohol Induced Cause,Deaths,State FIPS,State,County FIPS,State Abbreviation,FIPS_Unique,Population,Opioid_Overdose_Deaths_Per_100K
0,Alachua,2007,Drug poisonings (overdose) Unintentional (X40-...,17,12,Florida,12001,FL,1200112,242685,7.004965
5,Alachua,2009,Drug poisonings (overdose) Unintentional (X40-...,15,12,Florida,12001,FL,1200112,246657,6.081319
8,Alachua,2010,Drug poisonings (overdose) Unintentional (X40-...,15,12,Florida,12001,FL,1200112,247614,6.057816


In [45]:
# Adding up the per-cause rates within every (Year, County) pair, which gives
# one overdose death rate per Florida county and year.

florida_rates_by_year_county = florida_deaths_copy.groupby(["Year", "County"])[
    "Opioid_Overdose_Deaths_Per_100K"
]
florida_result = florida_rates_by_year_county.sum().reset_index()
florida_result.head(5)

Unnamed: 0,Year,County,Opioid_Overdose_Deaths_Per_100K
0,2007,Alachua,7.004965
1,2007,Bay,18.143881
2,2007,Brevard,19.269286
3,2007,Broward,15.225255
4,2007,Citrus,12.058961


In [46]:
# Averaging the row-level rates within each year for Florida.
# NOTE(review): this mean runs over every (county, cause) row of
# florida_deaths_copy, not over the county totals held in florida_result that
# the regression charts are fitted on - confirm this is the intended figure.

florida_row_rates = florida_deaths_copy["Opioid_Overdose_Deaths_Per_100K"]
florida_result_yearwise = florida_row_rates.groupby(
    florida_deaths_copy["Year"]
).mean()
florida_result_yearwise

Year
2007    10.589778
2008    11.389320
2009    10.090938
2010    11.976183
2011    11.423950
2012    10.435346
2013     9.046919
Name: Opioid_Overdose_Deaths_Per_100K, dtype: float64

In [47]:
# Keeping the drug-poisoning (overdose) rows for every state other than
# Florida, i.e. the comparison states Pennsylvania, North Carolina and Michigan.

OVERDOSE_CAUSES = [
    "Drug poisonings (overdose) Unintentional (X40-X44)",
    "Drug poisonings (overdose) Undetermined (Y10-Y14)",
    "Drug poisonings (overdose) Suicide (X60-X64)",
]
is_comparison_state = deaths_copy["State"] != "Florida"
is_overdose_cause = deaths_copy["Drug/Alcohol Induced Cause"].isin(OVERDOSE_CAUSES)
other_states_deaths = deaths_copy[is_comparison_state & is_overdose_cause]

In [48]:
# `assign` returns a new frame, so the filtered slice is left untouched while
# the death rate per 100,000 residents is added as an extra column.
other_states_deaths_copy = other_states_deaths.assign(
    Opioid_Overdose_Deaths_Per_100K=lambda frame: (
        frame["Deaths"] / frame["Population"]
    )
    * 100_000
)


In [49]:
# Adding up the per-cause rates within every (Year, State, County) triple,
# which gives one overdose death rate per comparison-state county and year.

control_rates_by_year_state_county = other_states_deaths_copy.groupby(
    ["Year", "State", "County"]
)["Opioid_Overdose_Deaths_Per_100K"]
other_states_result = control_rates_by_year_state_county.sum().reset_index()
other_states_result.head()

Unnamed: 0,Year,State,County,Opioid_Overdose_Deaths_Per_100K
0,2007,Michigan,Allegan,15.256625
1,2007,Michigan,Bay,11.097547
2,2007,Michigan,Berrien,9.531192
3,2007,Michigan,Calhoun,18.170982
4,2007,Michigan,Eaton,9.228668


In [50]:
# Yearly mean of the row-level overdose death rates for Pennsylvania.
is_pennsylvania = other_states_deaths_copy["State"] == "Pennsylvania"
pennsylvania_rows = other_states_deaths_copy.loc[is_pennsylvania]
pennsylvania_results_yearwise = pennsylvania_rows.groupby("Year")[
    "Opioid_Overdose_Deaths_Per_100K"
].mean()
pennsylvania_results_yearwise

Year
2007    10.568628
2008    10.032986
2009    10.909417
2010    11.145112
2011    13.361846
2012    14.207407
2013    15.010161
Name: Opioid_Overdose_Deaths_Per_100K, dtype: float64

In [51]:
# Yearly mean of the row-level overdose death rates for Michigan.
is_michigan = other_states_deaths_copy["State"] == "Michigan"
michigan_rows = other_states_deaths_copy.loc[is_michigan]
michigan_results_yearwise = michigan_rows.groupby("Year")[
    "Opioid_Overdose_Deaths_Per_100K"
].mean()
michigan_results_yearwise

Year
2007     9.107856
2008     8.460635
2009     9.559062
2010     9.279873
2011    11.657133
2012    10.469244
2013    11.903580
Name: Opioid_Overdose_Deaths_Per_100K, dtype: float64

In [52]:
# Yearly mean of the row-level overdose death rates for North Carolina.
is_north_carolina = other_states_deaths_copy["State"] == "North Carolina"
north_carolina_rows = other_states_deaths_copy.loc[is_north_carolina]
north_carolina_results_yearwise = north_carolina_rows.groupby("Year")[
    "Opioid_Overdose_Deaths_Per_100K"
].mean()
north_carolina_results_yearwise

Year
2007    12.942485
2008    13.710444
2009    13.883047
2010    12.854667
2011    15.386506
2012    13.123403
2013    13.480220
Name: Opioid_Overdose_Deaths_Per_100K, dtype: float64

In [53]:
# Yearly mean of the row-level overdose death rates with the comparison states
# (Michigan, North Carolina, Pennsylvania) pooled together.
# NOTE(review): as with the Florida yearwise figure, the mean runs over every
# (county, cause) row rather than over county totals - confirm this is intended.

control_row_rates = other_states_deaths_copy["Opioid_Overdose_Deaths_Per_100K"]
other_states_results_combined_yearwise = control_row_rates.groupby(
    other_states_deaths_copy["Year"]
).mean()
other_states_results_combined_yearwise

Year
2007    10.837647
2008    10.988707
2009    11.574248
2010    11.191798
2011    13.653797
2012    12.937159
2013    13.719112
Name: Opioid_Overdose_Deaths_Per_100K, dtype: float64

In [54]:
# these yearwise results have been calculated while doing the pre-post analysis of drug overdose deaths in Florida
florida_result_yearwise


Year
2007    10.589778
2008    11.389320
2009    10.090938
2010    11.976183
2011    11.423950
2012    10.435346
2013     9.046919
Name: Opioid_Overdose_Deaths_Per_100K, dtype: float64

In [55]:
# Side-by-side descriptive statistics of the yearly mean rates for Florida and
# for the pooled comparison states.
RATE_COLUMN = "Opioid_Overdose_Deaths_Per_100K"
FLORIDA_LABEL = "Opioid Overdose Deaths Per 100K Residents - Florida"
CONTROL_LABEL = (
    "Opioid Overdose Deaths Per 100K Residents - Control States (MI, NC, PA)"
)

# One row per year holding that year's mean rate.
florida_stats = florida_deaths_copy.groupby("Year")[RATE_COLUMN].mean().reset_index()
other_states_stats = (
    other_states_deaths_copy.groupby("Year")[RATE_COLUMN].mean().reset_index()
)

# Keep only the rate column of each describe() table and give it a display name.
florida_summary = florida_stats.describe()[[RATE_COLUMN]].rename(
    columns={RATE_COLUMN: FLORIDA_LABEL}
)
other_summary = other_states_stats.describe()[[RATE_COLUMN]].rename(
    columns={RATE_COLUMN: CONTROL_LABEL}
)

summary_statistics = pd.concat([florida_summary, other_summary], axis=1)
summary_statistics


Unnamed: 0,Opioid Overdose Deaths Per 100K Residents - Florida,"Opioid Overdose Deaths Per 100K Residents - Control States (MI, NC, PA)"
count,7.0,7.0
mean,10.707491,12.128924
std,0.984322,1.268893
min,9.046919,10.837647
25%,10.263142,11.090252
50%,10.589778,11.574248
75%,11.406635,13.295478
max,11.976183,13.719112


In [56]:
# Splitting the county-level results around 2010, the year of policy
# implementation in Florida: years before 2010 form the "before" window and
# 2010 onwards the "after" window.
POLICY_YEAR = 2010

# Florida (treated state)
florida_before = florida_result.loc[florida_result["Year"].lt(POLICY_YEAR)]
florida_after = florida_result.loc[florida_result["Year"].ge(POLICY_YEAR)]

# Comparison states
other_states_before = other_states_result.loc[
    other_states_result["Year"].lt(POLICY_YEAR)
]
other_states_after = other_states_result.loc[
    other_states_result["Year"].ge(POLICY_YEAR)
]



In [57]:
# Function to plot a vertical line at year of policy implementation
def vertical_line(year):
    """Return an Altair rule mark drawn at ``year`` on a quantitative x axis.

    The single-row frame carries the literal colour "black"; the colour
    encoding is declared with ``scale=None`` on that column.
    """
    marker_frame = pd.DataFrame({"Year": [year], "color": ["black"]})
    rule_colour = alt.Color("color:N", scale=None)
    return alt.Chart(marker_frame).mark_rule().encode(x="Year:Q", color=rule_colour)


In [58]:
# creating the vertical line at 2010, the year of implementation of policy changes in Florida
line = vertical_line(2010)



In [59]:
# creating the regression model and calculating the error bands for creating the plots
 
def get_reg_fit_and_ci(data, color, xvar, yvar, legend, alpha=0.05, years=None):
    """Fit an OLS line of ``yvar`` on ``xvar`` and chart it with its confidence band.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the ``xvar`` and ``yvar`` columns.
    color : str
        Colour used for the confidence band.
    xvar, yvar : str
        Column names. They are interpolated verbatim into the formula
        ``"{yvar} ~ {xvar}"``, so they must be usable as formula terms.
    legend : str
        Label written to the "States-Before/After" column, which colours the
        fitted line and therefore appears in the legend.
    alpha : float, default 0.05
        Significance level of the confidence interval of the fitted mean.
    years : sequence of numbers, optional
        Tick values for the x axis. Defaults to 2007 through 2013.

    Returns
    -------
    tuple
        ``(predictions, chart)``. ``predictions`` holds the prediction grid
        (``xvar``), the fitted mean (``yvar``), "ci_low", "ci_high" and
        "States-Before/After"; ``chart`` is the error band layered with the
        fitted line.
    """
    if years is None:
        years = [2007, 2008, 2009, 2010, 2011, 2012, 2013]

    # Only x values whose response is present define the range of the grid.
    observed_x = data.loc[pd.notnull(data[yvar]), xvar]

    # linspace pins both end points and the number of points. A float-stepped
    # arange can emit an extra point beyond the largest observed x, and it
    # cannot build a grid at all when every x is identical (zero step).
    grid = np.linspace(observed_x.min(), observed_x.max(), 101)
    predictions = pd.DataFrame({xvar: grid})

    # Fitting the model and making the predictions
    model = smf.ols(f"{yvar} ~ {xvar}", data=data).fit()
    # Pass a DataFrame so the formula can look the regressor up by column name.
    model_predict = model.get_prediction(predictions[[xvar]])
    # Assign positionally so the result never depends on index alignment.
    predictions[yvar] = model_predict.summary_frame()["mean"].to_numpy()
    predictions[["ci_low", "ci_high"]] = model_predict.conf_int(alpha=alpha)

    # Utilizing the predictions to create the fitted line and the error band
    predictions["States-Before/After"] = f"{legend}"
    reg = (
        alt.Chart(predictions)
        .mark_line()
        .encode(
            x=xvar,
            y=alt.Y(yvar),
            color=alt.Color("States-Before/After", legend=alt.Legend(title="States")),
        )
    )

    ci = (
        alt.Chart(predictions)
        .mark_errorband()
        .encode(
            alt.X(f"{xvar}:Q", axis=alt.Axis(format=".0f", values=list(years))),
            y=alt.Y(
                "ci_low",
                title="Opioid Overdose Deaths Per 100K Residents",
                scale=alt.Scale(zero=False),
            ),
            y2="ci_high",
            color=alt.value(f"{color}"),
        )
    )
    chart = ci + reg
    return predictions, chart
 
 


In [60]:
# Using the get_reg_fit_and_ci function to create the charts
 
def build_chart(data, color, xvar, yvar, legend, alpha=0.05):
    """Return only the chart produced by ``get_reg_fit_and_ci``.

    All arguments are forwarded unchanged; the predictions frame that
    ``get_reg_fit_and_ci`` also returns is discarded.
    """
    _, regression_chart = get_reg_fit_and_ci(
        data=data,
        color=color,
        xvar=xvar,
        yvar=yvar,
        legend=legend,
        alpha=alpha,
    )
    return regression_chart
 


In [61]:
# Creating the final difference-in-difference plot: one fitted line with its
# confidence band per group (Florida / comparison states) and per period
# (before / after the policy year), plus the vertical policy-year marker.

PLOT_RATE_COLUMN = "Opioid_Overdose_Deaths_Per_100K"
FLORIDA_LEGEND = "Florida"
CONTROL_LEGEND = "Control States - MI,NC,PA"

florida_before_chart = build_chart(
    data=florida_before,
    color="orange",
    xvar="Year",
    yvar=PLOT_RATE_COLUMN,
    legend=FLORIDA_LEGEND,
    alpha=0.05,
)
florida_after_chart = build_chart(
    data=florida_after,
    color="orange",
    xvar="Year",
    yvar=PLOT_RATE_COLUMN,
    legend=FLORIDA_LEGEND,
    alpha=0.05,
)

other_states_before_chart = build_chart(
    data=other_states_before,
    color="blue",
    xvar="Year",
    yvar=PLOT_RATE_COLUMN,
    legend=CONTROL_LEGEND,
    alpha=0.05,
)
other_states_after_chart = build_chart(
    data=other_states_after,
    color="blue",
    xvar="Year",
    yvar=PLOT_RATE_COLUMN,
    legend=CONTROL_LEGEND,
    alpha=0.05,
)

# Layering order is kept: Florida, then comparison states, then the marker.
layered_charts = (
    florida_before_chart
    + florida_after_chart
    + other_states_before_chart
    + other_states_after_chart
    + line
)
florida_final_diff_in_diff_deaths = layered_charts.properties(
    title="Difference-In-Difference Analysis Of Opioid Overdose Deaths In Florida"
)

florida_final_diff_in_diff_deaths
 
