# Florida Difference-in-Differences Analysis of Drug Overdose Deaths

In [23]:
# importing the required libraries 
import os

import altair as alt
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [24]:
# loading pre-cleaned data on deaths in Florida and its neighbouring states
# The CSV location can be overridden through the OPIOID_DEATHS_CSV environment
# variable so the notebook is not tied to a single machine; when the variable
# is unset, the original local path is used unchanged.
deaths_csv_path = os.environ.get(
    "OPIOID_DEATHS_CSV",
    r"C:\Users\annap\OneDrive\Desktop\Opioid Project\Deaths.csv",
)
deaths = pd.read_csv(deaths_csv_path)
deaths.head(3)

Unnamed: 0,County,County Code,Year,Year Code,Drug/Alcohol Induced Cause,Drug/Alcohol Induced Cause Code,Deaths,State,StateFIPS,CountyName,StateName,CountyFIPS,StateAbbr,STATE_COUNTY,state_abbrev,FIP_unique,Population,county_test
0,Autauga,1001.0,2007.0,2007.0,All other non-drug and non-alcohol causes,O9,399.0,AL,1,Autauga,Alabama,1001,AL,AL | AUTAUGA,AL,10011,52405.0,Autauga
1,Autauga,1001.0,2008.0,2008.0,All other non-drug and non-alcohol causes,O9,448.0,AL,1,Autauga,Alabama,1001,AL,AL | AUTAUGA,AL,10011,53277.0,Autauga
2,Autauga,1001.0,2009.0,2009.0,All other non-drug and non-alcohol causes,O9,408.0,AL,1,Autauga,Alabama,1001,AL,AL | AUTAUGA,AL,10011,54135.0,Autauga


In [25]:
# listing the column names of the loaded deaths dataset
deaths.columns

Index(['County', 'County Code', 'Year', 'Year Code',
       'Drug/Alcohol Induced Cause', 'Drug/Alcohol Induced Cause Code',
       'Deaths', 'State', 'StateFIPS', 'CountyName', 'StateName', 'CountyFIPS',
       'StateAbbr', 'STATE_COUNTY', 'state_abbrev', 'FIP_unique', 'Population',
       'county_test'],
      dtype='object')

In [26]:
# working on a separate copy of the deaths dataset (prevents SettingWithCopy warnings
# on later column assignments), with the columns this analysis does not use removed.
unused_columns = [
    "County Code",
    "Year Code",
    "Drug/Alcohol Induced Cause Code",
    "State",
    "CountyName",
    "StateAbbr",
    "STATE_COUNTY",
    "county_test",
]
deaths_copy = deaths.copy().drop(columns=unused_columns)
deaths_copy.head(3)

Unnamed: 0,County,Year,Drug/Alcohol Induced Cause,Deaths,StateFIPS,StateName,CountyFIPS,state_abbrev,FIP_unique,Population
0,Autauga,2007.0,All other non-drug and non-alcohol causes,399.0,1,Alabama,1001,AL,10011,52405.0
1,Autauga,2008.0,All other non-drug and non-alcohol causes,448.0,1,Alabama,1001,AL,10011,53277.0
2,Autauga,2009.0,All other non-drug and non-alcohol causes,408.0,1,Alabama,1001,AL,10011,54135.0


In [27]:
# Year, Deaths and Population are displayed as floats after loading; storing them as
# 64-bit integers, then giving the FIPS / state columns more conventional names.
integer_columns = {"Year": "int64", "Deaths": "int64", "Population": "int64"}
conventional_names = {
    "StateFIPS": "State FIPS",
    "CountyFIPS": "County FIPS",
    "StateName": "State",
    "state_abbrev": "State Abbreviation",
    "FIP_unique": "FIPS_Unique",
}
deaths_copy = deaths_copy.astype(integer_columns).rename(columns=conventional_names)

deaths_copy.head(3)


Unnamed: 0,County,Year,Drug/Alcohol Induced Cause,Deaths,State FIPS,State,County FIPS,State Abbreviation,FIPS_Unique,Population
0,Autauga,2007,All other non-drug and non-alcohol causes,399,1,Alabama,1001,AL,10011,52405
1,Autauga,2008,All other non-drug and non-alcohol causes,448,1,Alabama,1001,AL,10011,53277
2,Autauga,2009,All other non-drug and non-alcohol causes,408,1,Alabama,1001,AL,10011,54135


In [28]:
# checking which states are present in the dataset
deaths_copy["State"].unique()


array(['Alabama', 'Florida', 'Georgia', 'Mississippi', 'South Carolina',
       'Tennessee'], dtype=object)

In [29]:
# checking the distinct cause-of-death categories present in the dataset
deaths_copy["Drug/Alcohol Induced Cause"].unique()

array(['All other non-drug and non-alcohol causes',
       'Drug poisonings (overdose) Unintentional (X40-X44)',
       'All other alcohol-induced causes',
       'Drug poisonings (overdose) Undetermined (Y10-Y14)',
       'All other drug-induced causes',
       'Drug poisonings (overdose) Suicide (X60-X64)',
       'Alcohol poisonings (overdose) (X45, X65, Y15)'], dtype=object)

In [30]:
# creating the dataset for Florida, restricted to the three drug poisoning (overdose)
# cause categories: unintentional, undetermined and suicide

overdose_causes = [
    "Drug poisonings (overdose) Unintentional (X40-X44)",
    "Drug poisonings (overdose) Undetermined (Y10-Y14)",
    "Drug poisonings (overdose) Suicide (X60-X64)",
]
is_florida = deaths_copy["State"].eq("Florida")
is_overdose = deaths_copy["Drug/Alcohol Induced Cause"].isin(overdose_causes)
florida_deaths = deaths_copy[is_florida & is_overdose]

In [9]:
# previewing the first rows of the Florida overdose subset
florida_deaths.head(5)

Unnamed: 0,County,Year,Drug/Alcohol Induced Cause,Deaths,State FIPS,State,County FIPS,State Abbreviation,FIPS_Unique,Population
618,Alachua,2007,Drug poisonings (overdose) Unintentional (X40-...,17,12,Florida,12001,FL,1200112,242685
623,Alachua,2009,Drug poisonings (overdose) Unintentional (X40-...,15,12,Florida,12001,FL,1200112,246657
626,Alachua,2010,Drug poisonings (overdose) Unintentional (X40-...,15,12,Florida,12001,FL,1200112,247614
629,Alachua,2011,Drug poisonings (overdose) Unintentional (X40-...,14,12,Florida,12001,FL,1200112,249834
632,Alachua,2012,Drug poisonings (overdose) Unintentional (X40-...,14,12,Florida,12001,FL,1200112,251520


In [10]:
# taking an independent copy of the filtered Florida rows before new columns are added to it
florida_deaths_copy = florida_deaths.copy()

In [11]:
# drug related death rate in Florida per 100000 people, row by row:
# (deaths / population) * 100000

florida_death_share = florida_deaths_copy["Deaths"].div(florida_deaths_copy["Population"])
florida_deaths_copy["Overdose_Per_100K"] = florida_death_share.mul(100_000)

florida_deaths_copy.head(3)

Unnamed: 0,County,Year,Drug/Alcohol Induced Cause,Deaths,State FIPS,State,County FIPS,State Abbreviation,FIPS_Unique,Population,Overdose_Per_100K
618,Alachua,2007,Drug poisonings (overdose) Unintentional (X40-...,17,12,Florida,12001,FL,1200112,242685,7.004965
623,Alachua,2009,Drug poisonings (overdose) Unintentional (X40-...,15,12,Florida,12001,FL,1200112,246657,6.081319
626,Alachua,2010,Drug poisonings (overdose) Unintentional (X40-...,15,12,Florida,12001,FL,1200112,247614,6.057816


In [12]:
# adding up the overdose rates of all rows that share a year and county in Florida,
# and returning the totals as a flat table with Year and County as ordinary columns

florida_rates_by_year_county = florida_deaths_copy.groupby(["Year", "County"])["Overdose_Per_100K"]
florida_result = florida_rates_by_year_county.sum().reset_index()
florida_result.head(5)

Unnamed: 0,Year,County,Overdose_Per_100K
0,2007,Alachua,7.004965
1,2007,Bay,18.143881
2,2007,Brevard,19.269286
3,2007,Broward,15.225255
4,2007,Citrus,12.058961


In [14]:
# creating a new dataset with the drug poisoning (overdose) deaths in every state other than
# Florida - per the states present in the data: Alabama, Georgia, Mississippi, South Carolina and Tennessee

overdose_causes = [
    "Drug poisonings (overdose) Unintentional (X40-X44)",
    "Drug poisonings (overdose) Undetermined (Y10-Y14)",
    "Drug poisonings (overdose) Suicide (X60-X64)",
]
is_not_florida = deaths_copy["State"].ne("Florida")
is_overdose = deaths_copy["Drug/Alcohol Induced Cause"].isin(overdose_causes)
other_states_deaths = deaths_copy[is_not_florida & is_overdose]

In [15]:
# independent copy of the non-Florida rows, extended with the death rate per 100000 people:
# (deaths / population) * 100000
other_states_deaths_copy = other_states_deaths.copy()

other_states_death_share = other_states_deaths_copy["Deaths"].div(other_states_deaths_copy["Population"])
other_states_deaths_copy["Overdose_Per_100K"] = other_states_death_share.mul(100_000)

In [16]:
# adding up the overdose rates of all rows that share a year, state and county,
# and returning the totals as a flat table

other_rates_by_year_state_county = other_states_deaths_copy.groupby(["Year", "State", "County"])["Overdose_Per_100K"]
other_states_result = other_rates_by_year_state_county.sum().reset_index()
other_states_result.head()


Unnamed: 0,Year,State,County,Overdose_Per_100K
0,2007,Alabama,Baldwin,13.920791
1,2007,Alabama,Calhoun,9.465541
2,2007,Alabama,Houston,10.232587
3,2007,Alabama,Jefferson,10.226463
4,2007,Alabama,Lauderdale,17.561768


In [17]:
# yearwise average of the per-row overdose rates for Alabama
# NOTE(review): the mean runs over the filtered rows (which appear to be one per county and
# cause category), not over per-county totals, and is not population-weighted - confirm intended.
alabama_rows = other_states_deaths_copy.loc[other_states_deaths_copy["State"].eq("Alabama")]
alabama_results = alabama_rows.groupby("Year")["Overdose_Per_100K"].mean()
alabama_results

Year
2007    13.368704
2008    16.361755
2009    15.272194
2010    14.556800
2011    14.767924
2012    14.834840
2013    18.018408
Name: Overdose_Per_100K, dtype: float64

In [18]:
# yearwise average of the per-row overdose rates for Georgia
# NOTE(review): the mean runs over the filtered rows (which appear to be one per county and
# cause category), not over per-county totals, and is not population-weighted - confirm intended.
georgia_rows = other_states_deaths_copy.loc[other_states_deaths_copy["State"].eq("Georgia")]
georgia_results = georgia_rows.groupby("Year")["Overdose_Per_100K"].mean()
georgia_results

Year
2007    11.960843
2008    10.328943
2009    11.745507
2010    13.869420
2011    12.381299
2012    11.653082
2013    11.902365
Name: Overdose_Per_100K, dtype: float64

In [19]:
# yearwise average of the per-row overdose rates for Mississippi
# NOTE(review): the mean runs over the filtered rows (which appear to be one per county and
# cause category), not over per-county totals, and is not population-weighted - confirm intended.
mississippi_rows = other_states_deaths_copy.loc[other_states_deaths_copy["State"].eq("Mississippi")]
mississippi_results = mississippi_rows.groupby("Year")["Overdose_Per_100K"].mean()
mississippi_results

Year
2007    21.903231
2008    17.037801
2009    20.989875
2010    18.498209
2011    16.426466
2012    15.167236
2013    19.345719
Name: Overdose_Per_100K, dtype: float64

In [20]:
# yearwise average of the per-row overdose rates for South Carolina
# NOTE(review): the mean runs over the filtered rows (which appear to be one per county and
# cause category), not over per-county totals, and is not population-weighted - confirm intended.
south_carolina_rows = other_states_deaths_copy.loc[other_states_deaths_copy["State"].eq("South Carolina")]
south_carolina_results = south_carolina_rows.groupby("Year")["Overdose_Per_100K"].mean()
south_carolina_results

Year
2007    12.447756
2008    12.431124
2009    13.862295
2010    14.429518
2011    12.253051
2012    12.683164
2013    13.080714
Name: Overdose_Per_100K, dtype: float64

In [21]:
# yearwise average of the per-row overdose rates for Tennessee
# NOTE(review): the mean runs over the filtered rows (which appear to be one per county and
# cause category), not over per-county totals, and is not population-weighted - confirm intended.
tennessee_rows = other_states_deaths_copy.loc[other_states_deaths_copy["State"].eq("Tennessee")]
tennessee_results = tennessee_rows.groupby("Year")["Overdose_Per_100K"].mean()
tennessee_results

Year
2007    16.285685
2008    14.739367
2009    14.611214
2010    17.048880
2011    18.678175
2012    16.506248
2013    20.844220
Name: Overdose_Per_100K, dtype: float64

In [22]:
# yearwise average of the per-row overdose rates for Florida
# NOTE(review): the mean runs over the filtered rows (which appear to be one per county and
# cause category), not over per-county totals, and is not population-weighted - confirm intended.

florida_rates_by_year = florida_deaths_copy.groupby("Year")["Overdose_Per_100K"]
florida_yearwise_results = florida_rates_by_year.mean()
florida_yearwise_results

Year
2007    10.589778
2008    11.389320
2009    10.090938
2010    11.976183
2011    11.423950
2012    10.435346
2013     9.046919
Name: Overdose_Per_100K, dtype: float64