In [82]:
# importing the required libraries 
import pandas as pd
import numpy as np
import altair as alt
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
import seaborn as sns


## Diff-in-Diff Analysis Of Drug Related Prescriptions in Washington

In [83]:
# loading opioid prescription (MME) data for Washington and its comparison states
prescriptions_reduced =  pd.read_parquet("../20_intermediate_files/prescriptions_wa.parquet")

In [84]:
prescriptions_reduced.columns

Index(['BUYER_STATE', 'BUYER_COUNTY', 'CountyFIPS_x', 'StateFIPS_x', 'Year',
       'MME', 'FIP_unique', 'County', 'State', 'Population', 'county_test',
       'state_abbrev', 'CountyFIPS_y', 'StateFIPS_y', 'CountyName', '_merge'],
      dtype='object')

In [85]:
# # reducing the number of columns in the drug prescriptions dataset by including only the relevant attributes to create a new dataset

# prescriptions_reduced = prescriptions[
#     [
#         "DRUG_CODE",
#         "DRUG_NAME",
#         "QUANTITY",
#         "UNIT",
#         "STRENGTH",
#         "CALC_BASE_WT_IN_GM",
#         "DOSAGE_UNIT",
#         "Product_Name",
#         "Ingredient_Name",
#         "Measure",
#         "MME_Conversion_Factor",
#         "dos_str",
#         "Year",
#         "Month",
#         "StateFIPS",
#         "StateName",
#         "CountyFIPS",
#         "state_abbrev",
#         "FIP_unique",
#         "Population",
#         "county_test",
#     ]
# ]

In [86]:
prescriptions_reduced

Unnamed: 0_level_0,BUYER_STATE,BUYER_COUNTY,CountyFIPS_x,StateFIPS_x,Year,MME,FIP_unique,County,State,Population,county_test,state_abbrev,CountyFIPS_y,StateFIPS_y,CountyName,_merge
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
3,CO,adams,8001,8,2009.0,110355.414760,80018,Adams County,CO,435700.0,Adams,CO,8001.0,8.0,Adams,both
4,CO,adams,8001,8,2010.0,132304.866282,80018,Adams County,CO,443691.0,Adams,CO,8001.0,8.0,Adams,both
5,CO,adams,8001,8,2011.0,157909.630983,80018,Adams County,CO,452201.0,Adams,CO,8001.0,8.0,Adams,both
6,CO,adams,8001,8,2012.0,161238.256294,80018,Adams County,CO,460558.0,Adams,CO,8001.0,8.0,Adams,both
7,CO,adams,8001,8,2013.0,134536.857641,80018,Adams County,CO,469978.0,Adams,CO,8001.0,8.0,Adams,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7308,CO,mineral,8079,8,2009.0,0.000000,80798,Mineral County,CO,732.0,Mineral,CO,8079.0,8.0,Mineral,both
7309,CO,mineral,8079,8,2011.0,0.000000,80798,Mineral County,CO,711.0,Mineral,CO,8079.0,8.0,Mineral,both
7310,CO,mineral,8079,8,2012.0,0.000000,80798,Mineral County,CO,719.0,Mineral,CO,8079.0,8.0,Mineral,both
7311,CO,mineral,8079,8,2013.0,0.000000,80798,Mineral County,CO,732.0,Mineral,CO,8079.0,8.0,Mineral,both


In [87]:
# Work on a separate frame so the loaded data stays untouched; Year arrives as
# float (e.g. 2009.0) and is cast to a 64-bit integer here.

prescriptions_reduced_copy = prescriptions_reduced.astype({"Year": "int64"})

In [88]:
prescriptions_reduced_copy

Unnamed: 0_level_0,BUYER_STATE,BUYER_COUNTY,CountyFIPS_x,StateFIPS_x,Year,MME,FIP_unique,County,State,Population,county_test,state_abbrev,CountyFIPS_y,StateFIPS_y,CountyName,_merge
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
3,CO,adams,8001,8,2009,110355.414760,80018,Adams County,CO,435700.0,Adams,CO,8001.0,8.0,Adams,both
4,CO,adams,8001,8,2010,132304.866282,80018,Adams County,CO,443691.0,Adams,CO,8001.0,8.0,Adams,both
5,CO,adams,8001,8,2011,157909.630983,80018,Adams County,CO,452201.0,Adams,CO,8001.0,8.0,Adams,both
6,CO,adams,8001,8,2012,161238.256294,80018,Adams County,CO,460558.0,Adams,CO,8001.0,8.0,Adams,both
7,CO,adams,8001,8,2013,134536.857641,80018,Adams County,CO,469978.0,Adams,CO,8001.0,8.0,Adams,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7308,CO,mineral,8079,8,2009,0.000000,80798,Mineral County,CO,732.0,Mineral,CO,8079.0,8.0,Mineral,both
7309,CO,mineral,8079,8,2011,0.000000,80798,Mineral County,CO,711.0,Mineral,CO,8079.0,8.0,Mineral,both
7310,CO,mineral,8079,8,2012,0.000000,80798,Mineral County,CO,719.0,Mineral,CO,8079.0,8.0,Mineral,both
7311,CO,mineral,8079,8,2013,0.000000,80798,Mineral County,CO,732.0,Mineral,CO,8079.0,8.0,Mineral,both


In [89]:
# Keep only the Washington rows, then express each row's MME per 100,000 residents.

washington_prescriptions = prescriptions_reduced_copy.loc[
    prescriptions_reduced_copy["State"] == "WA"
]

# .assign returns a new frame, so washington_prescriptions itself is left unmodified
washington_prescriptions_copy = washington_prescriptions.assign(
    shipment_per_100k=lambda frame: frame["MME"] / frame["Population"] * 100000
)

washington_prescriptions_copy

Unnamed: 0_level_0,BUYER_STATE,BUYER_COUNTY,CountyFIPS_x,StateFIPS_x,Year,MME,FIP_unique,County,State,Population,county_test,state_abbrev,CountyFIPS_y,StateFIPS_y,CountyName,_merge,shipment_per_100k
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
3568,WA,adams,53001,53,2009,7673.547900,5300153,Adams County,WA,18405.0,Adams,WA,53001.0,53.0,Adams,both,41692.735126
3569,WA,adams,53001,53,2010,8516.658075,5300153,Adams County,WA,18790.0,Adams,WA,53001.0,53.0,Adams,both,45325.482038
3570,WA,adams,53001,53,2011,9310.998375,5300153,Adams County,WA,18877.0,Adams,WA,53001.0,53.0,Adams,both,49324.566271
3571,WA,adams,53001,53,2012,9063.213195,5300153,Adams County,WA,18944.0,Adams,WA,53001.0,53.0,Adams,both,47842.130463
3572,WA,adams,53001,53,2013,8987.487300,5300153,Adams County,WA,19098.0,Adams,WA,53001.0,53.0,Adams,both,47059.835061
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3911,WA,yakima,53077,53,2010,134937.738431,5307753,Yakima County,WA,244256.0,Yakima,WA,53077.0,53.0,Yakima,both,55244.390488
3912,WA,yakima,53077,53,2011,152014.707558,5307753,Yakima County,WA,245926.0,Yakima,WA,53077.0,53.0,Yakima,both,61813.190780
3913,WA,yakima,53077,53,2012,160465.293363,5307753,Yakima County,WA,246127.0,Yakima,WA,53077.0,53.0,Yakima,both,65196.135882
3914,WA,yakima,53077,53,2013,167832.615616,5307753,Yakima County,WA,246485.0,Yakima,WA,53077.0,53.0,Yakima,both,68090.397232


In [90]:
# Opioid shipment (MME per 100K residents) summed within each (Year, county) pair for Washington

washington_prescriptions_result = (
    washington_prescriptions_copy
    .groupby(["Year", "county_test"])["shipment_per_100k"]
    .agg("sum")
    .reset_index()
)

washington_prescriptions_result.head()

Unnamed: 0,Year,county_test,shipment_per_100k
0,2009,Adams,41692.735126
1,2009,Asotin,85323.965491
2,2009,Benton,66314.114389
3,2009,Chelan,69248.006992
4,2009,Clallam,146962.842839


## Diff-in-Diff Analysis Of Opioid shipments in Washington and Other States 

In [91]:
# Every non-Washington row forms the comparison (control-state) shipment dataset

other_states_shipment = prescriptions_reduced_copy.loc[
    prescriptions_reduced_copy["State"].ne("WA")
]

other_states_shipment


Unnamed: 0_level_0,BUYER_STATE,BUYER_COUNTY,CountyFIPS_x,StateFIPS_x,Year,MME,FIP_unique,County,State,Population,county_test,state_abbrev,CountyFIPS_y,StateFIPS_y,CountyName,_merge
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
3,CO,adams,8001,8,2009,110355.414760,80018,Adams County,CO,435700.0,Adams,CO,8001.0,8.0,Adams,both
4,CO,adams,8001,8,2010,132304.866282,80018,Adams County,CO,443691.0,Adams,CO,8001.0,8.0,Adams,both
5,CO,adams,8001,8,2011,157909.630983,80018,Adams County,CO,452201.0,Adams,CO,8001.0,8.0,Adams,both
6,CO,adams,8001,8,2012,161238.256294,80018,Adams County,CO,460558.0,Adams,CO,8001.0,8.0,Adams,both
7,CO,adams,8001,8,2013,134536.857641,80018,Adams County,CO,469978.0,Adams,CO,8001.0,8.0,Adams,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7308,CO,mineral,8079,8,2009,0.000000,80798,Mineral County,CO,732.0,Mineral,CO,8079.0,8.0,Mineral,both
7309,CO,mineral,8079,8,2011,0.000000,80798,Mineral County,CO,711.0,Mineral,CO,8079.0,8.0,Mineral,both
7310,CO,mineral,8079,8,2012,0.000000,80798,Mineral County,CO,719.0,Mineral,CO,8079.0,8.0,Mineral,both
7311,CO,mineral,8079,8,2013,0.000000,80798,Mineral County,CO,732.0,Mineral,CO,8079.0,8.0,Mineral,both


In [92]:
# Express each control-state row's MME per 100,000 residents on a fresh frame
other_states_shipment_copy = other_states_shipment.assign(
    shipment_per_100k=lambda frame: frame["MME"] / frame["Population"] * 100000
)

other_states_shipment_copy


Unnamed: 0_level_0,BUYER_STATE,BUYER_COUNTY,CountyFIPS_x,StateFIPS_x,Year,MME,FIP_unique,County,State,Population,county_test,state_abbrev,CountyFIPS_y,StateFIPS_y,CountyName,_merge,shipment_per_100k
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
3,CO,adams,8001,8,2009,110355.414760,80018,Adams County,CO,435700.0,Adams,CO,8001.0,8.0,Adams,both,25328.302676
4,CO,adams,8001,8,2010,132304.866282,80018,Adams County,CO,443691.0,Adams,CO,8001.0,8.0,Adams,both,29819.145821
5,CO,adams,8001,8,2011,157909.630983,80018,Adams County,CO,452201.0,Adams,CO,8001.0,8.0,Adams,both,34920.230380
6,CO,adams,8001,8,2012,161238.256294,80018,Adams County,CO,460558.0,Adams,CO,8001.0,8.0,Adams,both,35009.327011
7,CO,adams,8001,8,2013,134536.857641,80018,Adams County,CO,469978.0,Adams,CO,8001.0,8.0,Adams,both,28626.203278
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7308,CO,mineral,8079,8,2009,0.000000,80798,Mineral County,CO,732.0,Mineral,CO,8079.0,8.0,Mineral,both,0.000000
7309,CO,mineral,8079,8,2011,0.000000,80798,Mineral County,CO,711.0,Mineral,CO,8079.0,8.0,Mineral,both,0.000000
7310,CO,mineral,8079,8,2012,0.000000,80798,Mineral County,CO,719.0,Mineral,CO,8079.0,8.0,Mineral,both,0.000000
7311,CO,mineral,8079,8,2013,0.000000,80798,Mineral County,CO,732.0,Mineral,CO,8079.0,8.0,Mineral,both,0.000000


In [93]:
# Opioid shipment (MME per 100K residents) summed within each (Year, State, county) group of the control states

other_states_result = (
    other_states_shipment_copy
    .groupby(["Year", "State", "county_test"])["shipment_per_100k"]
    .agg("sum")
    .reset_index()
)
other_states_result.head()


Unnamed: 0,Year,State,county_test,shipment_per_100k
0,2009,CO,Adams,25328.302676
1,2009,CO,Alamosa,81744.606763
2,2009,CO,Arapahoe,26335.286747
3,2009,CO,Archuleta,13570.33113
4,2009,CO,Baca,19080.435028


In [94]:
# Side-by-side descriptive statistics of the county-year shipment rates
washington_summary = (
    washington_prescriptions_result["shipment_per_100k"]
    .describe()
    .to_frame(name="Opioid Shipment Per 100K Residents - Washington")
)
other_states_summary = (
    other_states_result["shipment_per_100k"]
    .describe()
    .to_frame(name="Opioid Shipment Per 100K Residents - Control States (CO, MD, NC)")
)
stats = pd.concat([washington_summary, other_states_summary], axis=1)
stats

Unnamed: 0,Opioid Shipment Per 100K Residents - Washington,"Opioid Shipment Per 100K Residents - Control States (CO, MD, NC)"
count,234.0,1116.0
mean,77147.967839,41998.141898
std,31714.063045,25089.709146
min,25763.387809,0.0
25%,54324.40625,25481.674644
50%,68697.031826,38731.68079
75%,90934.86297,55467.34102
max,175781.216926,172207.387718


In [95]:
# yearly mean of the per-100K opioid shipment rate over the CO rows
CO_results = (
    other_states_shipment_copy
    .loc[other_states_shipment_copy["State"].eq("CO")]
    .groupby("Year")["shipment_per_100k"]
    .mean()
)
CO_results

Year
2009    24003.772083
2010    27288.528960
2011    31538.037181
2012    34263.659985
2013    31055.541468
2014    29139.028664
Name: shipment_per_100k, dtype: float64

In [96]:
# yearly mean of the per-100K opioid shipment rate over the MD rows
MD_results = (
    other_states_shipment_copy
    .loc[other_states_shipment_copy["State"].eq("MD")]
    .groupby("Year")["shipment_per_100k"]
    .mean()
)
MD_results

Year
2009    45985.167022
2010    54298.313719
2011    56136.513985
2012    48552.015102
2013    44486.785048
2014    44411.448962
Name: shipment_per_100k, dtype: float64

In [97]:
# yearly mean of the per-100K opioid shipment rate over the NC rows
NC_results = (
    other_states_shipment_copy
    .loc[other_states_shipment_copy["State"].eq("NC")]
    .groupby("Year")["shipment_per_100k"]
    .mean()
)
NC_results

Year
2009    41289.639277
2010    44744.885768
2011    47363.290500
2012    50210.871918
2013    52390.596408
2014    52252.208818
Name: shipment_per_100k, dtype: float64

In [98]:
# Collapse the county-level rates to one unweighted mean per year for each group
wa_result = (
    washington_prescriptions_result
    .groupby("Year")["shipment_per_100k"]
    .agg("mean")
    .reset_index()
)
comp_result = (
    other_states_result
    .groupby(["Year"])["shipment_per_100k"]
    .agg("mean")
    .reset_index()
)
comp_result  # evaluated but not rendered: a cell only shows its final expression
wa_result

Unnamed: 0,Year,shipment_per_100k
0,2009,75058.203038
1,2010,77468.976409
2,2011,77897.686034
3,2012,77317.159398
4,2013,77655.943149
5,2014,77489.839007


In [99]:
# washington_summary = pd.DataFrame(wa_result.describe()["shipment_per_100k"]).rename(columns={"shipment_per_100k": "Opioid Shipment per 100k Residents - Washington"})
# comp_summary = pd.DataFrame(comp_result.describe()["shipment_per_100k"]).rename(columns={"shipment_per_100k": "Opioid Shipment per 100k Residents - Control States"})
# stats = pd.concat([washington_summary, comp_summary], axis=1)
# stats

In [100]:
# # create a scale for number of years before and after 2012 (target year)

# def scale_years(year):
#     if year == 2009:
#         return -3
#     if year == 2010:
#         return -2
#     if year == 2011:
#         return -1
#     if year == 2012:
#         return 0
#     if year == 2013:
#         return 1
#     if year == 2014:
#         return 2

# wa_result["year relative to policy"] = wa_result["Year"].apply(lambda x: scale_years(x))
# comp_result["year relative to policy"] = comp_result["Year"].apply(lambda x: scale_years(x))

# # double check no nulls in "year relative to policy"

# assert (wa_result["year relative to policy"].isnull().sum() == 0)
# assert (comp_result["year relative to policy"].isnull().sum() == 0)

In [101]:
wa_result

Unnamed: 0,Year,shipment_per_100k
0,2009,75058.203038
1,2010,77468.976409
2,2011,77897.686034
3,2012,77317.159398
4,2013,77655.943149
5,2014,77489.839007


In [102]:
# Partition each yearly series into the pre-policy (< 2012) and post-policy (>= 2012) windows

wa_b4 = wa_result.loc[wa_result["Year"].lt(2012)]
wa_after = wa_result.loc[wa_result["Year"].ge(2012)]
comp_b4 = comp_result.loc[comp_result["Year"].lt(2012)]
comp_after = comp_result.loc[comp_result["Year"].ge(2012)]



In [103]:
def vertical_line(year):
    """Return an Altair rule chart drawing a black vertical line at ``year``.

    The rule is positioned on a quantitative "Year" x-axis; the colour comes
    straight from the "color" column because the colour scale is disabled.
    """
    marker = pd.DataFrame({"Year": [year], "color": ["black"]})
    literal_color = alt.Color("color:N", scale=None)
    return alt.Chart(marker).mark_rule().encode(x="Year:Q", color=literal_color)

In [104]:
# creating the vertical line at 2012, the policy year used to split the Washington analysis into before/after periods
line = vertical_line(2012)


In [105]:
# creating the regression model and calculating the error bands for creating the plots
 
def get_reg_fit_and_ci(data, color, xvar, yvar, legend, alpha=0.05):
    """Fit ``yvar ~ xvar`` by OLS and chart the fitted line with its confidence band.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the ``xvar`` and ``yvar`` columns used in the regression.
    color : str
        Colour applied to the confidence band.
    xvar, yvar : str
        Column names placed on the right- and left-hand side of the OLS formula.
    legend : str
        Label stored in the "Before/After" column; it drives the colour of the
        fitted line and the corresponding legend entry (legend title "States").
    alpha : float, default 0.05
        Significance level passed to ``conf_int`` for the band.

    Returns
    -------
    tuple
        ``(predictions, chart)``: the prediction grid with the fitted mean,
        ``ci_low`` and ``ci_high`` columns, and the layered Altair chart
        (band underneath, fitted line on top).
    """
    axis_years = [2009, 2010, 2011, 2012, 2013, 2014, 2015]
    # One title shared by both layers so the layered chart shows a single y-axis title.
    y_title = "Opioid Shipment (MME) Per 100K Residents"

    # Grid of x values spanning the rows where the outcome is observed.
    observed_x = data.loc[pd.notnull(data[yvar]), xvar]
    # linspace gives exactly 101 points ending on the largest observed x; a
    # float-step arange can overshoot the upper bound by one step.
    grid = np.linspace(observed_x.min(), observed_x.max(), 101)
    predictions = pd.DataFrame({xvar: grid})

    # Fit the model and evaluate it on the grid (a one-column frame keeps the
    # variable name attached for the formula interface).
    model = smf.ols(f"{yvar} ~ {xvar}", data=data).fit()
    model_predict = model.get_prediction(predictions[[xvar]])
    predictions[yvar] = model_predict.summary_frame()["mean"]
    predictions[["ci_low", "ci_high"]] = model_predict.conf_int(alpha=alpha)

    # Label column used for the line colour / legend entry.
    predictions["Before/After"] = f"{legend}"
    reg = (
        alt.Chart(predictions)
        .mark_line()
        .encode(
            x=xvar,
            y=alt.Y(yvar, title=y_title),
            color=alt.Color("Before/After", legend=alt.Legend(title="States")),
        )
    )

    ci = (
        alt.Chart(predictions)
        .mark_errorband()
        .encode(
            alt.X(f"{xvar}:Q", axis=alt.Axis(format=".0f", values=axis_years)),
            y=alt.Y(
                "ci_low",
                title=y_title,
                scale=alt.Scale(zero=False),
            ),
            y2="ci_high",
            color=alt.value(f"{color}"),
        )
    )
    chart = ci + reg
    return predictions, chart
 
 


In [106]:
# Using the get_reg_fit_and_ci function to create the charts
 
def build_chart(data, color, xvar, yvar, legend, alpha=0.05):
    """Return only the layered chart produced by ``get_reg_fit_and_ci``.

    All arguments are forwarded unchanged; the prediction frame that
    ``get_reg_fit_and_ci`` also returns is discarded.
    """
    fit_options = dict(
        data=data, color=color, xvar=xvar, yvar=yvar, legend=legend, alpha=alpha
    )
    return get_reg_fit_and_ci(**fit_options)[1]
 

In [107]:
# Fit and chart the pre-/post-2012 opioid shipment trends for Washington and the
# control states, then layer them with the policy-year marker.

washington_before_chart = build_chart(
    data=wa_b4,
    color="orange",
    xvar="Year",
    yvar="shipment_per_100k",
    legend="Washington",
    alpha=0.05,
)

washington_after_chart = build_chart(
    data=wa_after,
    color="orange",
    xvar="Year",
    yvar="shipment_per_100k",
    legend="Washington",
    alpha=0.05,
)

other_states_before_chart = build_chart(
    data=comp_b4,
    color="blue",
    xvar="Year",
    yvar="shipment_per_100k",
    legend="Control States - CO,MD,NC",
    alpha=0.05,
)

other_states_after_chart = build_chart(
    data=comp_after,
    color="blue",
    xvar="Year",
    yvar="shipment_per_100k",
    legend="Control States - CO,MD,NC",
    alpha=0.05,
)

washington_final_diff_in_diff_prescriptions = (
    washington_before_chart
    + washington_after_chart
    + other_states_before_chart
    + other_states_after_chart
    + line
).properties(title="Difference-In-Difference Analysis Of Opioid Shipment In Washington")

washington_final_diff_in_diff_prescriptions
 


  for col_name, dtype in df.dtypes.iteritems():


In [108]:

# # wa_after = wa_after[wa_after["Year"] != 2011] # may need to handle this differently
# from sklearn.linear_model import LinearRegression 

# regressor_b4 = LinearRegression() 
# regressor_after = LinearRegression()


# X_b4 = np.array(wa_b4["year relative to policy"]).reshape(-1, 1)
# y_b4 = np.array(wa_b4["shipment_per_100k"]).reshape(-1, 1)

# X_after = np.array(wa_after["year relative to policy"]).reshape(-1, 1)
# y_after = np.array(wa_after["shipment_per_100k"]).reshape(-1, 1)


# regressor_b4.fit(X_b4,y_b4) 
# regressor_after.fit(X_after,y_after) 


# y_pred_b4 = regressor_b4.predict(X_b4)
# y_pred_after = regressor_after.predict(X_after)


In [109]:
# c
# # comp_after = comp_after[comp_after["Year"] != 2011] # may need to handle this differently

# regressor_b41 = LinearRegression() 
# regressor_after1 = LinearRegression()


# X_b41 = np.array(comp_b4["year relative to policy"]).reshape(-1, 1)
# y_b41 = np.array(comp_b4["shipment_per_100k"]).reshape(-1, 1)

# X_after1 = np.array(comp_after["year relative to policy"]).reshape(-1, 1)
# y_after1 = np.array(comp_after["shipment_per_100k"]).reshape(-1, 1)


# regressor_b41.fit(X_b41,y_b41) 
# regressor_after1.fit(X_after1,y_after1) 


# y_pred_b41 = regressor_b41.predict(X_b41)
# y_pred_after1 = regressor_after1.predict(X_after)

In [110]:
# y_pred_after1
# X_after1

In [111]:
# plt.xlim(-3, 3)
# #plt.ylim(0, 500)
# plt.title("Opioid Shipment before and after policy implementation in Washington")
# plt.xlabel("year relative to policy")
# plt.ylabel("Opioid Shipment per 100k")
# plt.plot(X_b41, y_pred_b41,color='k', label = "comp before")
# plt.plot(X_after1, y_pred_after1,color='k', label = "comp after")
# plt.plot(X_b4, y_pred_b4,color='b', label = "Washington before")
# plt.plot(X_after, y_pred_after,color='b', label = "Washington after")
# plt.legend()
# plt.show()

# Diff-in-Diff plots

In [112]:
# Pre-policy (< 2012) and post-policy (>= 2012) windows for Washington and the comparison states
wa_b4 = wa_result.loc[lambda frame: frame["Year"] < 2012]
wa_after = wa_result.loc[lambda frame: frame["Year"] >= 2012]
comp_b4 = comp_result.loc[lambda frame: frame["Year"] < 2012]
comp_after = comp_result.loc[lambda frame: frame["Year"] >= 2012]
# column names for the regression axes
x, y = "Year", "shipment_per_100k"
def vertical_line(year):
    """Return an Altair rule chart drawing a black vertical line at ``year``.

    The rule is encoded on a quantitative "Year" field, the same field the
    regression charts in this notebook put on their x-axis, so layering the
    rule with them keeps a single x-axis title instead of adding a second
    ("Date") one.

    Parameters
    ----------
    year : int or float
        x position of the line (the year of policy implementation).

    Returns
    -------
    alt.Chart
        A single-rule chart coloured with the literal value in its "color" column.
    """
    line = alt.Chart(pd.DataFrame({
        'Year': [year],
        'color': ["black"]
    })).mark_rule().encode(
        x='Year:Q',
        # scale=None makes Altair use the "black" string as the literal colour
        color=alt.Color('color:N', scale=None)
    )

    return line
def get_reg_fit(data, yvar, xvar, color, title, alpha=0.05):
    """Fit ``yvar ~ xvar`` by OLS and chart the fitted line with its confidence band.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the ``xvar`` and ``yvar`` columns used in the regression.
    yvar, xvar : str
        Column names placed on the left- and right-hand side of the OLS formula.
    color : str
        Colour applied to both the fitted line and the confidence band.
    title : str
        Title set on the fitted-line layer.
    alpha : float, default 0.05
        Significance level passed to ``conf_int`` for the band.

    Returns
    -------
    tuple
        ``(predictions, chart)``: the prediction grid with the fitted mean,
        ``ci_low`` and ``ci_high`` columns, and the layered Altair chart
        (band underneath, fitted line on top).
    """
    # Shared axis titles so both layers agree and the layered chart shows one title per axis.
    x_title = "Year"
    y_title = "Shipments per 100k Residents"

    # Grid of x values spanning the rows where the outcome is observed.
    observed_x = data.loc[pd.notnull(data[yvar]), xvar]
    # linspace gives exactly 101 points ending on the largest observed x; a
    # float-step arange can overshoot the upper bound by one step.
    grid = np.linspace(observed_x.min(), observed_x.max(), 101)
    predictions = pd.DataFrame({xvar: grid})

    # Fit model, get predictions (smf is imported in the notebook's import cell).
    model = smf.ols(f"{yvar} ~ {xvar}", data=data).fit()
    model_predict = model.get_prediction(predictions[[xvar]])
    predictions[yvar] = model_predict.summary_frame()["mean"]
    predictions[["ci_low", "ci_high"]] = model_predict.conf_int(alpha=alpha)

    # Build chart
    reg = (
        alt.Chart(predictions)
        .mark_line()
        .encode(
            x=alt.X(xvar, scale=alt.Scale(zero=False), title=x_title),
            y=alt.Y(yvar, scale=alt.Scale(zero=False), title=y_title),
            color=alt.value(color)
        ).properties(title=title)
    )
    ci = (
        alt.Chart(predictions)
        .mark_errorband()
        .encode(
            x=alt.X(xvar, title=x_title),
            y=alt.Y("ci_low", title=y_title),
            y2="ci_high",
            color=alt.value(color),
        )
    )
    chart = ci + reg
    return predictions, chart
# Marker for the 2012 policy year, then separate pre- and post-policy fits for Washington
line = vertical_line(2012)
fit_wa_b4, reg_chart_wa_b4 = get_reg_fit(
    wa_b4, yvar="shipment_per_100k", xvar="Year", color="blue", title= "Shipments per 100k Residents - Washington", alpha=0.05
)


# The post-policy predictions get their own name so they no longer overwrite
# the pre-policy predictions held in fit_wa_b4.
fit_wa_after, reg_chart_wa_after = get_reg_fit(
    wa_after, yvar="shipment_per_100k", xvar="Year", color="blue", title= "Shipments per 100k Residents - Washington",alpha=0.05
)


In [113]:
# Rebuild the Washington charts with a title that suits the combined diff-in-diff figure
fit_wa_b4, reg_chart_wa_b4 = get_reg_fit(
    wa_b4, yvar="shipment_per_100k", xvar="Year", color="blue", title= "Shipments per 100k Residents - Washington (blue) vs. Comp States (green)", alpha=0.05
)


# Post-policy predictions are stored under their own name instead of
# overwriting the pre-policy predictions in fit_wa_b4.
fit_wa_after, reg_chart_wa_after = get_reg_fit(
    wa_after, yvar="shipment_per_100k", xvar="Year", color="blue", title= "Shipments per 100k Residents - Washington (blue) vs. Comp States (green)",alpha=0.05
)
# Washington-only pre/post figure; kept under a name because a bare expression
# in the middle of a cell is never rendered.
washington_pre_post_chart = (reg_chart_wa_b4 + reg_chart_wa_after + line).properties(width=700, height=500)

# Washington above
################################################################
# comparison states below

fit_comp_b4, reg_chart_comp_b4 = get_reg_fit(
    comp_b4, yvar="shipment_per_100k", xvar="Year", color="green", title= "Shipments per 100k Residents - Comp States",alpha=0.05
)


fit_comp_after, reg_chart_comp_after = get_reg_fit(
    comp_after, yvar="shipment_per_100k", xvar="Year", color="green", title= "Shipments per 100k Residents - Comp States",alpha=0.05
)


In [114]:
# Overlay the Washington and comparison-state pre/post fits with the policy-year marker
(
    reg_chart_wa_b4
    + reg_chart_wa_after
    + reg_chart_comp_b4
    + reg_chart_comp_after
    + line
).properties(width=700, height=500)

  for col_name, dtype in df.dtypes.iteritems():
