In [1]:
import numpy as np
import pandas as pd
import altair as alt
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
# Load the intermediate prescriptions dataset (parquet, read with the fastparquet engine).
prescriptions = pd.read_parquet(
    "../20_intermediate_files/prescriptions_wa.parquet",
    engine="fastparquet",
)

In [3]:
# List the available columns before selecting any of them.
prescriptions.columns

Index(['BUYER_STATE', 'BUYER_COUNTY', 'CountyFIPS_x', 'StateFIPS_x', 'Year',
       'MME', 'FIP_unique', 'County', 'State', 'Population', 'county_test',
       'state_abbrev', 'CountyFIPS_y', 'StateFIPS_y', 'CountyName', '_merge'],
      dtype='object')

In [4]:
# Row count per state, to see which states are present in the file.
prescriptions["State"].value_counts()

NC    600
CO    372
WA    234
MD    144
Name: State, dtype: int64

In [5]:
# Row count per year, to check that every year is equally represented.
prescriptions["Year"].value_counts()

2009.0    225
2010.0    225
2011.0    225
2012.0    225
2013.0    225
2014.0    225
Name: Year, dtype: int64

In [6]:
# Work on a separate frame so the loaded data stays untouched; Year is stored as a
# float in the file and is converted to a 64-bit integer here.
prescriptions_reduced_copy = prescriptions.astype({"Year": "int64"})


In [7]:
# Preview the first rows to confirm Year is now an integer.
prescriptions_reduced_copy.head()

Unnamed: 0_level_0,BUYER_STATE,BUYER_COUNTY,CountyFIPS_x,StateFIPS_x,Year,MME,FIP_unique,County,State,Population,county_test,state_abbrev,CountyFIPS_y,StateFIPS_y,CountyName,_merge
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
3,CO,adams,8001,8,2009,110355.41476,80018,Adams County,CO,435700.0,Adams,CO,8001.0,8.0,Adams,both
4,CO,adams,8001,8,2010,132304.866282,80018,Adams County,CO,443691.0,Adams,CO,8001.0,8.0,Adams,both
5,CO,adams,8001,8,2011,157909.630983,80018,Adams County,CO,452201.0,Adams,CO,8001.0,8.0,Adams,both
6,CO,adams,8001,8,2012,161238.256294,80018,Adams County,CO,460558.0,Adams,CO,8001.0,8.0,Adams,both
7,CO,adams,8001,8,2013,134536.857641,80018,Adams County,CO,469978.0,Adams,CO,8001.0,8.0,Adams,both


In [8]:
# Keep only the rows whose State is Washington ("WA").
washington_prescriptions = prescriptions_reduced_copy.loc[
    prescriptions_reduced_copy["State"].eq("WA")
]

washington_prescriptions.head()

Unnamed: 0_level_0,BUYER_STATE,BUYER_COUNTY,CountyFIPS_x,StateFIPS_x,Year,MME,FIP_unique,County,State,Population,county_test,state_abbrev,CountyFIPS_y,StateFIPS_y,CountyName,_merge
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
3568,WA,adams,53001,53,2009,7673.5479,5300153,Adams County,WA,18405.0,Adams,WA,53001.0,53.0,Adams,both
3569,WA,adams,53001,53,2010,8516.658075,5300153,Adams County,WA,18790.0,Adams,WA,53001.0,53.0,Adams,both
3570,WA,adams,53001,53,2011,9310.998375,5300153,Adams County,WA,18877.0,Adams,WA,53001.0,53.0,Adams,both
3571,WA,adams,53001,53,2012,9063.213195,5300153,Adams County,WA,18944.0,Adams,WA,53001.0,53.0,Adams,both
3572,WA,adams,53001,53,2013,8987.4873,5300153,Adams County,WA,19098.0,Adams,WA,53001.0,53.0,Adams,both


In [9]:
# Normalise shipments by population: MME per 100,000 residents for every county-year row.
# `assign` returns a new frame, so the filtered Washington frame is left unchanged.
washington_prescriptions_copy = washington_prescriptions.assign(
    shipment_per_100k=lambda df: df["MME"] / df["Population"] * 100_000
)

washington_prescriptions_copy.head()

Unnamed: 0_level_0,BUYER_STATE,BUYER_COUNTY,CountyFIPS_x,StateFIPS_x,Year,MME,FIP_unique,County,State,Population,county_test,state_abbrev,CountyFIPS_y,StateFIPS_y,CountyName,_merge,shipment_per_100k
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
3568,WA,adams,53001,53,2009,7673.5479,5300153,Adams County,WA,18405.0,Adams,WA,53001.0,53.0,Adams,both,41692.735126
3569,WA,adams,53001,53,2010,8516.658075,5300153,Adams County,WA,18790.0,Adams,WA,53001.0,53.0,Adams,both,45325.482038
3570,WA,adams,53001,53,2011,9310.998375,5300153,Adams County,WA,18877.0,Adams,WA,53001.0,53.0,Adams,both,49324.566271
3571,WA,adams,53001,53,2012,9063.213195,5300153,Adams County,WA,18944.0,Adams,WA,53001.0,53.0,Adams,both,47842.130463
3572,WA,adams,53001,53,2013,8987.4873,5300153,Adams County,WA,19098.0,Adams,WA,53001.0,53.0,Adams,both,47059.835061


In [10]:
# Average the per-100k shipment rate over the Washington rows of each year.
# This is an unweighted mean of the row-level rates (not a sum), giving one row per year.
wa_result = (
    washington_prescriptions_copy
    .groupby("Year")["shipment_per_100k"]
    .mean()
    .reset_index()
)

wa_result.head(10)

Unnamed: 0,Year,shipment_per_100k
0,2009,75058.203038
1,2010,77468.976409
2,2011,77897.686034
3,2012,77317.159398
4,2013,77655.943149
5,2014,77489.839007


In [11]:
# Make sure Year is a 64-bit integer. "int64" is spelled out (instead of "int") so the
# dtype matches the earlier conversion of the raw frame and does not depend on the
# platform's default integer width.
wa_result["Year"] = wa_result["Year"].astype("int64")
wa_result.dtypes

Year                   int64
shipment_per_100k    float64
dtype: object

In [12]:
# Partition the yearly averages around the policy year: 2012 itself belongs to "after".
wa_b4 = wa_result.loc[wa_result["Year"].lt(2012)]
wa_after = wa_result.loc[wa_result["Year"].ge(2012)]

In [13]:
# Column names of the predictor (x) and the response (y).
x, y = "Year", "shipment_per_100k"

In [14]:
# Function to plot a vertical line at year of policy implementation
def vertical_line(year):
    """Return an Altair rule mark positioned at ``year`` on a quantitative ``Year`` axis.

    The rule's colour is read from a one-row ``color`` column holding "black";
    the colour encoding is given ``scale=None``.
    """
    marker = pd.DataFrame({"Year": [year], "color": ["black"]})
    rule = alt.Chart(marker).mark_rule()
    return rule.encode(
        x="Year:Q",
        color=alt.Color("color:N", scale=None),
    )


In [15]:
# Rule marking 2012, the year the Washington policy change took effect.
line = vertical_line(year=2012)



In [16]:
# creating the regression model and calculating the error bands for creating the plots
 
def get_reg_fit_and_ci(data, color, xvar, yvar, legend, alpha=0.05, tick_years=None):
    """Fit ``yvar ~ xvar`` by OLS and build a fitted-line + confidence-band chart.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the columns named by ``xvar`` and ``yvar``.
    color : str
        Colour applied to the confidence band only. The fitted line is coloured
        through the ``Before/After`` encoding, for which no explicit scale is set.
    xvar, yvar : str
        Names of the predictor and response columns; both are interpolated into
        the regression formula.
    legend : str
        Label written to the ``Before/After`` column and shown in the legend.
    alpha : float, default 0.05
        Significance level forwarded to ``conf_int`` for the band.
    tick_years : sequence of numbers, optional
        Tick positions for the x axis. Defaults to 2009 through 2014.

    Returns
    -------
    predictions : pandas.DataFrame
        Grid of ``xvar`` values with the predicted ``yvar``, ``ci_low``,
        ``ci_high`` and the ``Before/After`` label.
    chart : altair layered chart
        The confidence band with the fitted line drawn on top.
    """
    if tick_years is None:
        tick_years = [2009, 2010, 2011, 2012, 2013, 2014]

    # Grid of x values spanning the rows that have a response value.
    # linspace fixes both the number of points and the exact end point; arange
    # with a fractional step guarantees neither and fails when min == max.
    observed_x = data.loc[data[yvar].notna(), xvar]
    grid = np.linspace(observed_x.min(), observed_x.max(), num=101)
    predictions = pd.DataFrame({xvar: grid})

    # Fitting the model and making the predictions
    model = smf.ols(f"{yvar} ~ {xvar}", data=data).fit()
    model_predict = model.get_prediction(predictions[[xvar]])
    predictions[yvar] = model_predict.summary_frame()["mean"]
    predictions[["ci_low", "ci_high"]] = model_predict.conf_int(alpha=alpha)

    # Label every grid row so the line can be coloured and listed in the legend.
    predictions["Before/After"] = str(legend)

    reg = (
        alt.Chart(predictions)
        .mark_line()
        .encode(
            x=xvar,
            y=alt.Y(yvar),
            color=alt.Color(
                "Before/After",
                legend=alt.Legend(title="Before/After Policy Change"),
            ),
        )
    )

    ci = (
        alt.Chart(predictions)
        .mark_errorband()
        .encode(
            alt.X(f"{xvar}:Q", axis=alt.Axis(format=".0f", values=list(tick_years))),
            y=alt.Y(
                "ci_low",
                title="Opioid Shipment (MME) Per 100K Residents",
                scale=alt.Scale(zero=False),
            ),
            y2="ci_high",
            color=alt.value(str(color)),
        )
    )

    # Band first so the fitted line is drawn on top of it.
    chart = ci + reg
    return predictions, chart

In [17]:
# Using the get_reg_fit_and_ci function to create the charts
 
def build_chart(data, color, xvar, yvar, legend, alpha=0.05):
    """Return only the chart produced by ``get_reg_fit_and_ci``.

    Every argument is forwarded unchanged; the predictions frame that
    ``get_reg_fit_and_ci`` also returns is discarded.
    """
    fit_and_chart = get_reg_fit_and_ci(
        data=data,
        color=color,
        xvar=xvar,
        yvar=yvar,
        legend=legend,
        alpha=alpha,
    )
    return fit_and_chart[1]
 


In [18]:
# Pre/post analysis for Washington: one fitted trend for the years before the policy
# change, one for the years from the policy change onward, and the policy-year rule.
washington_before_chart = build_chart(
    data=wa_b4,
    color="orange",
    xvar="Year",
    yvar="shipment_per_100k",
    legend="Washington Before",
    alpha=0.05,
)

washington_after_chart = build_chart(
    data=wa_after,
    color="blue",
    xvar="Year",
    yvar="shipment_per_100k",
    legend="Washington After",
    alpha=0.05,
)

# Layer the two trends and the rule, then attach the figure title.
washington_final_pre_post_prescriptions = (
    washington_before_chart + washington_after_chart + line
)
washington_final_pre_post_prescriptions = washington_final_pre_post_prescriptions.properties(
    title="Pre Post Analysis Of Opioid Shipment In Washington"
)

washington_final_pre_post_prescriptions
