In [1]:
## import libraries
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import altair as alt

## Load data from the FL shipment cleansed files
ship_data_load_FL = pd.read_csv('/Users/sukhpreetsahota/Desktop/Duke/Fall 2022/IDS 720.01.F22/Class Project/pds-2022-yellow-team/20_intermediate_files/fl_ship_merge.csv')
ship_data_load_FL_copy = ship_data_load_FL.copy()
ship_data_load_FL_copy['Shipment_Rate_Percentage_MME_Rate'] = ship_data_load_FL_copy['MME']/ship_data_load_FL_copy['POPULATION']
ship_data_FL = ship_data_load_FL_copy.loc[ship_data_load_FL_copy['BUYER_STATE']=='FL']
ship_data_FL_reference = ship_data_load_FL_copy.loc[ship_data_load_FL_copy['BUYER_STATE']!='FL']
ship_data_FL

Unnamed: 0,BUYER_STATE,BUYER_COUNTY,YEAR,MME,FIPS Code,State,STNAME,CTYNAME,POPULATION,Shipment_Rate_Percentage_MME_Rate
135,FL,ALACHUA,2006,8.259662e+07,12001,Florida,Florida,ALACHUA,237199.0,348.216555
136,FL,ALACHUA,2007,9.525963e+07,12001,Florida,Florida,ALACHUA,240196.0,396.591234
137,FL,ALACHUA,2008,1.146752e+08,12001,Florida,Florida,ALACHUA,242133.0,473.604267
138,FL,ALACHUA,2009,1.412810e+08,12001,Florida,Florida,ALACHUA,243574.0,580.033370
139,FL,ALACHUA,2010,1.509108e+08,12001,Florida,Florida,ALACHUA,247624.0,609.435216
...,...,...,...,...,...,...,...,...,...,...
730,FL,WASHINGTON,2010,1.420824e+07,12133,Florida,Florida,WASHINGTON,24726.0,574.627644
731,FL,WASHINGTON,2011,1.396408e+07,12133,Florida,Florida,WASHINGTON,24516.0,569.590516
732,FL,WASHINGTON,2012,1.565814e+07,12133,Florida,Florida,WASHINGTON,24747.0,632.728645
733,FL,WASHINGTON,2013,1.621628e+07,12133,Florida,Florida,WASHINGTON,24506.0,661.726733


In [2]:
## Transform and Groupby MME Rate by State and Year for FL
ship_data_FL[
    "MME_Rate"
] = ship_data_FL.groupby(["BUYER_STATE", "YEAR"])[
    "Shipment_Rate_Percentage_MME_Rate"
].transform(
    "mean"
)
ship_data_FL_subset = ship_data_FL[["BUYER_STATE", "YEAR", "MME_Rate"]]
ship_data_FL_subset_grouped = ship_data_FL_subset.groupby(["BUYER_STATE", "YEAR"], as_index = False).mean()
ship_data_FL_subset_grouped_pre = ship_data_FL_subset_grouped.loc[ship_data_FL_subset_grouped["YEAR"] < 2010]
ship_data_FL_subset_grouped_post = ship_data_FL_subset_grouped.loc[ship_data_FL_subset_grouped["YEAR"] >= 2010]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ship_data_FL[


In [3]:
## Function to create confidence interval for FL
def get_reg_fit_FL(data, yvar, xvar, alpha):
    # Grid for predicted values
    x = data.loc[pd.notnull(data[yvar]), xvar]
    xmin = x.min()
    xmax = x.max()
    step = (xmax - xmin) / 100
    grid = np.arange(xmin, xmax + step, step)
    predictions = pd.DataFrame({xvar: grid})

    # Fit model, get predictions
    model = smf.ols(f"{yvar} ~ {xvar}", data=data).fit()
    model_predict = model.get_prediction(predictions[xvar])
    predictions[yvar] = model_predict.summary_frame()["mean"]
    predictions[["ci_low", "ci_high"]] = model_predict.conf_int(alpha=alpha)

    # Build chart
    reg = alt.Chart(predictions).mark_line(color = "teal").encode(
        x=alt.X(
            xvar, 
            scale=alt.Scale(zero=False), 
            axis = alt.Axis(format="T", 
            title = "Year")), 
        y = alt.Y(
            yvar, 
            scale=alt.Scale(zero=False),
            title = "Opioid Shipment Rate (per 100,000 people)")
    )
    ci = (
        alt.Chart(predictions)
        .mark_errorband(color = "teal")
        .encode(
            x=xvar,
            y=alt.Y("ci_low", title=""),
            y2="ci_high",
        )
    )
    chart = ci + reg
    return predictions, chart

In [4]:
## Generate Pre-Post Graphs for FL
fit, reg_chart_pre_FL = get_reg_fit_FL(
    ship_data_FL_subset_grouped_pre, 
    yvar="MME_Rate", 
    xvar="YEAR", 
    alpha=0.05
)

fit, reg_chart_post_FL = get_reg_fit_FL(
    ship_data_FL_subset_grouped_post, 
    yvar="MME_Rate", 
    xvar="YEAR", 
    alpha=0.05
)

## Create line post-policy implementation
line_2010 = alt.Chart(pd.DataFrame({'x': [2010]})).mark_rule(strokeDash=[5, 5]).encode(x='x')

## Generate final pre-post graph for FL
pre_post_FL = reg_chart_pre_FL + reg_chart_post_FL + line_2010
pre_post_FL.properties(title="Pre-Post Florida Shipment Rate Analysis")

In [5]:
## Load data from the WA shipment cleansed files
ship_data_load_WA = pd.read_csv('/Users/sukhpreetsahota/Desktop/Duke/Fall 2022/IDS 720.01.F22/Class Project/pds-2022-yellow-team/20_intermediate_files/wa_ship_merge.csv')
ship_data_load_WA_copy = ship_data_load_WA.copy()
ship_data_load_WA_copy['Shipment_Rate_Percentage_MME_Rate'] = ship_data_load_WA_copy['MME']/ship_data_load_WA_copy['POPULATION']
ship_data_WA = ship_data_load_WA_copy.loc[ship_data_load_WA_copy['BUYER_STATE']=='WA']
ship_data_WA_reference = ship_data_load_WA_copy.loc[ship_data_load_WA_copy['BUYER_STATE']!='WA']
ship_data_WA

Unnamed: 0,BUYER_STATE,BUYER_COUNTY,YEAR,MME,FIPS Code,State,STNAME,CTYNAME,POPULATION,Shipment_Rate_Percentage_MME_Rate
1219,WA,ADAMS,2006,2.424599e+06,53001,Washington,Washington,ADAMS,16615.0,145.928309
1220,WA,ADAMS,2007,2.891420e+06,53001,Washington,Washington,ADAMS,16943.0,170.655718
1221,WA,ADAMS,2008,3.410410e+06,53001,Washington,Washington,ADAMS,17257.0,197.624737
1222,WA,ADAMS,2009,3.836774e+06,53001,Washington,Washington,ADAMS,17732.0,216.375702
1223,WA,ADAMS,2010,4.258329e+06,53001,Washington,Washington,ADAMS,18791.0,226.615350
...,...,...,...,...,...,...,...,...,...,...
1565,WA,YAKIMA,2010,6.746887e+07,53077,Washington,Washington,YAKIMA,244249.0,276.229869
1566,WA,YAKIMA,2011,7.600735e+07,53077,Washington,Washington,YAKIMA,245899.0,309.099890
1567,WA,YAKIMA,2012,8.023265e+07,53077,Washington,Washington,YAKIMA,246064.0,326.064141
1568,WA,YAKIMA,2013,8.391631e+07,53077,Washington,Washington,YAKIMA,246395.0,340.576342


In [6]:
## Transform and Groupby MME Rate by State and Year for WA
ship_data_WA[
    "MME_Rate"
] = ship_data_WA.groupby(["BUYER_STATE", "YEAR"])[
    "Shipment_Rate_Percentage_MME_Rate"
].transform(
    "mean"
)
ship_data_WA_subset = ship_data_WA[["BUYER_STATE", "YEAR", "MME_Rate"]]
ship_data_WA_subset_grouped = ship_data_WA_subset.groupby(["BUYER_STATE", "YEAR"], as_index = False).mean()
ship_data_WA_subset_grouped_pre = ship_data_WA_subset_grouped.loc[ship_data_WA_subset_grouped["YEAR"] < 2012]
ship_data_WA_subset_grouped_post = ship_data_WA_subset_grouped.loc[ship_data_WA_subset_grouped["YEAR"] >= 2012]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ship_data_WA[


In [7]:
## Function to create confidence interval for WA
def get_reg_fit_WA(data, yvar, xvar, alpha):
    # Grid for predicted values
    x = data.loc[pd.notnull(data[yvar]), xvar]
    xmin = x.min()
    xmax = x.max()
    step = (xmax - xmin) / 100
    grid = np.arange(xmin, xmax + step, step)
    predictions = pd.DataFrame({xvar: grid})

    # Fit model, get predictions
    model = smf.ols(f"{yvar} ~ {xvar}", data=data).fit()
    model_predict = model.get_prediction(predictions[xvar])
    predictions[yvar] = model_predict.summary_frame()["mean"]
    predictions[["ci_low", "ci_high"]] = model_predict.conf_int(alpha=alpha)

    # Build chart
    reg = alt.Chart(predictions).mark_line(color = "purple").encode(
        x=alt.X(
            xvar, 
            scale=alt.Scale(zero=False), 
            axis = alt.Axis(format="T", 
            title = "Year")), 
        y = alt.Y(
            yvar, 
            scale=alt.Scale(zero=False),
            title = "Opioid Shipment Rate (per 100,000 people)")
    )
    ci = (
        alt.Chart(predictions)
        .mark_errorband(color = "purple")
        .encode(
            x=xvar,
            y=alt.Y("ci_low", title=""),
            y2="ci_high",
        )
    )
    chart = ci + reg
    return predictions, chart

In [8]:
## Generate Pre-Post Graphs for WA
fit, reg_chart_pre_WA = get_reg_fit_WA(
    ship_data_WA_subset_grouped_pre, 
    yvar="MME_Rate", 
    xvar="YEAR", 
    alpha=0.05
)

fit, reg_chart_post_WA = get_reg_fit_WA(
    ship_data_WA_subset_grouped_post, 
    yvar="MME_Rate", 
    xvar="YEAR", 
    alpha=0.05
)

## Create line post-policy implementation
line_2012 = alt.Chart(pd.DataFrame({'x': [2012]})).mark_rule(strokeDash=[5, 5]).encode(x='x')

## Generate final pre-post graph for WA
pre_post_WA = reg_chart_pre_WA + reg_chart_post_WA + line_2012
pre_post_WA.properties(title="Pre-Post Washington Shipment Rate Analysis")