# Difference-in-Differences Part II: HCRIS + Medicaid Expansion

*(Empirical-only notebook: implements all code chunks from the slide deck in both R and Python.)*


In [None]:
!pip -q install rpy2
%load_ext rpy2.ipython

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf


In [None]:
%%R
# Install pacman on first use, then let it install (if needed) and attach the
# packages used by the R cells in this notebook.
# requireNamespace() only checks that pacman is available; unlike require() it
# does not attach the package, which is unnecessary because p_load() is called
# through `pacman::`.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
pacman::p_load(tidyverse, lubridate, fixest, modelsummary)


## Step 1: Look at the data (R)


In [None]:
%%R
# Load the hospital panel and derive the expansion variables used throughout.
hcris.data <- read_csv("../data/output/hcris-snippets/hcris-data.csv") %>%
  mutate(
    expand_year = lubridate::year(date_adopted),
    expand_ever = expanded,
    # Row-level status: "Expanded" only from the adoption year onward.
    status = if_else(
      !is.na(expand_year) & year >= expand_year,
      "Expanded",
      "Not Expanded"
    ),
    # Rescale to millions so values match the "($mm)" axis label below.
    uncomp_care_m = uncomp_care / 1e6
  ) %>%
  filter(!is.na(expand_ever))

# Mean uncompensated care for each expansion group in each year.
uc.plot.dat <- hcris.data %>%
  group_by(expand_ever, year) %>%
  summarize(mean = mean(uncomp_care_m, na.rm = TRUE), .groups = "drop")

uc.plot <- ggplot(
  data = uc.plot.dat,
  aes(x = year, y = mean, group = expand_ever, linetype = expand_ever)
) +
  geom_line() +
  geom_point() +
  theme_bw() +
  # Vertical marker placed between 2013 and 2014.
  geom_vline(xintercept = 2013.5, color = "red") +
  # Derive each text label from the row's own expand_ever value instead of a
  # positional vector, so the labels cannot be swapped by row order and the
  # layer still works if one group has no 2015 observation.
  geom_text(
    data = uc.plot.dat %>%
      filter(year == 2015) %>%
      mutate(
        group_label = if_else(
          as.logical(expand_ever),
          "Expansion",
          "Non-expansion"
        )
      ),
    aes(label = group_label, x = year + 1, y = mean)
  ) +
  guides(linetype = "none") +
  labs(
    x = "Year",
    y = "Uncompensated Care ($mm)",
    title = "Hospital Uncompensated Care Over Time"
  )

uc.plot


## Step 1: Look at the data (Python)


In [None]:
# Load the hospital panel and derive the expansion variables used throughout.
hcris_data = pd.read_csv("../data/output/hcris-snippets/hcris-data.csv")

hcris_data["expand_year"] = pd.to_datetime(hcris_data["date_adopted"]).dt.year
hcris_data["expand_ever"] = hcris_data["expanded"]

# Row-level status: "Expanded" only from the adoption year onward.
hcris_data["status"] = np.where(
    (~hcris_data["expand_year"].isna()) & (hcris_data["year"] >= hcris_data["expand_year"]),
    "Expanded",
    "Not Expanded"
)

# Rescale to millions so values match the "($mm)" axis label below.
hcris_data["uncomp_care_m"] = hcris_data["uncomp_care"] / 1_000_000

# Keep only rows whose expansion indicator is known.
hcris_data = hcris_data.loc[~hcris_data["expand_ever"].isna()].copy()

# Mean uncompensated care for each expansion group in each year.
uc_plot_dat = (
    hcris_data
    .groupby(["expand_ever", "year"], as_index=False)
    .agg(mean=("uncomp_care_m", "mean"))
)

# Use the same group names as the R figure instead of raw True/False; any
# unexpected key falls back to its string form rather than being mislabelled.
group_labels = {True: "Expansion", False: "Non-expansion"}

for key, grp in uc_plot_dat.groupby("expand_ever"):
    plt.plot(
        grp["year"],
        grp["mean"],
        marker="o",
        label=group_labels.get(key, str(key)),
    )

# Vertical marker placed between 2013 and 2014.
plt.axvline(2013.5, color="red")
plt.xlabel("Year")
plt.ylabel("Uncompensated Care ($mm)")
plt.title("Hospital Uncompensated Care Over Time")
plt.legend()
plt.show()


## Step 2: Estimate effects (R)


In [None]:
%%R
# Estimation sample: rows with a 2014 adoption year or with no adoption year.
reg.dat <- hcris.data %>%
  filter(is.na(expand_year) | expand_year == 2014) %>%
  mutate(
    post = year >= 2014,
    # Numeric product of the two indicators (the DD interaction term).
    treat = post * expand_ever
  )

# `post * expand_ever` expands to both main effects plus their interaction.
dd.uc.reg <- lm(
  uncomp_care_m ~ post * expand_ever,
  data = reg.dat
)

# Coefficient table with standard errors clustered by state.
modelsummary(
  list("DD (2014)" = dd.uc.reg),
  vcov = ~ state,
  coef_omit = "Intercept",
  gof_map = NA,
  shape = term + statistic ~ model
)


## Step 2: Estimate effects (Python)


In [None]:
# Estimation sample: rows with a 2014 adoption year or with no adoption year.
reg_dat = hcris_data.loc[
    (hcris_data["expand_year"].eq(2014)) | (hcris_data["expand_year"].isna())
].copy()

reg_dat["post"] = reg_dat["year"] >= 2014
reg_dat["treat"] = reg_dat["post"] & reg_dat["expand_ever"]

# Store the indicators as 0/1 integers so the formula treats them as numeric.
reg_dat["post"] = reg_dat["post"].astype(int)
reg_dat["expand_ever"] = reg_dat["expand_ever"].astype(int)
reg_dat["treat"] = reg_dat["treat"].astype(int)

# Drop incomplete rows up front: the cluster labels passed to fit() must line
# up one-to-one with the rows the formula interface actually uses.
dd_dat = reg_dat.dropna(subset=["uncomp_care_m", "post", "expand_ever", "state"])

# Cluster standard errors by state, matching `vcov = ~ state` in the R cell.
dd_uc_reg = smf.ols(
    "uncomp_care_m ~ post + expand_ever + post:expand_ever",
    data=dd_dat,
).fit(
    cov_type="cluster",
    cov_kwds={"groups": pd.factorize(dd_dat["state"])[0]},
)
dd_uc_reg.summary()


## TWFE in practice (R)


In [None]:
%%R
# 2x2 difference-in-differences using the pre-built `treat` interaction.
m.dd <- lm(
  uncomp_care_m ~ post + expand_ever + treat,
  data = reg.dat
)

# Same outcome and treatment term with provider and year fixed effects.
m.twfe <- feols(
  uncomp_care_m ~ treat | provider_number + year,
  cluster = ~ state,
  data = reg.dat
)

# Side-by-side table; standard errors clustered by state for both models.
modelsummary(
  list("DD" = m.dd, "TWFE" = m.twfe),
  vcov = ~ state,
  coef_omit = "Intercept",
  gof_map = NA,
  shape = term + statistic ~ model
)


## TWFE in practice (Python)


In [None]:
def _fit_clustered(formula, data, columns):
    """Fit OLS on rows complete in `columns` with state-clustered errors.

    Incomplete rows are dropped before fitting so the cluster labels line up
    one-to-one with the rows the formula interface uses.
    """
    frame = data.dropna(subset=columns + ["state"])
    state_codes = pd.factorize(frame["state"])[0]
    return smf.ols(formula, data=frame).fit(
        cov_type="cluster",
        cov_kwds={"groups": state_codes},
    )


# Both models cluster by state, matching `vcov = ~ state` in the R cell.
m_dd = _fit_clustered(
    "uncomp_care_m ~ post + expand_ever + treat",
    reg_dat,
    ["uncomp_care_m", "post", "expand_ever", "treat"],
)
# Provider and year effects enter as dummy variables.
m_twfe = _fit_clustered(
    "uncomp_care_m ~ treat + C(provider_number) + C(year)",
    reg_dat,
    ["uncomp_care_m", "treat", "provider_number", "year"],
)

print(m_dd.summary())
print(m_twfe.summary())


## Common treatment timing event study (R)


In [None]:
%%R
# Event study with a common treatment date: one expand_ever interaction per
# calendar year, with 2013 as the omitted reference year, plus provider and
# year fixed effects. Standard errors are clustered by state.
mod.twfe <- feols(
  uncomp_care_m ~ i(year, expand_ever, ref = 2013) | provider_number + year,
  cluster = ~ state,
  data = reg.dat
)

# The interacted variable is calendar year, so label the axis accordingly
# (it was previously labelled "Time to treatment").
iplot(mod.twfe, xlab = "Year", main = "Event study")


## Common treatment timing event study (Python)


In [None]:
# Event study with a common treatment date, mirroring the R specification
# i(year, expand_ever, ref = 2013) | provider_number + year.
# Drop incomplete rows first so the cluster labels align with the model rows.
es_dat = reg_dat.dropna(
    subset=["uncomp_care_m", "expand_ever", "provider_number", "year", "state"]
).copy()

# Build one year-by-expansion indicator per calendar year, omitting 2013 so
# every coefficient is measured relative to that year. (The default coding of
# C(year)*expand_ever would omit the first year in the data instead.)
ref_year = 2013
es_terms = []
for yr in sorted(es_dat["year"].unique()):
    if yr == ref_year:
        continue
    term = f"expand_x_{int(yr)}"
    es_dat[term] = (
        (es_dat["year"] == yr) & (es_dat["expand_ever"] == 1)
    ).astype(int)
    es_terms.append(term)

# No expand_ever main effect: it does not vary within provider, so it is
# collinear with the provider dummies.
es_formula = (
    "uncomp_care_m ~ "
    + " + ".join(es_terms)
    + " + C(provider_number) + C(year)"
)

# Cluster standard errors by state, matching `cluster = ~ state` in R.
mod_twfe = smf.ols(es_formula, data=es_dat).fit(
    cov_type="cluster",
    cov_kwds={"groups": pd.factorize(es_dat["state"])[0]},
)

print(mod_twfe.summary())


## Differential treatment timing event study (R)


In [None]:
%%R
# Full sample with an event-time variable measured from each adoption year.
reg.dat.full <- hcris.data %>%
  filter(!is.na(expand_ever)) %>%
  # Rows with expand_ever equal to FALSE get event time 0; all other rows get
  # years since adoption.
  mutate(
    time_to_treat = if_else(expand_ever == FALSE, 0, year - expand_year)
  ) %>%
  # Bin every lead earlier than -3 into the -3 category.
  mutate(time_to_treat = pmax(time_to_treat, -3))

# Event-time interactions with expand_ever, omitting event time -1, plus
# provider and year fixed effects; standard errors clustered by state.
mod.twfe.full <- feols(
  uncomp_care_m ~ i(time_to_treat, expand_ever, ref = -1) |
    provider_number + year,
  data = reg.dat.full,
  cluster = ~ state
)

iplot(mod.twfe.full, main = 'Event study', xlab = 'Time to treatment')


## Differential treatment timing event study (Python)


In [None]:
# Full sample with an event-time variable measured from each adoption year.
reg_dat_full = hcris_data.loc[~hcris_data["expand_ever"].isna()].copy()

# Rows with a false expand_ever get event time 0; all other rows get years
# since adoption.
reg_dat_full["time_to_treat"] = np.where(
    reg_dat_full["expand_ever"].astype(bool),
    reg_dat_full["year"] - reg_dat_full["expand_year"],
    0
)

# Bin every lead earlier than -3 into the -3 category.
reg_dat_full["time_to_treat"] = reg_dat_full["time_to_treat"].clip(lower=-3)

# Mirror the R specification i(time_to_treat, expand_ever, ref = -1).
# Rows with an undefined event time are excluded, and incomplete rows are
# dropped first so the cluster labels align with the model rows.
es_dat_full = reg_dat_full.dropna(
    subset=[
        "uncomp_care_m", "time_to_treat", "expand_ever",
        "provider_number", "year", "state",
    ]
).copy()
ever_expanded = es_dat_full["expand_ever"].astype(bool)

# One indicator per event time among expansion rows, omitting -1 so each
# coefficient is relative to the year before adoption. (The default coding of
# C(time_to_treat)*expand_ever would omit the lowest level, -3, instead.)
ref_period = -1
es_terms_full = []
for k in sorted(es_dat_full.loc[ever_expanded, "time_to_treat"].unique()):
    if k == ref_period:
        continue
    k = int(k)
    # Formula-safe column names: "m" marks leads, "p" marks lags.
    term = f"ttt_m{abs(k)}" if k < 0 else f"ttt_p{k}"
    es_dat_full[term] = (
        ever_expanded & (es_dat_full["time_to_treat"] == k)
    ).astype(int)
    es_terms_full.append(term)

# No expand_ever main effect: it does not vary within provider, so it is
# collinear with the provider dummies.
es_formula_full = (
    "uncomp_care_m ~ "
    + " + ".join(es_terms_full)
    + " + C(provider_number) + C(year)"
)

# Cluster standard errors by state, matching `cluster = ~ state` in R.
mod_twfe_full = smf.ols(es_formula_full, data=es_dat_full).fit(
    cov_type="cluster",
    cov_kwds={"groups": pd.factorize(es_dat_full["state"])[0]},
)

print(mod_twfe_full.summary())
