In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

In [2]:
df = pd.read_csv('data/states_hic_cleaned.csv')

In [9]:
# --- 1. Prepare data ---

df = df[df["State"] != "United States"].copy()

df["Uninsured Rate Change (2010-2015)"] = df["Uninsured Rate Change (2010-2015)"].astype(float)
df["Health Insurance Coverage Change (2010-2015)"] = df["Health Insurance Coverage Change (2010-2015)"].astype(float)
df["Medicaid Enrollment Change (2013-2016)"] = df["Medicaid Enrollment Change (2013-2016)"].astype(float)
df["State Medicaid Expansion (2016)"] = df["State Medicaid Expansion (2016)"].astype(bool)


# Scale large counts
df["Coverage Change (millions)"] = df["Health Insurance Coverage Change (2010-2015)"] / 1_000_000
df["Medicaid Change (millions)"] = df["Medicaid Enrollment Change (2013-2016)"] / 1_000_000

# Target variable
y = df["Uninsured Rate Change (2010-2015)"].values

# Feature variables
X = df[[
    "State Medicaid Expansion (2016)",
    "Coverage Change (millions)",
    "Medicaid Change (millions)"
]].copy()


# Convert boolean to integer
X["State Medicaid Expansion (2016)"] = X["State Medicaid Expansion (2016)"].astype(int)


# --- 2. Fit linear regression ---

# Create and fit the model
lin_reg = LinearRegression()
lin_reg.fit(X, y)



# --- 3. Predict a 5‑year change under a policy‑as‑is scenario ---

# Predict the change for each state
pred_change_state = lin_reg.predict(X)

# Create a DataFrame to hold the projections
projection_df = df[["State"]].copy()


# Add the predicted change to the DataFrame
projection_df["Pred Change (pct points, 5 yrs)"] = pred_change_state


# --- 4. Aggregate to a national 2020 projection ---

us_2015 = 9.4  # national uninsured rate in 2015 from the dataset

# Calculate the average predicted change across all states
avg_pred_change = projection_df["Pred Change (pct points, 5 yrs)"].mean()

# Projected national uninsured rate in 2020
us_2020_proj = us_2015 + avg_pred_change

# --- 5. Output results ---
us_2015, avg_pred_change, us_2020_proj

(9.4, np.float64(-5.433333333333334), np.float64(3.966666666666667))

###### Scikit‑learn linear regression was used to relate state‑level uninsured rate changes to Medicaid expansion status and coverage gains, then the fitted model’s average predicted 5‑year change was added to the observed 2015 national uninsured rate (9.4%) to obtain a projected 2020 rate under a “policy‑as‑is” scenario.

###### The linear model predicts an average 5‑year drop of about 5.4 percentage points in the uninsured rate, implying a projected national uninsured rate of roughly 4.0% in 2020, down from 9.4% in 2015. This suggests that, if the 2010–2015 relationship between Medicaid expansion, coverage gains, and uninsured rate changes had continued unchanged for another five years, the U.S. could have approached very low uninsured levels by 2020, much closer to near‑universal coverage.
