In [None]:
import pandas as pd
import numpy as np

# Names for the 14 spreadsheet columns, in positional order.
# (mean_risk is not in the source file, so it is not listed here.)
cols = [
    "ssa",
    "state",
    "county_name",
    "parta_enroll",
    "parta_reimb",
    "parta_percap",
    "parta_reimb_unadj",
    "parta_percap_unadj",
    "parta_ime",
    "parta_dsh",
    "parta_gme",
    "partb_enroll",
    "partb_reimb",
    "partb_percap",
]

# Load the 2020 FFS workbook.
# - skiprows=2 drops the first two rows of the sheet; because `header` is
#   left at its default (0), the next row is consumed as the header row and
#   its labels are replaced by `cols`.
# - Cells containing "*" or "." are read as missing (NaN), in addition to
#   pandas' default NA markers.
ffs_data = pd.read_excel(
    io="data/input/ffs-costs/Extracted Data/FFS2020/FFS20.xlsx",
    names=cols,
    skiprows=2,
    na_values=["*", "."],
)

# Coerce the ssa column to numbers; values that cannot be parsed become NaN.
ffs_data["ssa"] = ffs_data["ssa"].pipe(pd.to_numeric, errors="coerce")

# mean_risk is missing from the source file, so create it as an all-NaN
# placeholder column.
ffs_data["mean_risk"] = np.nan

# Equivalent to readr::parse_number(): take the first number found in each
# cell, ignoring surrounding text (currency symbols, units, ...) and comma
# grouping marks inside the number.
cols_to_parse = [
    "parta_enroll", "parta_reimb",
    "partb_enroll", "partb_reimb",
    "mean_risk"
]


def _parse_number(values):
    """Return *values* as a numeric Series, parsing text cells when needed.

    Args:
        values: A pandas Series holding numbers, text, or a mix of both.

    Returns:
        The same Series if it already has a numeric dtype; otherwise a new
        numeric Series in which each cell is the first number found in its
        text, or NaN when the cell contains no number.
    """
    # Columns pandas already parsed as numbers must not be round-tripped
    # through str(): very large or small floats render in scientific notation
    # ("1e+16"), and stripping characters from that text corrupts the value.
    if pd.api.types.is_numeric_dtype(values):
        return values

    first_number = (
        values
        .astype(str)
        # Drop thousands separators so "1,234.5" is read as one number.
        .str.replace(",", "", regex=False)
        # Capture the first signed decimal number, with optional exponent,
        # instead of gluing every digit in the cell together.
        .str.extract(
            r"([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)",
            expand=False,
        )
    )
    # Cells with no match are NaN after extract and stay NaN here.
    return pd.to_numeric(first_number, errors="coerce")


for col in cols_to_parse:
    ffs_data[col] = _parse_number(ffs_data[col])

# Build the output table: keep only the listed columns (in this order) and
# add a constant `year` column set to 2020.
final_ffs_costs = ffs_data.loc[
    :,
    [
        "ssa",
        "state",
        "county_name",
        "parta_enroll",
        "parta_reimb",
        "partb_enroll",
        "partb_reimb",
        "mean_risk",
    ],
].assign(year=2020)