In [None]:
import re
import pandas as pd

# First signed decimal token in a string. The second alternative accepts
# numbers written without a leading zero (".5", "-.25"), which would otherwise
# be read as 5 and 25.
_NUMBER_TOKEN = re.compile(r"-?(?:\d+(?:\.\d+)?|\.\d+)")


def parse_number_like_readr(x):
    """
    Rough equivalent of readr::parse_number(as.character(.)).

    Extracts the first numeric token from ``x``. Commas are removed first
    (treated as grouping marks); percent signs and any other surrounding
    text are ignored.

    Parameters
    ----------
    x : object
        Any scalar; it is converted with ``str()`` before parsing.

    Returns
    -------
    float
        The parsed number, or NaN when ``x`` is None, a float NaN, blank,
        or contains no numeric token.
    """
    if x is None or (isinstance(x, float) and pd.isna(x)):
        return float("nan")
    # Drop grouping commas before searching so "1,234" is read as 1234.
    s = str(x).strip().replace(",", "")
    if not s:
        return float("nan")
    m = _NUMBER_TOKEN.search(s)
    return float(m.group(0)) if m else float("nan")


# Location of the 2013 Part C "Star" master table.
ma_path_a = (
    "data/input/ma/star-ratings/Extracted Star Ratings/Part C 2013 Fall/"
    "2013_Part_C_Report_Card_Master_Table_2012_10_17_Star.csv"
)

# The first four lines of the file are skipped and column names are supplied
# by rating_vars_2013 (defined elsewhere). "", "NA" and "*" are read as
# missing, in addition to pandas' default NA markers.
star_data_a = pd.read_csv(
    ma_path_a,
    header=None,
    names=rating_vars_2013,
    skiprows=4,
    keep_default_na=True,
    na_values=["", "NA", "*"],
)

# These columns are left as read; every other column is converted to float64
# by running each value through parse_number_like_readr.
exclude_cols = {
    "contractid",
    "org_type",
    "contract_name",
    "org_marketing",
    "org_parent",
}
cols_to_parse = [name for name in star_data_a.columns if name not in exclude_cols]
for c in cols_to_parse:
    parsed_col = star_data_a[c].map(parse_number_like_readr)
    star_data_a[c] = parsed_col.astype("float64")


# Location of the 2013 Part C "Summary" master table.
ma_path_b = (
    "data/input/ma/star-ratings/Extracted Star Ratings/Part C 2013 Fall/"
    "2013_Part_C_Report_Card_Master_Table_2012_10_17_Summary.csv"
)

# The first two lines of the file are skipped and explicit column names are
# supplied. "", "NA" and "*" are read as missing, in addition to pandas'
# default NA markers.
star_data_b = pd.read_csv(
    ma_path_b,
    skiprows=2,
    names=[
        "contractid","org_type","org_marketing","contract_name","org_parent",
        "partc_score","partc_lowscore","partc_highscore",
        "partcd_score","partcd_lowscore","partcd_highscore"
    ],
    header=None,
    na_values=["", "NA", "*"],
    keep_default_na=True,
)

# A contract is flagged as new when either summary score holds the literal
# text below. The mask is computed before the score columns are overwritten.
is_new_contract = (
    (star_data_b["partc_score"] == "Plan too new to be measured")
    | (star_data_b["partcd_score"] == "Plan too new to be measured")
)
star_data_b["new_contract"] = is_new_contract.astype("int64")

# Parse each score column-wise and blank out flagged contracts with a mask.
# This replaces a row-wise DataFrame.apply(axis=1): that form hands back a
# DataFrame instead of a Series when the table has zero rows (breaking the
# column assignment) and constructs a Series per row for a simple conditional.
for score_col in ("partc_score", "partcd_score"):
    parsed_scores = (
        star_data_b[score_col].map(parse_number_like_readr).astype("float64")
    )
    star_data_b[score_col] = parsed_scores.mask(is_new_contract)

# 1.0 where partc_lowscore is exactly "Yes", 0.0 otherwise (missing included).
star_data_b["low_score"] = (star_data_b["partc_lowscore"] == "Yes").astype("float64")

# Keep only the join key and the derived columns.
star_data_b = star_data_b.loc[:, ["contractid", "new_contract", "low_score", "partc_score", "partcd_score"]]

# Left-join star_data_b onto star_data_a by contractid after dropping three
# descriptive columns from star_data_a (ignored if absent), then add a
# constant year column.
final_star_ratings = pd.merge(
    star_data_a.drop(
        columns=["contract_name", "org_type", "org_marketing"],
        errors="ignore",
    ),
    star_data_b,
    how="left",
    on="contractid",
)
final_star_ratings["year"] = 2013
