# In [None]:
import pandas as pd
from pathlib import Path


def monthlist_for_year(y: int) -> list[str]:
    """Return the zero-padded month labels to process for year *y*.

    2006 is a special case: only "10", "11" and "12" are returned. Every
    other year yields all twelve labels, "01" through "12".
    """
    first_month = 10 if y == 2006 else 1
    return [str(month).zfill(2) for month in range(first_month, 13)]


# Column name -> pandas dtype used when reading a service-area CSV.
# Key order is the column order assigned to the file. Note that `ssa` and
# `fips` are read as float64 and `partial` as nullable boolean.
SA_DTYPES = dict(
    contractid="string",
    org_name="string",
    org_type="string",
    plan_type="string",
    partial="boolean",
    eghp="string",
    ssa="float64",
    fips="float64",
    county="string",
    state="string",
    notes="string",
)

# Column names in file order, derived from SA_DTYPES so the two stay in sync.
SA_COLUMNS = list(SA_DTYPES)

def read_service_area(path: str | Path) -> pd.DataFrame:
    """Read one service-area CSV into a DataFrame with the SA_COLUMNS schema.

    The first line of the file is skipped, and columns are named from
    ``SA_COLUMNS`` and typed from ``SA_DTYPES`` rather than taken from the
    file. A literal ``"*"`` is treated as missing, in addition to pandas'
    default NA markers.

    Args:
        path: Location of the CSV file to read.

    Returns:
        The parsed file, one row per CSV record.
    """
    read_options = {
        "skiprows": 1,  # drop the file's first line (presumably its own header row)
        "header": None,  # column names come from SA_COLUMNS, not from the file
        "names": SA_COLUMNS,
        "na_values": ["*"],
        "dtype": SA_DTYPES,
        "low_memory": False,
    }
    return pd.read_csv(path, **read_options)


def load_month_sa(
    m: str,
    y: int,
    *,
    base_dir: str | Path = "data/input/ma/service-area/Extracted Data",
) -> pd.DataFrame:
    """Load one monthly service-area file and tag it with its month and year.

    Args:
        m: Month label exactly as it appears in the file name (e.g. "01").
        y: Year exactly as it appears in the file name.
        base_dir: Directory containing the ``MA_Cnty_SA_{y}_{m}.csv`` files.
            Defaults to the relative path this function previously had
            hard-coded, so existing callers behave the same.

    Returns:
        The frame produced by ``read_service_area`` with two added columns:
        ``month`` (``int(m)``) and ``year`` (``y``).
    """
    path = Path(base_dir) / f"MA_Cnty_SA_{y}_{m}.csv"
    df = read_service_area(path)
    # Store the month as an integer so later sorts on "month" are numeric.
    df["month"] = int(m)
    df["year"] = y
    return df


def build_final_service_area(y: int) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Build the month-level and the collapsed service-area tables for year *y*.

    Steps:
      1. Load and stack every monthly file listed by ``monthlist_for_year(y)``.
      2. Fill missing ``fips`` within each (state, county) group, forward
         then backward.
      3. Fill missing contract attributes within each ``contractid`` the
         same way.
      4. Collapse to one row per (contractid, fips, year), keeping the last
         value of every remaining column.

    Returns:
        ``(service_year, final_service_area)``: the filled month-level frame,
        sorted by contractid, fips, year and month, and the collapsed frame.
    """

    def fill_down_then_up(values):
        # Forward-fill first; back-fill only covers gaps at the start of a group.
        return values.ffill().bfill()

    monthly_frames = [load_month_sa(month, y) for month in monthlist_for_year(y)]
    service_year = pd.concat(monthly_frames, ignore_index=True)

    # The fills below depend on row order, so sort first. The sort is stable,
    # meaning rows that tie on every key keep their load order.
    fips_fill_order = ["contractid", "fips", "state", "county", "month"]
    service_year = service_year.sort_values(fips_fill_order, kind="mergesort")

    # dropna=False keeps rows with a missing state/county as a group of their own.
    by_county = service_year.groupby(["state", "county"], dropna=False)
    service_year["fips"] = by_county["fips"].transform(fill_down_then_up)

    # R: group_by(contractid) %>% fill(plan_type, partial, eghp, org_type, org_name, .direction="downup")
    contract_attrs = ["plan_type", "partial", "eghp", "org_type", "org_name"]
    by_contract = service_year.groupby(["contractid"], dropna=False)
    service_year[contract_attrs] = by_contract[contract_attrs].transform(fill_down_then_up)

    # Re-sort so that "last" within each collapsed group means the latest month.
    collapse_order = ["contractid", "fips", "year", "month"]
    service_year = service_year.sort_values(collapse_order, kind="mergesort")

    # NOTE: the "last" aggregation returns the last non-null value per column,
    # so a collapsed row may combine values from different months.
    carried = [
        "state", "county", "org_name", "org_type", "plan_type",
        "partial", "eghp", "ssa", "notes",
    ]
    last_of_each = {column: (column, "last") for column in carried}
    final_service_area = service_year.groupby(
        ["contractid", "fips", "year"], dropna=False, as_index=False
    ).agg(**last_of_each)

    return service_year, final_service_area

# Example:
# y = 2006
# service_year, final_service_area = build_final_service_area(y)
