# In [None]:
import numpy as np
import pandas as pd


def _fill_gaps_keep_first(frame: pd.DataFrame, keys: list, value_cols: list) -> pd.DataFrame:
    """Fill missing values within each key group and keep one row per group.

    Every column in ``value_cols`` is forward-filled and then back-filled
    inside its ``keys`` group, so the first row of a group ends up holding the
    first non-missing value found anywhere in that group. Only that first row
    of each group is returned; the original row order is preserved.
    """
    # dropna=False treats NaN key values as a valid group rather than
    # excluding those rows from the fill.
    grouped = frame.groupby(keys, dropna=False)
    # Build the grouping once and reuse it for every column.
    filled = {
        col: grouped[col].transform(lambda s: s.ffill().bfill())
        for col in value_cols
    }
    return frame.assign(**filled).drop_duplicates(subset=keys, keep="first")


def mapd_clean_merge(ma_data: pd.DataFrame, mapd_data: pd.DataFrame, y: int) -> pd.DataFrame:
    """Clean MA and MA-PD premium data and merge them into one table.

    Python/pandas translation of the R function ``mapd.clean.merge()``.

    Steps:
        1. Keep only the key columns and the premium/deductible columns.
        2. Coerce ``planid`` to numeric on both inputs so the merge keys have
           compatible dtypes (values that cannot be parsed become NaN).
        3. Fill missing values within each
           ``(contractid, planid, state, county)`` group.
        4. Keep the first row of each group.
        5. Outer-join the two tables on the keys and add a ``year`` column.

    NOTE(review): tidyr's ``fill()`` defaults to filling downwards only. This
    translation additionally back-fills, so the retained first row of a group
    receives the group's first non-missing value. Confirm that this is the
    intended behaviour.

    Args:
        ma_data: MA table; must contain ``contractid``, ``planid``, ``state``,
            ``county`` and ``premium``.
        mapd_data: MA-PD table; must contain the four key columns plus
            ``premium_partc``, ``premium_partd_basic``, ``premium_partd_supp``,
            ``premium_partd_total`` and ``partd_deductible``.
        y: Value written to the ``year`` column of every output row.

    Returns:
        One row per key combination present in either input. Columns coming
        from the side that lacks a given key are NaN. The inputs are not
        modified.

    Raises:
        KeyError: If a required column is missing from either input.
    """
    keys = ["contractid", "planid", "state", "county"]
    ma_value_cols = ["premium"]
    mapd_value_cols = [
        "premium_partc", "premium_partd_basic", "premium_partd_supp",
        "premium_partd_total", "partd_deductible",
    ]

    # --- Tidy MA-only data -------------------------------------------------
    ma = ma_data.loc[:, keys + ma_value_cols].copy()
    # Mirror the coercion applied to the MA-PD side: merging a string planid
    # against a numeric one raises ValueError in pandas.
    ma["planid"] = pd.to_numeric(ma["planid"], errors="coerce")
    ma = _fill_gaps_keep_first(ma, keys, ma_value_cols)

    # --- Tidy MA-PD data ---------------------------------------------------
    mp = mapd_data.loc[:, keys + mapd_value_cols].copy()
    # R: mutate(planid=as.numeric(planid))
    mp["planid"] = pd.to_numeric(mp["planid"], errors="coerce")
    mp = _fill_gaps_keep_first(mp, keys, mapd_value_cols)

    # --- Merge Part D info to Part C info ---------------------------------
    plan_premiums = ma.merge(mp, on=keys, how="outer")
    plan_premiums["year"] = y

    return plan_premiums