# In [None]:
import pandas as pd
from pathlib import Path


# Column names, in positional order, assigned to the header-less 2013 MA
# landscape CSV extracts (passed as ``names=`` by read_ma_2013_csv).
MA_COLUMNS_2013 = [
    "state",
    "county",
    "org_name",
    "plan_name",
    "plan_type",
    "premium",
    "partd_deductible",
    "drug_type",
    "gap_coverage",
    "drug_type_detail",
    "contractid",
    "planid",
    "segmentid",
    "moop",
    "star_rating",
]

# pandas dtype for each column of the 2013 MA landscape CSV extracts
# (passed as ``dtype=`` by read_ma_2013_csv). Every column is read as a
# pandas "string" except the four numeric ones named in the set below,
# which are read as "float64". Key order follows the column order.
MA_DTYPES_2013 = {
    column: (
        "float64"
        if column in {"premium", "partd_deductible", "planid", "segmentid"}
        else "string"
    )
    for column in (
        "state",
        "county",
        "org_name",
        "plan_name",
        "plan_type",
        "premium",
        "partd_deductible",
        "drug_type",
        "gap_coverage",
        "drug_type_detail",
        "contractid",
        "planid",
        "segmentid",
        "moop",
        "star_rating",
    )
}

# Column names, in positional order, assigned to the header-less sheets of
# the 2013 Part D plan report workbook (passed as ``names=`` by
# read_mapd_2013_xls).
MAPD_COLUMNS_2013 = [
    "state",
    "county",
    "org_name",
    "plan_name",
    "contractid",
    "planid",
    "segmentid",
    "org_type",
    "plan_type",
    "snp",
    "snp_type",
    "benefit_type",
    "below_benchmark",
    "national_pdp",
    "premium_partc",
    "premium_partd_basic",
    "premium_partd_supp",
    "premium_partd_total",
    "partd_assist_full",
    "partd_assist_75",
    "partd_assist_50",
    "partd_assist_25",
    "partd_deductible",
    "deductible_exclusions",
    "increase_coverage_limit",
    "gap_coverage",
    "gap_coverage_type",
]

# pandas dtype for each column of the 2013 Part D plan report sheets
# (passed as ``dtype=`` by read_mapd_2013_xls): the first group is read as
# pandas "string", the second as "float64".
MAPD_DTYPES_2013 = {
    **dict.fromkeys(
        (
            "state",
            "county",
            "org_name",
            "plan_name",
            "contractid",
            "org_type",
            "plan_type",
            "snp",
            "snp_type",
            "benefit_type",
            "below_benchmark",
            "national_pdp",
            "deductible_exclusions",
            "gap_coverage",
            "gap_coverage_type",
        ),
        "string",
    ),
    **dict.fromkeys(
        (
            "planid",
            "segmentid",
            "premium_partc",
            "premium_partd_basic",
            "premium_partd_supp",
            "premium_partd_total",
            "partd_assist_full",
            "partd_assist_75",
            "partd_assist_50",
            "partd_assist_25",
            "partd_deductible",
            "increase_coverage_limit",
        ),
        "float64",
    ),
}

def read_ma_2013_csv(path: str | Path) -> pd.DataFrame:
    """Read one 2013 MA landscape CSV extract into a DataFrame.

    The first six lines of the file are skipped and no header row is parsed;
    columns are named from ``MA_COLUMNS_2013`` and typed per
    ``MA_DTYPES_2013``.

    Args:
        path: Location of the CSV file.

    Returns:
        The parsed rows, one column per entry of ``MA_COLUMNS_2013``.
    """
    reader_options = {
        "skiprows": 6,
        "header": None,
        "names": MA_COLUMNS_2013,
        "dtype": MA_DTYPES_2013,
        # Parse the file in one pass instead of in internal chunks.
        "low_memory": False,
    }
    return pd.read_csv(path, **reader_options)

def read_mapd_2013_xls(path: str | Path, sheet: str, nrows: int) -> pd.DataFrame:
    """Read one sheet of the 2013 Part D plan report workbook.

    Python counterpart of the R call
    ``read_xls(range="A5:AA<end>", sheet=..., col_names=...)``: the first
    four rows of the sheet are skipped so reading starts at row 5, and
    ``nrows`` rows are read from there.

    Args:
        path: Location of the ``.xls`` workbook.
        sheet: Name of the worksheet to read.
        nrows: Number of rows to read after the skipped rows.

    Returns:
        The parsed rows, with columns named from ``MAPD_COLUMNS_2013`` and
        typed per ``MAPD_DTYPES_2013``.
    """
    sheet_frame = pd.read_excel(
        path,
        sheet_name=sheet,
        engine="xlrd",
        # No header row is parsed; names come from the constant instead.
        header=None,
        names=MAPD_COLUMNS_2013,
        dtype=MAPD_DTYPES_2013,
        # Row window: start at sheet row 5 and take `nrows` rows.
        skiprows=4,
        nrows=nrows,
    )
    return sheet_frame


def load_landscape_2013_inputs(y: int) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Load the raw 2013 MA landscape and Part D plan report inputs.

    Reads the two MA landscape CSV extracts (A-to-M and N-to-W files) and
    the two state-range sheets of the Part D plan report workbook, then
    stacks each pair into one frame with a fresh, continuous index.

    Args:
        y: Plan year. Every file name, sheet name and row count used here
            is specific to 2013, so any other value is rejected instead of
            silently returning 2013 data for a different year.

    Returns:
        A ``(ma_data, mapd_data)`` tuple: the concatenated MA landscape
        rows and the concatenated Part D plan report rows.

    Raises:
        ValueError: If ``y`` is not 2013.
    """
    if y != 2013:
        raise ValueError(
            f"load_landscape_2013_inputs only supports y=2013, got {y!r}"
        )

    extracted_dir = Path("data/input/ma/landscape/Extracted Data")

    ma_file_names = (
        "2013LandscapeSource file MA_AtoM 11212012.csv",
        "2013LandscapeSource file MA_NtoW 11212012.csv",
    )
    ma_data = pd.concat(
        [read_ma_2013_csv(extracted_dir / file_name) for file_name in ma_file_names],
        ignore_index=True,
    )

    mapd_path = (
        extracted_dir
        / "PartCD"
        / "2013"
        / "Medicare Part D 2013 Plan Report 04252013v1.xls"
    )
    # (sheet name, number of rows to read from that sheet)
    mapd_sheets = (
        ("Alabama to Montana", 20936),
        ("Nebraska to Wyoming", 23808),
    )
    mapd_data = pd.concat(
        [
            read_mapd_2013_xls(mapd_path, sheet=sheet_name, nrows=row_count)
            for sheet_name, row_count in mapd_sheets
        ],
        ignore_index=True,
    )

    return ma_data, mapd_data

def mapd_clean_merge(ma_data: pd.DataFrame, mapd_data: pd.DataFrame, y: int) -> pd.DataFrame:
    """Placeholder for the Python port of the R routine ``mapd.clean.merge()``.

    The arguments are accepted but not used yet.

    Args:
        ma_data: MA landscape frame (unused for now).
        mapd_data: Part D plan report frame (unused for now).
        y: Plan year (unused for now).

    Raises:
        NotImplementedError: Always, until the R logic has been translated.
    """
    message = "Translate mapd.clean.merge() from R to Python and plug it in here."
    raise NotImplementedError(message)

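# Example usage (the last call raises NotImplementedError until
# mapd_clean_merge has been translated from R):
# y = 2013
# ma_data, mapd_data = load_landscape_2013_inputs(y)
# final_landscape = mapd_clean_merge(ma_data=ma_data, mapd_data=mapd_data, y=y)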
