# In [None]:  (notebook cell marker left over from export; commented out so the file parses)
import pandas as pd
from pathlib import Path


# Column names, in positional order, assigned to the headerless MA landscape
# CSV when it is read by read_ma_csv().
MA_COLUMNS = [
    "state",
    "county",
    "org_name",
    "plan_name",
    "plan_type",
    "premium",
    "partd_deductible",
    "drug_type",
    "gap_coverage",
    "drug_type_detail",
    "demo_type",
    "contractid",
    "planid",
    "segmentid",
]

# Column -> dtype mapping passed as `dtype=` when parsing the MA landscape CSV.
# Columns are grouped by target dtype rather than listed one by one.
MA_DTYPES = {
    **dict.fromkeys(
        (
            "state",
            "county",
            "org_name",
            "plan_name",
            "plan_type",
            "drug_type",
            "gap_coverage",
            "drug_type_detail",
            "demo_type",
            "contractid",
        ),
        "string",
    ),
    **dict.fromkeys(
        ("premium", "partd_deductible", "planid", "segmentid"),
        "float64",
    ),
}


# Column names, in positional order, assigned to each headerless MA-PD
# worksheet when it is read by read_mapd_xls().
MAPD_COLUMNS = [
    "state",
    "county",
    "org_name",
    "plan_name",
    "contractid",
    "planid",
    "segmentid",
    "org_type",
    "plan_type",
    "snp",
    "snp_type",
    "benefit_type",
    "below_benchmark",
    "national_pdp",
    "partd_rein_demo",
    "partd_rein_demo_type",
    "premium_partc",
    "premium_partd_basic",
    "premium_partd_supp",
    "premium_partd_total",
    "partd_assist_full",
    "nothing",
    "partd_assist_75",
    "partd_assist_50",
    "partd_assist_25",
    "partd_deductible",
    "increase_coverage_limit",
    "gap_coverage",
    "gap_coverage_type",
]

# Column -> dtype mapping passed as `dtype=` when parsing the MA-PD worksheets.
# Columns are grouped by target dtype rather than listed one by one.
MAPD_DTYPES = {
    **dict.fromkeys(
        (
            "state",
            "county",
            "org_name",
            "plan_name",
            "contractid",
            "org_type",
            "plan_type",
            "snp",
            "snp_type",
            "benefit_type",
            "below_benchmark",
            "national_pdp",
            "partd_rein_demo",
            "partd_rein_demo_type",
            "gap_coverage",
            "gap_coverage_type",
        ),
        "string",
    ),
    **dict.fromkeys(
        (
            "planid",
            "segmentid",
            "premium_partc",
            "premium_partd_basic",
            "premium_partd_supp",
            "premium_partd_total",
            "partd_assist_full",
            "nothing",
            "partd_assist_75",
            "partd_assist_50",
            "partd_assist_25",
            "partd_deductible",
            "increase_coverage_limit",
        ),
        "float64",
    ),
}

def read_ma_csv(path: str | Path) -> pd.DataFrame:
    """Read one MA landscape CSV extract into a DataFrame.

    The first five lines of the file are skipped and no header row is parsed;
    columns are named from ``MA_COLUMNS`` and typed according to ``MA_DTYPES``.

    Args:
        path: Location of the CSV file.

    Returns:
        The parsed table.
    """
    reader_options = {
        "skiprows": 5,
        "header": None,
        "names": MA_COLUMNS,
        "dtype": MA_DTYPES,
        "low_memory": False,
    }
    return pd.read_csv(path, **reader_options)

def read_mapd_xls(path: str | Path, sheet: str, nrows: int) -> pd.DataFrame:
    """Read one worksheet of the MA-PD plan report workbook into a DataFrame.

    The workbook is opened with the ``xlrd`` engine. The first four rows of the
    sheet are skipped, no header row is parsed, and at most ``nrows`` rows are
    read; columns are named from ``MAPD_COLUMNS`` and typed according to
    ``MAPD_DTYPES``.

    Args:
        path: Location of the ``.xls`` workbook.
        sheet: Name of the worksheet to read.
        nrows: Number of rows to parse after the skipped rows.

    Returns:
        The parsed table.
    """
    reader_options = {
        "engine": "xlrd",
        "sheet_name": sheet,
        "skiprows": 4,
        "nrows": nrows,
        "header": None,
        "names": MAPD_COLUMNS,
        "dtype": MAPD_DTYPES,
    }
    return pd.read_excel(path, **reader_options)


def load_landscape_2008(y: int) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Load the raw 2008 MA and MA-PD landscape tables from disk.

    The MA table is the row-wise concatenation of the two CSV extracts
    (A-M, then N-W); the MA-PD table is the row-wise concatenation of the two
    worksheets of the Part D plan report. Both results get a fresh index.

    Args:
        y: Accepted for interface symmetry but not used here; every path is
            hard-coded to the 2008 files.

    Returns:
        A ``(ma_data, mapd_data)`` pair of DataFrames.
    """
    extract_dir = Path("data/input/ma/landscape/Extracted Data")
    ma_file_names = (
        "2008LandscapeSourceData_MA_09_25_07(A-M).csv",
        "2008LandscapeSourceData_MA_09_25_07(N-W).csv",
    )
    ma_parts = [read_ma_csv(extract_dir / file_name) for file_name in ma_file_names]
    # Row-bind the two halves (equivalent of R's rbind()).
    ma_data = pd.concat(ma_parts, ignore_index=True)

    workbook = (
        Path("data/input/ma/landscape/Extracted Data/PartCD/2008")
        / "Medicare Part D 2008 Plan Report 11-06-07.xls"
    )
    # (worksheet name, number of data rows to read from that worksheet)
    sheet_row_counts = (
        ("Alabama to Montana", 39467),
        ("Nebraska to Wyoming", 44704),
    )
    mapd_parts = [
        read_mapd_xls(workbook, sheet=sheet_name, nrows=row_count)
        for sheet_name, row_count in sheet_row_counts
    ]
    # Row-bind the two worksheets (equivalent of R's rbind()).
    mapd_data = pd.concat(mapd_parts, ignore_index=True)

    return ma_data, mapd_data

def mapd_clean_merge(ma_data: pd.DataFrame, mapd_data: pd.DataFrame, y: int) -> pd.DataFrame:
    """Placeholder for the Python port of the R routine ``mapd.clean.merge()``.

    Args:
        ma_data: MA landscape table.
        mapd_data: MA-PD landscape table.
        y: Year being processed.

    Raises:
        NotImplementedError: Always; the translation has not been written yet.
    """
    message = "Translate mapd.clean.merge() from R to Python and plug it in here."
    raise NotImplementedError(message)

# Example
# y = 2008
# ma_data, mapd_data = load_landscape_2008(y)
# final_landscape = mapd_clean_merge(ma_data=ma_data, mapd_data=mapd_data, y=y)