# Aggregate Zonal Means


In [None]:
import pandas as pd

from cholera import outbreaks

We will read the zonal-means CSVs, derive trailing (lagged) values for each variable, and then combine the results into a single aggregate DataFrame.


In [None]:
def read_csv_sorted(csv: str) -> pd.DataFrame:
    """Load a zonal-means CSV indexed by location and month, sorted by index.

    Args:
        csv: Path (or anything ``pd.read_csv`` accepts) of the CSV to load.
            It must contain ``location_period_id``, ``year`` and ``month``
            columns, which become the levels of the index.

    Returns:
        The CSV contents with a ``(location_period_id, year, month)`` index,
        sorted by that index.
    """
    index_levels = ["location_period_id", "year", "month"]
    frame = pd.read_csv(csv, index_col=index_levels)
    return frame.sort_index()


def assign_trailing_values(df: pd.DataFrame) -> pd.DataFrame:
    """Replace the first column of ``df`` with its current and lagged values.

    The frame is sorted by its index and the first column ``<name>`` is
    replaced by four columns: ``<name>_3``, ``<name>_2`` and ``<name>_1``
    (the value 3, 2 and 1 rows earlier) and ``<name>_0`` (the current value).

    When the index has a ``location_period_id`` level, lags are computed
    within each location, so the earliest rows of one location never receive
    values from the tail of the previous location. The first three rows of
    every location, which lack a full history, are dropped. Without that
    level the whole frame is treated as a single series and only its first
    three rows are dropped.

    NOTE(review): lags are row-based, so this presumably expects one row per
    consecutive month within each location - confirm against the input CSVs.

    Args:
        df: Frame whose first column holds the values to lag. Any further
            columns are passed through unchanged.

    Returns:
        A new, index-sorted frame without the original first column, with
        the four ``<name>_N`` columns appended, restricted to rows that have
        three preceding rows available.

    Raises:
        IndexError: If ``df`` has no columns.
    """
    name = df.columns[0]
    ordered = df.sort_index()
    values = ordered[name]

    if "location_period_id" in ordered.index.names:
        # Shift inside each location so lags do not bleed across locations.
        per_location = values.groupby(level="location_period_id", sort=False)
        lag = per_location.shift
        # cumcount() numbers rows 0, 1, 2, ... within each location.
        has_history = (per_location.cumcount() >= 3).to_numpy()
    else:
        lag = values.shift
        has_history = pd.RangeIndex(len(values)) >= 3

    lagged = ordered.assign(
        **{
            f"{name}_3": lag(3),
            f"{name}_2": lag(2),
            f"{name}_1": lag(1),
            f"{name}_0": values,
        }
    ).drop(columns=[name])

    # Keep only rows whose three lagged values come from real earlier rows.
    return lagged.loc[has_history]

In [None]:
# Load the LST zonal means, indexed and sorted by
# (location_period_id, year, month).
lst_df = read_csv_sorted("../data/zonal-means-lst-200910-201912.csv")
# Bare expression: shows the frame as the notebook cell output.
lst_df

In [None]:
# Replace the LST column with its current value and its three trailing values.
trailing_lst_df = assign_trailing_values(lst_df)
# Bare expression: shows the frame as the notebook cell output.
trailing_lst_df

In [None]:
# Load the precipitation zonal means, indexed and sorted by
# (location_period_id, year, month).
precip_df = read_csv_sorted("../data/zonal-means-precip-200910-201912.csv")
# Bare expression: shows the frame as the notebook cell output.
precip_df

In [None]:
# Replace the precip column with its current value and its three trailing values.
trailing_precip_df = assign_trailing_values(precip_df)
# Bare expression: shows the frame as the notebook cell output.
trailing_precip_df

In [None]:
# Load the SM zonal means, indexed and sorted by
# (location_period_id, year, month).
sm_df = read_csv_sorted("../data/zonal-means-sm-200910-201912.csv")
# Bare expression: shows the frame as the notebook cell output.
sm_df

In [None]:
# Replace the SM column with its current value and its three trailing values.
trailing_sm_df = assign_trailing_values(sm_df)
# Bare expression: shows the frame as the notebook cell output.
trailing_sm_df

In [None]:
# Combine the three trailing-value frames on their shared index. Outer joins
# keep every index entry present in any of the frames; variables missing for
# an entry are left as NaN.
aggregate_zonal_means_df = trailing_lst_df.join(trailing_precip_df, how="outer").join(
    trailing_sm_df, how="outer"
)

# Bare expression: shows the frame as the notebook cell output.
aggregate_zonal_means_df

In [None]:
# Build a lookup of outbreak start months: one row per
# (location_period_id, year, month) key, with a constant ``outbreak`` flag of 1.
outbreak_flag_df = (
    outbreaks.within_admin2()[["location_period_id", "start_year", "start_month"]]
    # Rename so the key columns match the zonal-means index level names.
    .rename(columns={"start_year": "year", "start_month": "month"})
    # If several outbreaks share a location and start month, keep one row per
    # key; a duplicated key would otherwise duplicate the matching feature
    # rows when this frame is left-joined onto another one.
    .drop_duplicates()
    .set_index(["location_period_id", "year", "month"])
    .sort_index()
    .assign(outbreak=1)
)

# Bare expression: shows the frame as the notebook cell output.
outbreak_flag_df

In [None]:
# Attach the outbreak flag to the aggregated zonal means. The left join keeps
# every zonal-means row; rows with no matching outbreak get NaN, which is then
# filled with 0 in the ``outbreak`` column only. convert_dtypes() lets pandas
# pick the best nullable dtype for each column.
ml_ready_df = (
    aggregate_zonal_means_df.join(outbreak_flag_df, how="left")
    .fillna({"outbreak": 0})
    .convert_dtypes()
)

# Bare expression: shows the frame as the notebook cell output.
ml_ready_df

In [None]:
# Count the rows for each value of the ``outbreak`` flag.
ml_ready_df.value_counts("outbreak")

In [None]:
# Write the final table, including its index, to CSV.
ml_ready_df.to_csv("../data/zonal-means-aggregate-200910-201912.csv")