# 10: Package selected statistics into CSV files
*Package thresholded summary statistics into CSV files.*

In [None]:
import fsspec
import geopandas as gpd
import xarray as xr

In [None]:
def write_out_csvs(
    metric,
    regions_df,
    variable,
    units,
    threshold=None,
):
    """
    Write one CSV of summary statistics for a metric/variable combination.

    For the historical period and each future scenario/period, the matching
    summary zarr store is opened, the ``multimodel_media`` entry is selected
    along ``gcm``, and the requested variable is added as one column next to
    the region identifiers and coordinates. The combined table is written to
    a single CSV file on S3.

    Parameters
    ----------
    metric : str
        Hyphen-separated metric name, e.g. ``"wbgt-sun"``. The second part
        selects the summary store; both parts appear in the output file name.
    regions_df : pandas.DataFrame or geopandas.GeoDataFrame
        Table with the columns ``ID_HDC_G0``, ``UC_NM_MN``, ``processing_id``,
        ``longitude`` and ``latitude``.
    variable : str
        Variable to export. ``"days_over"`` and ``"holiday_days_over"`` are
        threshold-based and are read from ``days_exceeding_<threshold><units>``
        and ``holiday_days_exceeding_<threshold><units>`` respectively; any
        other value is used as the dataset variable name unchanged.
    units : str
        Units suffix used in the dataset variable name and column headers.
    threshold : int or float, optional
        Threshold value; required for the threshold-based variables.

    Raises
    ------
    ValueError
        If ``variable`` is threshold-based and ``threshold`` is ``None``.
    """
    # Dataset-variable prefix for each threshold-based variable.
    threshold_prefixes = {
        "days_over": "days_exceeding",
        "holiday_days_over": "holiday_days_exceeding",
    }
    scenarios = [
        "historical",
        "ssp245-2030",
        "ssp245-2050",
        "ssp245-2090",
        "ssp370-2030",
        "ssp370-2050",
        "ssp370-2090",
    ]

    column_variable_name = variable.replace("_", " ")
    ds_var_name = variable
    if variable in threshold_prefixes:
        if threshold is None:
            # Fail early instead of building names like "..._NonedegC".
            raise ValueError(
                f"A threshold is required when variable is '{variable}'."
            )
        column_variable_name += f" {threshold} {units}"
        ds_var_name = f"{threshold_prefixes[variable]}_{threshold}{units}"

    template_df = regions_df[
        ["ID_HDC_G0", "UC_NM_MN", "processing_id", "longitude", "latitude"]
    ].sort_values("processing_id")

    metric_parts = metric.split("-")
    for scenario in scenarios:
        results = xr.open_zarr(
            f"s3://carbonplan-scratch/extreme-heat-extension/v1.1/outputs/zarr/summaries/"
            f"{scenario}-summaries-WBGT-{metric_parts[1]}.zarr"
        )
        # The historical store is read through its "ssp245" scenario entry;
        # future stores use the scenario part of "<scenario>-<period>".
        if scenario == "historical":
            scenario_key = "ssp245"
        else:
            scenario_key = scenario.split("-")[0]
        df = (
            results.sel(gcm="multimodel_media")
            .sel(scenario=scenario_key)
            .drop_vars("gcm")[ds_var_name]
            .to_dataframe()
            .sort_values("processing_id")
        )
        # NOTE(review): assigning a Series aligns on index labels, not on row
        # position. This presumably relies on regions_df's index matching the
        # processing_id index of `df` -- verify against the input data.
        template_df[f"{column_variable_name} - CarbonPlan - {scenario}"] = df[
            ds_var_name
        ]

    # NOTE(review): no rows are filtered before writing. A null mask over the
    # data columns used to be computed for "annual_maximum" and then
    # discarded, and a comment mentioned removing an out-of-domain processing
    # id, but neither changed the table. Confirm whether rows with missing
    # values should be dropped.
    metric_file_name = f"{metric_parts[0].upper()}-{metric_parts[1]}"
    out_file_name = (
        f"s3://carbonplan-scratch/extreme-heat-extension/v1.1/outputs/csv/"
        f"carbonplan-extreme-heat-{column_variable_name.replace(' ', '-')}"
        f"-{metric_file_name}.csv"
    )
    # processing_id is used only for ordering and is left out of the CSV.
    template_df.drop(columns=["processing_id"]).to_csv(
        out_file_name, index=False
    )

In [None]:
# Read the region/city geometries, keep only the regions that appear in a
# sample WBGT dataset, and discard identifier columns that are not exported.
path = "s3://carbonplan-climate-impacts/extreme-heat/v1.0/inputs/all_regions_and_cities.json"
with fsspec.open(path) as file:
    regions_df = gpd.read_file(file)

sample_ds = xr.open_zarr(
    "s3://carbonplan-scratch/extreme-heat/wbgt-sun-regions/wbgt-sun-ACCESS-CM2-ssp245.zarr"
)

regions_df = regions_df.loc[
    regions_df["processing_id"].isin(sample_ds.processing_id.values)
].drop(columns=["UACE20", "NAMELSAD20", "gadmid", "hierid", "ISO"])

In [None]:
# Compute the centroids once and reuse them for both coordinate columns.
# NOTE(review): geopandas warns that centroids computed in a geographic CRS
# may be inaccurate -- confirm that is acceptable for these regions.
centroids = regions_df.centroid
regions_df["longitude"] = centroids.x
regions_df["latitude"] = centroids.y

Write out CSVs summarizing historical and future annual maxima for WBGT in the sun and in the shade.

In [None]:
# Annual maxima take no threshold: one CSV per WBGT variant.
for metric in ("wbgt-shade", "wbgt-sun"):
    write_out_csvs(
        metric=metric,
        regions_df=regions_df,
        variable="annual_maximum",
        units="degC",
    )

Write out CSVs summarizing historical and future days over threshold for WBGT in the sun and in the shade.

In [None]:
# One CSV per WBGT variant and threshold.
for metric in ("wbgt-shade", "wbgt-sun"):
    for threshold in (25, 27, 29, 30.5, 32, 35):
        write_out_csvs(
            metric=metric,
            regions_df=regions_df,
            variable="days_over",
            units="degC",
            threshold=threshold,
        )

Write out CSVs summarizing historical and future days in the holiday season exceeding different thresholds of WBGT in the sun and in the shade.

In [None]:
# One CSV per WBGT variant and threshold, for the holiday-season counts.
for metric in ("wbgt-shade", "wbgt-sun"):
    for threshold in (25, 27, 29, 30.5, 32, 35):
        write_out_csvs(
            metric=metric,
            regions_df=regions_df,
            variable="holiday_days_over",
            units="degC",
            threshold=threshold,
        )