# 10: Package selected statistics into CSV files
*Package thresholded summary statistics into CSV files.*

In [None]:
import fsspec
import geopandas as gpd
import xarray as xr

In [None]:
def write_out_csvs(
    metric,
    regions_df,
    variable,
    units,
    threshold=None,
):
    """
    Write multimodel-median summaries for one variable to a CSV file on S3.

    For each scenario (historical, ssp245-2030, ssp245-2050) the matching
    summary Zarr store is opened, the ``multimodel_media`` entry along ``gcm``
    is selected, and the requested variable is added as one column to a table
    of region identifiers. The table is then written under the
    ``outputs/csv`` prefix of the project bucket.

    Parameters
    ----------
    metric : str
        Hyphenated metric name such as ``"wbgt-shade"``. The second part
        selects the summary store; both parts appear in the output file name
        (the first part upper-cased).
    regions_df : DataFrame
        Table providing the ``ID_HDC_G0``, ``UACE20``, ``hierid`` and
        ``processing_id`` columns.
    variable : str
        Name of the summary variable to export. For ``"days_over"`` the
        dataset variable read is ``days_exceeding_{threshold}{units}``;
        otherwise ``variable`` itself is read.
    units : str
        Units label; only used when ``variable == "days_over"``.
    threshold : number, optional
        Threshold value; only used when ``variable == "days_over"``.

    Returns
    -------
    None
        The result is written to S3 as a side effect.
    """
    scenarios = ["historical", "ssp245-2030", "ssp245-2050"]
    # Split once; index [1] raises IndexError if `metric` has no hyphen.
    metric_parts = metric.split("-")

    template_df = regions_df[
        ["ID_HDC_G0", "UACE20", "hierid", "processing_id"]
    ].sort_values("processing_id")

    column_variable_name = variable.replace("_", " ")
    ds_var_name = variable
    if variable == "days_over":
        column_variable_name += f" {threshold} {units}"
        ds_var_name = f"days_exceeding_{threshold}{units}"

    for scenario in scenarios:
        results = xr.open_zarr(
            f"s3://carbonplan-climate-impacts/extreme-heat/v1.0/outputs/zarr/summaries/"
            f"{scenario}-summaries-WBGT-{metric_parts[1]}.zarr"
        )
        # NOTE(review): the label is "multimodel_media" (not "..._median");
        # presumably this matches the coordinate as stored — confirm.
        df = (
            results.sel(gcm="multimodel_media")
            # `drop_vars` is the non-deprecated spelling of `Dataset.drop`.
            .drop_vars("gcm")[ds_var_name]
            .to_dataframe()
            .sort_values("processing_id")
        )
        # NOTE(review): pandas aligns this assignment on index labels, not on
        # row order, so the sorts above do not by themselves line rows up.
        # This assumes the index of `regions_df` carries the same labels as
        # the index of `df` — verify against the inputs.
        template_df[f"{column_variable_name} - CarbonPlan - {scenario}"] = df[
            ds_var_name
        ]

    # NOTE(review): for "annual_maximum" a per-row null mask over the three
    # scenario columns used to be computed here and then discarded. If rows
    # with missing values should be dropped or asserted on, do so explicitly.

    metric_file_name = f"{metric_parts[0].upper()}-{metric_parts[1]}"
    # Remove one processing id which was invalid being outside of the domain.
    # `drop` raises KeyError if that id is not present.
    template_df = template_df.set_index("processing_id").drop([26222])
    out_file_name = (
        f"s3://carbonplan-climate-impacts/extreme-heat/v1.0/outputs/csv/"
        f"carbonplan-extreme-heat-{column_variable_name.replace(' ', '-')}"
        f"-{metric_file_name}.csv"
    )
    # `processing_id` is the index at this point, so `index=False` leaves it
    # out of the CSV. NOTE(review): confirm that omission is intended.
    template_df.to_csv(out_file_name, index=False)

In [None]:
def write_out_heatwave_csvs(metric, regions_df, threshold, units):
    """
    Write multimodel-median heatwave-day summaries to a CSV file on S3.

    For each scenario (historical, ssp245-2030, ssp245-2050) the matching
    heatwave summary Zarr store is opened, the ``multimodel_media`` entry
    along ``gcm`` is selected, and the variable
    ``heatwave-days-over-{threshold}{units}`` is added as one column to a
    table of region identifiers. The table is then written under the
    ``outputs/csv`` prefix of the project bucket.

    Parameters
    ----------
    metric : str
        Hyphenated metric name such as ``"wbgt-shade"``. The second part
        selects the summary store; both parts appear in the output file name
        (the first part upper-cased).
    regions_df : DataFrame
        Table providing the ``ID_HDC_G0``, ``UACE20``, ``hierid`` and
        ``processing_id`` columns.
    threshold : number
        Threshold value embedded in the dataset variable name.
    units : str
        Units label embedded in the dataset variable name.

    Returns
    -------
    None
        The result is written to S3 as a side effect.
    """
    scenarios = ["historical", "ssp245-2030", "ssp245-2050"]
    # Split once; index [1] raises IndexError if `metric` has no hyphen.
    metric_parts = metric.split("-")

    template_df = regions_df[
        ["ID_HDC_G0", "UACE20", "hierid", "processing_id"]
    ].sort_values("processing_id")
    ds_var_name = f"heatwave-days-over-{threshold}{units}"
    # NOTE(review): `ds_var_name` already ends with the units, so the column
    # label (and the output file name derived from it) repeats them, e.g.
    # "... 29degC degC". Kept as-is because it determines the names of the
    # files written; confirm whether the repetition is intended.
    column_variable_name = ds_var_name.replace("-", " ") + f" {units}"

    for scenario in scenarios:
        results = xr.open_zarr(
            f"s3://carbonplan-climate-impacts/extreme-heat/v1.0/outputs/zarr/summaries/"
            f"{scenario}-summaries-heatwaves-WBGT-{metric_parts[1]}.zarr"
        )
        # NOTE(review): the label is "multimodel_media" (not "..._median");
        # presumably this matches the coordinate as stored — confirm.
        df = (
            results.sel(gcm="multimodel_media")
            # `drop_vars` is the non-deprecated spelling of `Dataset.drop`.
            .drop_vars("gcm")[ds_var_name]
            .to_dataframe()
            .sort_values("processing_id")
        )
        # NOTE(review): pandas aligns this assignment on index labels, not on
        # row order, so the sorts above do not by themselves line rows up.
        # This assumes the index of `regions_df` carries the same labels as
        # the index of `df` — verify against the inputs.
        template_df[f"{column_variable_name} - CarbonPlan - {scenario}"] = df[
            ds_var_name
        ]

    metric_file_name = f"{metric_parts[0].upper()}-{metric_parts[1]}"
    # drop one region because it does not have input data and thus is NaN
    # (`drop` raises KeyError if that id is not present).
    template_df = template_df.set_index("processing_id").drop([26222])
    out_file_name = (
        f"s3://carbonplan-climate-impacts/extreme-heat/v1.0/outputs/csv/"
        f"carbonplan-extreme-heat-{column_variable_name.replace(' ', '-')}"
        f"-{metric_file_name}.csv"
    )
    # `processing_id` is the index at this point, so `index=False` leaves it
    # out of the CSV. NOTE(review): confirm that omission is intended.
    template_df.to_csv(out_file_name, index=False)

In [None]:
# Read the table of all regions and cities from the project bucket.
path = (
    "s3://carbonplan-climate-impacts/extreme-heat/v1.0/inputs/"
    "all_regions_and_cities.json"
)
with fsspec.open(path) as file:
    regions_df = gpd.read_file(file)

# Keep only the regions whose processing_id also appears in a sample dataset.
sample_ds = xr.open_zarr(
    "s3://carbonplan-scratch/extreme-heat/wbgt-sun-regions/wbgt-sun-ACCESS-CM2.zarr"
)
in_sample = regions_df["processing_id"].isin(sample_ds.processing_id.values)
regions_df = regions_df[in_sample]

Write out CSVs summarizing historical and future annual maxima for WBGT in the sun and in the shade.

In [None]:
# One CSV per metric; `threshold` is left at its default for annual maxima.
for metric in ("wbgt-shade", "wbgt-sun"):
    write_out_csvs(
        metric=metric,
        regions_df=regions_df,
        variable="annual_maximum",
        units="degC",
    )

Write out CSVs summarizing historical and future days over threshold for WBGT in the sun and in the shade.

In [None]:
# One CSV per (metric, threshold) pair.
for metric in ("wbgt-shade", "wbgt-sun"):
    for threshold in (29, 30.5, 32, 35):
        write_out_csvs(
            metric=metric,
            regions_df=regions_df,
            variable="days_over",
            units="degC",
            threshold=threshold,
        )

Write out CSVs summarizing historical and future heatwave days for WBGT in the sun and in the shade according to a set of thresholds.

In [None]:
# One heatwave CSV per (metric, threshold) pair.
for metric in ("wbgt-shade", "wbgt-sun"):
    for threshold in (29, 30.5, 32, 35):
        write_out_heatwave_csvs(
            metric=metric,
            regions_df=regions_df,
            threshold=threshold,
            units="degC",
        )