In [1]:
import numpy as np
import pandas as pd
import xarray as xr
from pyCIAM.io import get_nearest_slrs
from shared import (
    DIR_SLR_AR6_RAW,
    PATH_PARAMS,
    PATH_SLIIDERS,
    PATH_SLR_INT,
    PATH_SLR_IS_RAW
)

In [2]:
slr_0_year = pd.read_json(PATH_PARAMS).loc["slr_0_year", "values"]

In [3]:
all_ds = []
global_ds = []
global_ssps = []


def open_and_convert(ds_path):
    out = xr.open_dataset(ds_path)

    # for some reason the VLM dataset has an entry for 2005 that is all 0s, while other
    # datasets just don't have 2005 b/c it is the assumed baseline
    if out.years[0].item() == slr_0_year:
        assert (out.sea_level_change.sel(years=slr_0_year) == 0).all()
        out = out.isel(years=slice(1, None))

    out["sea_level_change"] = (
        out.sea_level_change.pint.quantify().pint.to("meters").pint.dequantify()
    )

    # add in the 2019 year so that we enforce pulses only happen in 2020
    this = out.sea_level_change
    init_years = this.years[0].item() - slr_0_year
    init_year = (
        (init_years - 1)
        / init_years
        * this.isel(years=0, drop=True).expand_dims(years=[out.years[0].item() - 1])
    )
    out = out.drop_dims("years").assign(
        sea_level_change=xr.concat((init_year, this), dim="years")
    )
    return out


for kind in ["total", "verticallandmotion"]:
    ds = []
    this_ssps = []
    for conf in ["low", "medium"]:
        for ds_path in (DIR_SLR_AR6_RAW / "regional").glob(f"{kind}_*_{conf}_*"):
            print(f"Processing {ds_path}...")
            this_ssp = ds_path.name.split("_")[1]
            ssp_conf = f"{this_ssp}_{conf}"
            ds.append(open_and_convert(ds_path))
            this_ssps.append(ssp_conf)
            if kind == "total":
                global_ds.append(
                    open_and_convert(DIR_SLR_AR6_RAW / "global" / ds_path.name)
                )
                global_ssps.append(ssp_conf)
    all_ds.append(
        xr.concat(ds, pd.Index(this_ssps, name="scenario"), data_vars="different")
    )

global_ds = (
    xr.concat(global_ds, pd.Index(global_ssps, name="scenario"), data_vars="different")
    .squeeze(drop=True)
    .drop_vars(["lon", "lat"])
    .sea_level_change
)

# handle floating point matching errors on the quantile dimension
global_ds["quantiles"] = all_ds[0].quantiles
all_ds[1]["quantiles"] = all_ds[0].quantiles

all_ds = xr.Dataset(
    {
        "lsl_msl05": all_ds[0].sea_level_change,
        "lsl_ncc_msl05": all_ds[1].sea_level_change,
        "gsl_msl05": global_ds,
        "lon": all_ds[1].lon,
        "lat": all_ds[0].lat,
    }
)

# drop locations with NaN values in the time period we're interested in
valid = (
    all_ds[["lsl_msl05", "lsl_ncc_msl05"]]
    .sel(years=slice(2100))
    .notnull()
    .all(["scenario", "quantiles", "years"])
    .to_array("tmp")
    .all("tmp")
)
all_ds = all_ds.sel(locations=valid)

all_ds = all_ds.rename(
    {"years": "year", "quantiles": "quantile", "locations": "site_id"}
)

# we generally allow +180 but not -180
all_ds["lon"] = all_ds.lon.where(all_ds.lon != -180, 180)

# ensure no locations have missing values
assert all_ds.sel(year=slice(2100)).notnull().all().to_array().all()

Processing /data/raw/slr/ar6/regional/total_ssp245_low_confidence_values.nc...
Processing /data/raw/slr/ar6/regional/total_ssp585_low_confidence_values.nc...
Processing /data/raw/slr/ar6/regional/total_ssp126_low_confidence_values.nc...
Processing /data/raw/slr/ar6/regional/total_ssp370_medium_confidence_values.nc...
Processing /data/raw/slr/ar6/regional/total_ssp245_medium_confidence_values.nc...
Processing /data/raw/slr/ar6/regional/total_ssp119_medium_confidence_values.nc...
Processing /data/raw/slr/ar6/regional/total_ssp126_medium_confidence_values.nc...
Processing /data/raw/slr/ar6/regional/total_ssp585_medium_confidence_values.nc...
Processing /data/raw/slr/ar6/regional/verticallandmotion_ssp585_low_confidence_values.nc...
Processing /data/raw/slr/ar6/regional/verticallandmotion_ssp245_low_confidence_values.nc...
Processing /data/raw/slr/ar6/regional/verticallandmotion_ssp126_low_confidence_values.nc...
Processing /data/raw/slr/ar6/regional/verticallandmotion_ssp245_medium_confid

## Add on the pulse from greenland and antarctica

In [4]:
sliiders = (
    xr.open_zarr(str(PATH_SLIIDERS))
    .load()
    .swap_dims(seg_adm="seg")
    .drop_duplicates("seg")
)

slrs = get_nearest_slrs(
    all_ds,
    sliiders[["seg_lon", "seg_lat"]].reset_coords(drop=True).to_dataframe(),
)

adders = (
    pd.read_parquet(PATH_SLR_IS_RAW)
    .loc[sliiders.seg.to_series().index]
    .assign(constant=0.01)
    .to_xarray()
    .to_array("sheet")
)
# add in GSL
adders = xr.Dataset(
    {"lsl_msl05": adders, "gsl_msl05": xr.ones_like(adders.isel(seg=0, drop=True))}
)

seg_ds = all_ds.sel(site_id=slrs.to_xarray())
del all_ds

In [5]:
newsl = seg_ds[["lsl_msl05", "gsl_msl05"]].isel(year=slice(1, None)) + adders
newsl = (
    xr.concat(
        (
            seg_ds[["lsl_msl05", "gsl_msl05"]]
            .isel(year=[0])
            .expand_dims(sheet=adders.sheet),
            newsl,
        ),
        dim="year",
    )
    .rename(scenario="tmp")
    .stack(tmp2=["tmp", "sheet"])
)
newsl["scenario"] = newsl.tmp + "_" + newsl.sheet
newsl = newsl.set_index(tmp2="scenario").reset_coords(drop=True).rename(tmp2="scenario")

In [6]:
new_scens_added = xr.concat(
    (
        seg_ds[["lsl_msl05", "gsl_msl05"]],
        newsl,
    ),
    dim="scenario",
)

out = (
    xr.merge((seg_ds.drop_dims("scenario"), new_scens_added))
    .swap_dims(seg="site_id")
    .drop_vars("seg")
    .drop_duplicates("site_id")
)

## Save

In [8]:
for v in out.variables:
    out[v].encoding.clear()

out.chunk({"site_id": 100}).to_zarr(str(PATH_SLR_INT), mode="w")

<xarray.backends.zarr.ZarrStore at 0x7fd0200a73c0>