In [None]:
import xarray as xr
import zarr
import os
import bokeh
from matplotlib import pyplot as plt
import numpy as np
from adlfs import AzureBlobFileSystem

In [None]:
from dask_gateway import GatewayCluster
from dask.distributed import Client

In [None]:
# Create a Dask Gateway cluster. No options are passed here, so whatever
# defaults the gateway deployment is configured with apply.
cluster = GatewayCluster()

In [None]:
# Bare expression: shows the cluster object as this cell's output.
cluster

In [None]:
# Connect a dask.distributed client to the gateway cluster created above.
client = Client(cluster)

In [None]:
# Enable adaptive scaling, bounded between 2 and 40.
cluster.adapt(minimum=2, maximum=40)

First, load in your list of models for which downscaled climate simulations are
available.


In [None]:
# List everything under the bias-correction prefix of the scratch container.
# The account key is read from the environment rather than written in the
# notebook.
fs = AzureBlobFileSystem(
    "carbonplan", account_key=os.environ["BLOB_ACCOUNT_KEY"]
)
file_list = fs.ls("carbonplan-scratch/downscaling/bias-correction")
# Keep only the last path component (the store name). Stripping a trailing "/"
# first makes this work whether or not the listing returns entries with a
# trailing slash; indexing [-2] directly would return "bias-correction" for
# every entry when there is no trailing slash.
files = [path.rstrip("/").split("/")[-1] for path in file_list]

Then load up a sample dataset to take a look at the domain and understand what
you're working with.


In [None]:
# Open one store from the listing (position 40) as a sample dataset.
sample_store_name = files[40]
store_url = "downscaling/bias-correction/" + sample_store_name
store = zarr.storage.ABSStore(
    "carbonplan-scratch",
    account_name="carbonplan",
    account_key=os.environ["BLOB_ACCOUNT_KEY"],
    prefix=store_url,
)
ds = xr.open_zarr(store, consolidated=True)

# Quick look at the domain: precipitation at the first time step.
first_step_pr = ds["pr"].isel(time=0)
first_step_pr.plot()

Then you can access the names of the individual GCMs that we have available.


In [None]:
# The GCM identifier is the 2nd and 3rd dot-separated fields of each store
# name (presumably institution and model name -- confirm against the listing).
# A set comprehension removes duplicates across experiments/scenarios; sorting
# makes the order of the "gcm" coordinate deterministic from run to run, which
# iterating a set of strings does not guarantee.
models = sorted({".".join(filename.split(".")[1:3]) for filename in files})

# One entry per scenario: (experiment prefix used in the store name,
# scenario name, time window to analyse).
scenarios = [
    ("CMIP", "historical", slice("1995-01-01", "2015-12-31")),
    ("ScenarioMIP", "ssp245", slice("2050-01-01", "2069-12-31")),
    ("ScenarioMIP", "ssp370", slice("2050-01-01", "2069-12-31")),
    ("ScenarioMIP", "ssp585", slice("2050-01-01", "2069-12-31")),
]

Next, let's create a few bounding boxes to get at some regional differences.


In [None]:
# Region name -> {"lat": [lower, upper], "lon": [lower, upper]} bounds.
# Each row below is (name, lat lower, lat upper, lon lower, lon upper).
region_bounding_boxes = {
    name: {"lat": [lat_lo, lat_hi], "lon": [lon_lo, lon_hi]}
    for name, lat_lo, lat_hi, lon_lo, lon_hi in [
        ("Pacific Northwest", 41, 49, -130, -110),
        ("West", 20, 49, -130, -105),
        ("Northeast", 41, 48, -93, -66),
        ("Southeast", 25, 37, -93, -76),
    ]
}

Let's then create a data array with dimensions [GCM, scenario, region] to house
all of our analyses. We'll start with precipitation and then just copy it for
tasmax and tasmin.


In [None]:
# Summary array with one cell per (gcm, scenario, region).
# It starts out as NaN rather than 0 so that any cell that never gets filled
# (e.g. a model/scenario store that fails to load) stays visibly missing
# instead of looking like a real value of zero.
# Scenario labels are derived from `scenarios` so the two cannot drift apart.
precipitation = xr.DataArray(
    np.full(
        (len(models), len(scenarios), len(region_bounding_boxes)), np.nan
    ),
    dims=("gcm", "scenario", "region"),
    coords={
        "gcm": models,
        "scenario": [scenario_name for (_, scenario_name, _) in scenarios],
        "region": list(region_bounding_boxes),
    },
)

In [None]:
# The temperature summaries reuse the precipitation array's dims and labels.
# Each one is its own deep copy, so the three arrays do not share data.
tasmax, tasmin = (precipitation.copy(deep=True) for _ in range(2))

Now let's fill each of the prepared summary arrays with the regional averages
from each of the different GCMs, for every bounding box and every scenario.


In [None]:
def open_bias_corrected(experiment, gcm, scenario):
    """Open the bias-corrected zarr store for one experiment/GCM/scenario.

    Parameters
    ----------
    experiment, gcm, scenario : str
        Pieces of the store name, joined as
        ``{experiment}.{gcm}.{scenario}.Amon.gn``.

    Returns
    -------
    xarray.Dataset
        The dataset opened from the store's consolidated metadata.

    Raises
    ------
    Exception
        Whatever zarr/xarray raise when the store is missing or unreadable;
        the caller decides how to handle it.
    """
    store_url = f"downscaling/bias-correction/{experiment}.{gcm}.{scenario}.Amon.gn"
    store = zarr.storage.ABSStore(
        "carbonplan-scratch",
        prefix=store_url,
        account_name="carbonplan",
        account_key=os.environ["BLOB_ACCOUNT_KEY"],
    )
    run_ds = xr.open_zarr(store, consolidated=True)
    print(store_url)
    return run_ds


def summarize_region(run_ds, lat_bounds, lon_bounds, time_slice):
    """Compute regional summaries of pr, tasmax and tasmin for one bounding box.

    Parameters
    ----------
    run_ds : xarray.Dataset
        Dataset providing ``pr``, ``tasmax``, ``tasmin``, ``lat``, ``lon`` and
        the dims ``time``, ``x``, ``y``.
    lat_bounds, lon_bounds : sequence of two numbers
        Exclusive (lower, upper) bounds of the box.
    time_slice : slice
        Time window passed to ``.sel(time=...)``.

    Returns
    -------
    tuple
        ``(pr, tasmax, tasmin)``: pr is summed over time, the temperatures are
        averaged over time, and all three are then averaged over x and y.
    """
    # Feels like it would be cleaner to take a .sel(x=slice()) approach, but
    # that got messy with the meters units, so filter on lat/lon instead:
    # longer, but also more readable.
    in_box = (
        (run_ds.lat > lat_bounds[0])
        & (run_ds.lat < lat_bounds[1])
        & (run_ds.lon > lon_bounds[0])
        & (run_ds.lon < lon_bounds[1])
    )
    ds_box = run_ds.where(in_box).sel(time=time_slice)

    # hacky way of finding non-ocean (note for northeast- this does not mask
    # out the Great Lakes- I need a land mask for that). Cells that are NaN at
    # the first time step compare False and are masked out. drop=True removes
    # the leftover scalar time coordinate so it cannot clash with the time
    # axis of ds_box.
    mask = run_ds["tasmax"].where(in_box).isel(time=0, drop=True) > 0

    def spatial_mean(field):
        # NOTE(review): [0] assumes exactly one length>=1 dimension is left
        # after reducing over time, x and y -- confirm against the stores.
        return field.mean(dim=["x", "y"]).values[0]

    # sum() skips NaNs, so a fully masked cell would otherwise total 0 and
    # drag the spatial mean towards zero. min_count=1 keeps such cells NaN so
    # the mean only covers cells inside the box and mask.
    pr_value = spatial_mean(
        ds_box["pr"].where(mask).sum(dim="time", min_count=1)
    )
    tasmax_value = spatial_mean(ds_box["tasmax"].where(mask).mean(dim="time"))
    # tasmin is read from the "tasmin" variable (not "tasmax").
    tasmin_value = spatial_mean(ds_box["tasmin"].where(mask).mean(dim="time"))
    return pr_value, tasmax_value, tasmin_value


# Fill the summary arrays. Each store is opened once per (scenario, gcm) and
# then reused for every region. Values are written with a single
# .loc[dict(...)] assignment, which updates the array in place without relying
# on chained indexing returning views.
summary_arrays = (precipitation, tasmax, tasmin)

for (experiment, scenario, time_slice) in scenarios:
    for gcm in models:
        print(gcm, scenario)
        try:
            run_ds = open_bias_corrected(experiment, gcm, scenario)
        except Exception as err:
            # Best effort: not every GCM has every scenario. Report why it
            # failed and record the whole run as missing.
            print("uh oh! {} {} doesnt work".format(gcm, scenario))
            print("    reason: {!r}".format(err))
            for summary in summary_arrays:
                summary.loc[dict(gcm=gcm, scenario=scenario)] = np.nan
            continue

        for region, bounds in region_bounding_boxes.items():
            print(region)
            try:
                values = summarize_region(
                    run_ds, bounds["lat"], bounds["lon"], time_slice
                )
            except Exception as err:
                print("uh oh! {} {} doesnt work".format(gcm, scenario))
                print("    reason ({}): {!r}".format(region, err))
                values = (np.nan, np.nan, np.nan)

            cell = dict(gcm=gcm, scenario=scenario, region=region)
            for summary, value in zip(summary_arrays, values):
                summary.loc[cell] = value

Then we can take the future/historical parts of that array and take a delta
(either absolutely or relatively) to create an array with just the future
scenarios.


In [None]:
# Relative change in precipitation, in percent of the historical value:
# (future - historical) / historical * 100. Dividing future by historical
# alone would give "percent of historical" (e.g. 105) rather than a change
# (+5), which is what a delta is.
historical_precipitation = precipitation.sel(scenario="historical")
delta_precipitation = (
    (
        precipitation.sel(scenario=["ssp245", "ssp370", "ssp585"])
        - historical_precipitation
    )
    / historical_precipitation
    * 100
)

In [None]:
# Absolute temperature change: each future scenario minus the historical value.
future_scenarios = ["ssp245", "ssp370", "ssp585"]

historical_tasmax = tasmax.sel(scenario="historical")
delta_tasmax = tasmax.sel(scenario=future_scenarios) - historical_tasmax

historical_tasmin = tasmin.sel(scenario="historical")
delta_tasmin = tasmin.sel(scenario=future_scenarios) - historical_tasmin

We have temperature and precip so we'll want to make an x-y scatter plot showing
the changes in climate for each of our different climate simulations. We'll
focus on the downscaled simulations since that's what is actually being fed into
the subsequent drought/insect/fire models. While repeating these analyses for
the raw vs. downscaled datasets would also be relevant, ideally the
downscaling/bias-correction method should preserve the precip/temp deltas and so
the difference between raw and downscaled deltas should be negligible.


In [None]:
# TODO: plot changes in precip (percent) and temp (absolute) for the different regions and time periods
### so it'll be 3

In [None]:
# xarray data array and populate labeled array with the historical values and the future values

For the purposes of these analyses we assume that the average temperature at the
surface is the average of maximum and minimum temperatures.


In [None]:
# NOTE(review): these bounds look like latitude (35 to 50) and longitude
# (-122 to -110) in degrees, but they are passed to the x and y dimensions.
# The regional loop filters on ds.lat / ds.lon instead of x / y, which suggests
# x and y are not in degrees -- confirm this selects what was intended.
ds.sel(x=slice(35, 50), y=slice(-122, -110))

In [None]:
# NOTE(review): `z1` is not defined anywhere in the visible notebook, so this
# cell presumably raises NameError on a fresh Restart & Run All -- confirm and
# remove it if it is leftover scratch.
z1