In [None]:
%load_ext autoreload
%autoreload 2

import gstools as gs
import intake
import os
import zarr
import pandas as pd
import xarray as xr
import intake_esm
import numpy as np
from dask.distributed import Client
from cmip6_downscaling import CLIMATE_NORMAL_PERIOD
from cmip6_downscaling.constants import KELVIN, PERCENT, SEC_PER_DAY
import rioxarray
from rasterio.enums import Resampling
from cmip6_downscaling.workflows.share import (
    chunks,
    future_time,
    get_cmip_runs,
    hist_time,
    xy_region,
)
from cmip6_downscaling.workflows.utils import get_store
import matplotlib.pyplot as plt
intake_esm.__version__

In [2]:
import skdownscale

In [3]:
skdownscale.__file__

# access GCM data


In [4]:
from cmip6_downscaling.data.cmip import gcm_munge

In [5]:
# Facet values used to filter the CMIP6 catalog; each list is passed to
# `col.search` under the matching keyword in the next cell.
activity_ids = ["CMIP", "ScenarioMIP"]
experiment_ids = ["historical", "ssp370"]  # , "ssp126", "ssp245",  "ssp585"
member_ids = ["r1i1p1f1"]
source_ids = ["CanESM5"]  # BCC-CSM2-MR"]
table_ids = ["day"]
grid_labels = ["gn"]
# NOTE: despite the plural name, `variables` holds a single variable-id string.
variables = "tasmax"
variable_ids = [variables]  # tasmax, tasmin, pr

In [6]:
# Location of the intake-esm datastore description for the CMIP6 archive.
col_url = "https://cmip6downscaling.blob.core.windows.net/cmip6/pangeo-cmip6.json"

col = intake.open_esm_datastore(col_url)

# Restrict the catalog to the facet values chosen in the configuration cell.
search_facets = {
    "activity_id": activity_ids,
    "experiment_id": experiment_ids,
    "member_id": member_ids,
    "table_id": table_ids,
    "grid_label": grid_labels,
    "variable_id": variable_ids,
    "source_id": source_ids,
}
full_subset = col.search(**search_facets)

In [7]:
# Options handed to the zarr reader for every store in the subset.
zarr_open_options = dict(consolidated=True, decode_times=True, use_cftime=True)

# Storage-account credentials; the key is read from the `AccountKey`
# environment variable and is None when that variable is unset.
blob_storage_options = dict(
    account_name="cmip6downscaling",
    account_key=os.environ.get("AccountKey"),
)

gcm_ds_dict = full_subset.to_dataset_dict(
    zarr_kwargs=zarr_open_options,
    storage_options=blob_storage_options,
)

In [8]:
keys = gcm_ds_dict.keys()

# For each experiment family, take the first dataset whose key mentions it
# and pass it through `gcm_munge`.
historical_key = [name for name in keys if "historical" in name][0]
historical_gcm = gcm_munge(gcm_ds_dict[historical_key])

future_key = [name for name in keys if "ssp" in name][0]
future_gcm = gcm_munge(gcm_ds_dict[future_key])

In [9]:
historical_gcm

Unnamed: 0,Array,Chunk
Bytes,1.97 GB,61.57 MB
Shape,"(60225, 64, 128)","(1879, 64, 128)"
Count,133 Tasks,33 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.97 GB 61.57 MB Shape (60225, 64, 128) (1879, 64, 128) Count 133 Tasks 33 Chunks Type float32 numpy.ndarray",128  64  60225,

Unnamed: 0,Array,Chunk
Bytes,1.97 GB,61.57 MB
Shape,"(60225, 64, 128)","(1879, 64, 128)"
Count,133 Tasks,33 Chunks
Type,float32,numpy.ndarray


In [10]:
future_gcm

Unnamed: 0,Array,Chunk
Bytes,1.03 GB,61.70 MB
Shape,"(31390, 64, 128)","(1883, 64, 128)"
Count,69 Tasks,17 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.03 GB 61.70 MB Shape (31390, 64, 128) (1883, 64, 128) Count 69 Tasks 17 Chunks Type float32 numpy.ndarray",128  64  31390,

Unnamed: 0,Array,Chunk
Bytes,1.03 GB,61.70 MB
Shape,"(31390, 64, 128)","(1883, 64, 128)"
Count,69 Tasks,17 Chunks
Type,float32,numpy.ndarray


# access obs data


In [11]:
# Lookup from CMIP variable ids (keys) to the corresponding ERA5 variable
# names (values).
variable_name_dict = {
    "tasmax": "air_temperature_at_2_metres_1hour_Maximum",
    "tasmin": "air_temperature_at_2_metres_1hour_Minimum",
    "pr": "precipitation_amount_1hour_Accumulation",
}

# specify spatial regional subset and time periods


In [12]:
from cmip6_downscaling.data.cmip import convert_to_360

# parameters
# Time windows given as year strings; a later cell turns each start/end pair
# into a `slice` used to subset the GCM time axis.
historical_start = "2010"
historical_end = "2014"
future_start = "2015"
future_end = "2019"
# Bounding box of the study region. Longitudes are written as positive values
# (227-299), presumably degrees east on a 0-360 grid -- TODO confirm.
min_lat = 19
max_lat = 55
min_lon = 227
max_lon = 299

# chunk shape for dask execution (time must be contiguous, ie -1)
# NOTE(review): this rebinds the name `chunks` that the first cell imports
# from `cmip6_downscaling.workflows.share`; after this cell runs, the imported
# object is no longer reachable under that name.
chunks = {"lat": 10, "lon": 10, "time": -1}

In [13]:
obs = xr.open_zarr("obs_buffer.zarr")
obs

Unnamed: 0,Array,Chunk
Bytes,772.72 MB,400.00 kB
Shape,"(3652, 169, 313)","(1000, 10, 10)"
Count,2177 Tasks,2176 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 772.72 MB 400.00 kB Shape (3652, 169, 313) (1000, 10, 10) Count 2177 Tasks 2176 Chunks Type float32 numpy.ndarray",313  169  3652,

Unnamed: 0,Array,Chunk
Bytes,772.72 MB,400.00 kB
Shape,"(3652, 169, 313)","(1000, 10, 10)"
Count,2177 Tasks,2176 Chunks
Type,float32,numpy.ndarray


In [14]:
historical_period = slice(historical_start, historical_end)
future_period = slice(future_start, future_end)

In [15]:
from cmip6_downscaling.data.observations import get_coarse_obs


def maca_preprocess(
    historical_gcm,
    future_gcm,
    obs,
    min_lon,
    max_lon,
    min_lat,
    max_lat,
):
    """Clip and merge the two GCM runs, then derive matching coarse observations.

    Both GCM inputs are subset to the requested bounding box, combined by
    coordinates into a single object (attributes dropped) and tagged with the
    ``EPSG:4326`` CRS. The observations are rechunked so that each chunk holds
    one time step with ``lat``/``lon`` unsplit, and are then passed to
    ``get_coarse_obs`` together with the first time step of the combined GCM
    data.

    Parameters
    ----------
    historical_gcm, future_gcm
        GCM data for the two periods; both must support ``.sel`` on ``lon``
        and ``lat``.
    obs
        Observations with ``lat``, ``lon`` and ``time`` dimensions.
    min_lon, max_lon
        Longitude bounds; each is passed through ``convert_to_360`` before
        slicing.
    min_lat, max_lat
        Latitude bounds. The slice runs from ``max_lat`` to ``min_lat``, which
        presumes the GCM latitude coordinate is stored in descending order --
        TODO confirm against ``gcm_munge``.

    Returns
    -------
    tuple
        ``(full_gcm, coarse_obs)``: the combined, CRS-tagged GCM data and
        whatever ``get_coarse_obs`` returns for it.
    """
    west_edge = convert_to_360(min_lon)
    east_edge = convert_to_360(max_lon)
    region = {
        "lon": slice(west_edge, east_edge),
        "lat": slice(max_lat, min_lat),
    }

    # Clip each run to the region before merging them along shared coordinates.
    clipped_runs = [run.sel(**region) for run in (historical_gcm, future_gcm)]
    merged = xr.combine_by_coords(clipped_runs, combine_attrs="drop")
    full_gcm = merged.rio.write_crs("EPSG:4326")

    # One time step per chunk, full spatial extent in each chunk.
    per_step_obs = obs.chunk({"lat": -1, "lon": -1, "time": 1})
    template_step = full_gcm.isel(time=0)
    coarse_obs = get_coarse_obs(
        obs=per_step_obs, gcm_ds_single_time_slice=template_step
    )
    return full_gcm, coarse_obs

  if num_peri_dims is not 0:
  if num_peri_dims is not 0:
  if len(staggerloc) is 1:
  assert (len(slc) is 3)
  if meshname is not "":
  if varname is not "":


In [16]:
full_gcm, coarse_obs = maca_preprocess(
    historical_gcm=historical_gcm.sel(time=historical_period),
    future_gcm=future_gcm.sel(time=future_period),
    obs=obs,
    min_lon=min_lon,
    max_lon=max_lon,
    min_lat=min_lat,
    max_lat=max_lat,
)

## bias correction


In [None]:
# Bias-correct the historical and future GCM data against the observations, using the specified bias-correction method.
# If the bias-correction method requires it, also transform the coarse observations.

# Fit and predict with the pointwise GARD model:
#   fit     --> X = coarse obs, y = fine obs
#   predict --> X = coarse historical or future GCM

# Compute the errors and perturb the predictions as needed.


# Run GARD model


In [38]:
from skdownscale.pointwise_models import PureAnalog
from skdownscale.pipelines.gard_wrapper import GardWrapper

# Configure a PureAnalog pointwise model (kind="mean_analogs", n_analogs=10)
# and wrap it in GardWrapper for the "tasmax" feature along the "time" dim.
gard_model = PureAnalog(kind="mean_analogs", n_analogs=10)
gard_wrapper = GardWrapper(
    model=gard_model, feature_list=["tasmax"], dim="time"
)

In [51]:
# `.transpose()` returns a new object, so calling `.load()` on it only fills
# that temporary. Rebind the names so the in-memory, (time, lat, lon)-ordered
# data is what the fit cell actually receives; otherwise the computed result
# is thrown away and the originals stay lazy.
# NOTE(review): `gcm_train` is not assigned in any cell visible in this
# notebook -- it must be defined before this cell for a fresh
# "Restart & Run All" to succeed.
gcm_train = gcm_train.transpose("time", "lat", "lon").load()
obs = obs.transpose("time", "lat", "lon").load()
obs

In [57]:
# Fit the wrapped model with `gcm_train` as X and `obs` as y.
# NOTE(review): `gcm_train` is not assigned in any cell visible in this
# notebook, so this line depends on leftover kernel state -- confirm which
# dataset is meant.
model = gard_wrapper.fit(X=gcm_train, y=obs)

# Step 4: Predict for future (it's already on the coarse scale)


In [62]:
# The original cell referenced `variable`, `holdout_slice`, `lat_slice` and
# `lon_slice`, none of which exist at notebook scope (the two spatial slices
# are locals of `maca_preprocess`), so it failed with NameError on a fresh
# kernel. `full_gcm` has already been clipped to the study region by
# `maca_preprocess`, so only the time axis is subset here.
# NOTE(review): `future_period` is assumed to be the intended holdout window
# -- confirm.
future = full_gcm[[variables]].sel(time=future_period).load()
future

In [64]:
future_downscaled = model.predict(future)

In [65]:
future_downscaled

In [68]:
# Plot the time step at index 3 of the downscaled output.
# Note: this downscaling didn't work very well.
future_downscaled.isel(time=3).plot()