# Try using `climpred` to recreate some results from the CanESM5 decadal prediction paper

In [1]:
import cftime

import numpy as np
import xarray as xr

from src import plot
import matplotlib.pyplot as plt

import climpred
from climpred import HindcastEnsemble

In [2]:
%load_ext autoreload
%autoreload 2
%load_ext lab_black

In [3]:
xr.set_options(keep_attrs=True)

<xarray.core.options.set_options at 0x150b1d6f3d60>

## Load some data to process

In [12]:
DATA_DIR = "../../data/processed/"

hcst_dataset = "CanESM5.annual.anom_1991-2020"
hist_dataset = "CanESM5_hist.annual.anom_1991-2020"
obsv_dataset = "HadISST.annual.anom_1991-2020"

variable = "sst"

In [13]:
hcst = xr.open_zarr(f"{DATA_DIR}/{hcst_dataset}.{variable}.zarr")
hist = xr.open_zarr(
    f"{DATA_DIR}/{hist_dataset}.{variable}.zarr"
).unify_chunks()  # can delete
obsv = xr.open_zarr(f"{DATA_DIR}/{obsv_dataset}.{variable}.zarr")

hcst = hcst.drop("time")  # can delete
hcst.lead.attrs = {"units": "months"}  # can delete

1. Consolidating metadata in this existing store with zarr.consolidate_metadata().
2. Explicitly setting consolidated=False, to avoid trying to read consolidate metadata, or
3. Explicitly setting consolidated=True, to raise an error in this case instead of falling back to try reading non-consolidated metadata.
  hcst = xr.open_zarr(f"{DATA_DIR}/{hcst_dataset}.{variable}.zarr")


GroupNotFoundError: group not found at path ''

In [6]:
hcst_full = xr.open_zarr(f"{DATA_DIR}/{hcst_dataset}.{variable}.annual.full.zarr")
hist_full = xr.open_zarr(
    f"{DATA_DIR}/{hist_dataset}.{variable}.annual.full.zarr"
).unify_chunks()  # can delete
obsv_full = xr.open_zarr(f"{DATA_DIR}/{obsv_dataset}.{variable}.annual.full.zarr")

hcst_full = hcst_full.drop("time")
hcst_full.lead.attrs = {"units": "months"}  # can delete

In [7]:
def global_mean(ds):
    return ds.weighted(ds["area"]).mean(["lon", "lat"]).compute()

In [8]:
hcst_gbl = global_mean(hcst)
hist_gbl = global_mean(hist)
obsv_gbl = global_mean(obsv)

In [9]:
hcst_full_gbl = global_mean(hcst_full)
hist_full_gbl = global_mean(hist_full)
obsv_full_gbl = global_mean(obsv_full)

ValueError: `weights` cannot contain missing values. Missing values can be replaced by `weights.fillna(0)`.

## How does climpred bias correction compare to anomalisation?

In [None]:
hindcast = HindcastEnsemble(hcst)
hindcast = hindcast.add_observations(obsv)
hindcast = hindcast.add_uninitialized(hist.chunk({"time": -1}))  # can delete

hindcast_gbl = HindcastEnsemble(hcst_gbl)
hindcast_gbl = hindcast_gbl.add_observations(obsv_gbl)
hindcast_gbl = hindcast_gbl.add_uninitialized(hist_gbl)

In [None]:
hindcast_full_gbl = HindcastEnsemble(hcst_full_gbl)
hindcast_full_gbl = hindcast_full_gbl.add_observations(obsv_full_gbl)
hindcast_full_gbl = hindcast_full_gbl.add_uninitialized(hist_full_gbl)

hindcast_cpbc_gbl = hindcast_full_gbl.remove_bias(
    alignment="same_verifs", how="additive_mean"
)

In [None]:
fig = plot.hindcasts(
    {"CAFE-f6 bias corrected": hindcast_cpbc_gbl.get_initialized().mean("member")},
    {"Observations": hindcast_cpbc_gbl.get_observations()},
    shade=True,
)
fig.suptitle("climpred bias correction")

fig = plot.hindcasts(
    {"CAFE-f6 anomalies": hindcast_gbl.get_initialized().mean("member")},
    {"Observed anomalies": hindcast_gbl.get_observations()},
    {"CAFE historical anomalies": hindcast_gbl.get_uninitialized().mean("member")},
    shade=True,
)
_ = fig.suptitle("1991-2020 anomalies")

## Compute a skill score

In [10]:
import logging

logger = logging.getLogger()
logger.setLevel(logging.INFO)

In [11]:
acc = hindcast.verify(
    reference="uninitialized",
    metric="pearson_r",
    comparison="e2o",
    alignment="same_verifs",
    dim=["init"],
)

NameError: name 'hindcast' is not defined

In [68]:
test

Unnamed: 0,Array,Chunk
Bytes,1.98 MiB,63.28 kiB
Shape,"(2, 10, 90, 144)","(1, 1, 90, 90)"
Count,370 Tasks,40 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.98 MiB 63.28 kiB Shape (2, 10, 90, 144) (1, 1, 90, 90) Count 370 Tasks 40 Chunks Type float64 numpy.ndarray",2  1  144  90  10,

Unnamed: 0,Array,Chunk
Bytes,1.98 MiB,63.28 kiB
Shape,"(2, 10, 90, 144)","(1, 1, 90, 90)"
Count,370 Tasks,40 Chunks
Type,float64,numpy.ndarray
