In [None]:
# Automatically reload edited modules before each cell is executed.
%load_ext autoreload
%autoreload 2

# Unbiased ECMWF

Here we propose a simple model: the ECMWF forecast with its bias removed, where the bias is estimated from the data we have.
We also compute and score the climatology, to use as a baseline.

In [None]:
import dask
import dask.array as da
import dask.distributed
import dask_jobqueue
import datetime
import matplotlib.pyplot as plt
import numpy as np
import pathlib
import scipy.stats
import xarray as xr

In [None]:
from crims2s.util import fix_dataset_dims

In [None]:
# Directory scanned below for the model (forecast) input files.
INPUT_TRAIN = '***BASEDIR***training-input/0.3.0/netcdf'
# Directory scanned below for the reference observation files.
OBSERVATIONS = '***BASEDIR***training-output-reference/'

## Boot dask cluster

In [None]:
# SLURM-backed dask cluster. The extra environment lines source the user's
# bash profile and activate the `s2s` conda environment.
cluster = dask_jobqueue.SLURMCluster(
    env_extra=[
        'source ***HOME***.bash_profile',
        'conda activate s2s',
    ],
)

In [None]:
cluster.scale(jobs=3)  # Scale to three worker jobs.
client = dask.distributed.Client(cluster)

In [None]:
# Display the client summary.
client

## Open model data

In [None]:
# Forecasting centre and variable name. Both are matched as substrings of the
# input file stems when the model files are listed.
CENTER = 'ecmwf'
FIELD = 't2m'

In [None]:
# Model input directory as a Path object so it can be iterated.
input_path = pathlib.Path(INPUT_TRAIN)

In [None]:
# Keep only the entries whose stem mentions both the centre and the field,
# in sorted order.
files = sorted(
    path
    for path in input_path.iterdir()
    if CENTER in path.stem and FIELD in path.stem
)

In [None]:
# Peek at the first ten matching files.
files[:10]

In [None]:
# Open a single file on its own to inspect its structure.
one_ecmwf = xr.open_dataset(files[0])

In [None]:
# Display the single-file dataset as loaded.
one_ecmwf

In [None]:
# Display what fix_dataset_dims returns for that single file.
fix_dataset_dims(one_ecmwf)

In [None]:
# Open every matching file as one combined dataset, applying fix_dataset_dims
# to each file before they are combined.
ecmwf = xr.open_mfdataset(files, preprocess=fix_dataset_dims)

In [None]:
# Inspect the combined model dataset.
ecmwf

In [None]:
# Train/validation split by forecast year: the training slice is bounded
# above by 2018, the validation slice is bounded below by 2019.
ecmwf_train = ecmwf.sel({'forecast_year': slice(None, 2018)})
ecmwf_val = ecmwf.sel({'forecast_year': slice(2019, None)})

In [None]:
# Inspect the training subset of the model data.
ecmwf_train

In [None]:
# Inspect the validation subset of the model data.
ecmwf_val

In [None]:
# Mean and standard deviation across the `realization` dimension, computed on
# the full dataset (training and validation years together).
ecmwf_mean = ecmwf.mean(dim='realization')
ecmwf_std = ecmwf.std(dim='realization')

In [None]:
# Inspect the mean over realizations.
ecmwf_mean

## Open observations

In [None]:
obs_path = pathlib.Path(OBSERVATIONS)
# Select the observation files for the same field as the model data (FIELD,
# rather than a second hard-coded 't2m'), in a deterministic order: iterdir()
# yields directory entries in arbitrary order.
obs_files = sorted(f for f in obs_path.iterdir() if FIELD in f.stem)

In [None]:
# Open all observation files as one dataset, then discard the first lead_time
# entry with isel(lead_time=slice(1, None)).
# NOTE(review): presumably dropped to line up with the model lead times — confirm.
obs = xr.open_mfdataset(obs_files, preprocess=fix_dataset_dims).isel(lead_time=slice(1, None))

In [None]:
# Inspect the combined observation dataset.
obs

In [None]:
# Same forecast-year split as for the model data: the training slice is
# bounded above by 2018, the validation slice is bounded below by 2019.
obs_train = obs.sel({'forecast_year': slice(None, 2018)})
obs_val = obs.sel({'forecast_year': slice(2019, None)})

In [None]:
# Inspect the training observations.
obs_train

In [None]:
# Weeks 3-4 (lead times 14 to 27 days) of the training observations.
# NOTE(review): `obs_weeks34` was used here without being defined anywhere in
# the notebook, so this cell raised NameError on a fresh kernel. Training years
# are used so the validation years stay out of the baseline — confirm that this
# matches the original intent.
obs_weeks34 = obs_train.sel(lead_time=slice('14D', '27D'))
# Climatological mean per forecast day of year and grid point.
climatology_mean = obs_weeks34.mean(dim=['forecast_year', 'lead_time']).compute()

In [None]:
# Weeks 3-4 (lead times 14 to 27 days) of the training observations.
# NOTE(review): `obs_weeks34` was used without being defined anywhere in the
# notebook (NameError on a fresh kernel); it is built here from the training
# years so this cell runs on its own — confirm that this matches the intent.
obs_weeks34 = obs_train.sel(lead_time=slice('14D', '27D'))
# Climatological standard deviation per forecast day of year and grid point.
climatology_std = obs_weeks34.std(dim=['forecast_year', 'lead_time']).compute()

In [None]:
# Map of the climatological mean for the first forecast day of year.
climatology_mean.t2m.isel(forecast_dayofyear=0).plot()

In [None]:
# Map of the climatological standard deviation for the first forecast day of year.
climatology_std.t2m.isel(forecast_dayofyear=0).plot()

## Compute model bias

In [None]:
# Mean of the training forecasts over the `realization` dimension.
ecmwf_train_mean = ecmwf_train.mean(dim='realization')

In [None]:
# Inspect the training forecast mean.
ecmwf_train_mean

In [None]:
# Signed bias on the training years: forecast mean minus observation
# (positive where the model is above the observations).
ecmwf_train_bias = ecmwf_train_mean - obs_train

In [None]:
# Average the training bias over forecast years and over the weeks 3-4 lead
# times. Label-based slices include both endpoints, so the upper bound is
# day 27: this selects the same 14-27 day window as the other weeks 3-4
# selections in this notebook. An upper bound of 28 days would also pull in
# the day-28 lead time if one is present.
ecmwf_train_bias_week34 = (
    ecmwf_train_bias
    .sel(lead_time=slice(datetime.timedelta(days=14), datetime.timedelta(days=27)))
    .mean(dim=['forecast_year', 'lead_time'])
)

In [None]:
# Inspect the weeks 3-4 training bias.
ecmwf_train_bias_week34

In [None]:
# Map of the weeks 3-4 bias at forecast day-of-year index 10.
ecmwf_train_bias_week34.t2m.isel(forecast_dayofyear=10).plot()

In [None]:
# Validation forecasts restricted to lead times from 14 to 27 days (weeks 3-4).
ecmwf_val_w34 = ecmwf_val.sel(lead_time=slice('14D', '27D'))

In [None]:
# Inspect the weeks 3-4 validation forecasts.
ecmwf_val_w34

In [None]:
# Subtract the training-period weeks 3-4 bias from the validation forecasts.
ecmwf_val_w34_debiased = ecmwf_val_w34 - ecmwf_train_bias_week34

In [None]:
# Mean of the debiased validation forecast over realizations and lead times.
ecmwf_val_w34_debiased_mean = ecmwf_val_w34_debiased.mean(dim=['realization', 'lead_time'])

In [None]:
# Inspect the debiased forecast mean.
ecmwf_val_w34_debiased_mean

In [None]:
# Standard deviation of the debiased validation forecast over realizations and
# lead times; used below as the spread of the forecast distribution.
ecmwf_val_w34_debiased_std = ecmwf_val_w34_debiased.std(dim=['realization', 'lead_time'])

In [None]:
# Inspect the debiased forecast spread.
ecmwf_val_w34_debiased_std

In [None]:
# Display the training observations again for reference.
obs_train

In [None]:
# Difference between validation observations and the debiased forecast mean.
# NOTE(review): obs_val still carries every lead_time, while the forecast mean
# has already been averaged over lead_time, so the two do not cover the same
# window. This looks exploratory; the result is only displayed, not stored.
obs_val - ecmwf_val_w34_debiased_mean

In [None]:
# Validation observations restricted to lead times from 14 to 27 days.
obs_val_w34 = obs_val.sel(lead_time=slice('14D', '27D'))

In [None]:
# Inspect the weeks 3-4 validation observations.
obs_val_w34

In [None]:
# Training observations restricted to lead times from 14 to 27 days.
obs_train_w34 = obs_train.sel(lead_time=slice('14D', '27D'))

In [None]:
# Inspect the weeks 3-4 training observations.
obs_train_w34

In [None]:
# 0.33 and 0.66 quantiles of the training observations over forecast years and
# lead times; used below as the boundaries between the three categories.
# NOTE(review): .33/.66 rather than exact 1/3 and 2/3 — confirm this is intended.
thresholds = obs_train_w34.quantile([.33, .66], dim=['forecast_year', 'lead_time'])

In [None]:
# Inspect the category thresholds.
thresholds

In [None]:
# Map of the upper threshold (quantile index 1) for the first forecast day of year.
thresholds.t2m.isel(quantile=1, forecast_dayofyear=0).plot()

In [None]:
# Show both the debiased mean and the debiased spread. A bare expression that
# is not the last line of a cell is silently discarded, so use display() to
# render the two objects.
display(ecmwf_val_w34_debiased_mean, ecmwf_val_w34_debiased_std)

In [None]:
# Gaussian forecast distribution parameterised by the debiased mean and spread.
# `.values` materialises the (presumably dask-backed, since the data comes from
# open_mfdataset) arrays once. Passing the lazy `.data` arrays instead makes
# SciPy convert them, and hence recompute them, on every cdf call.
forecast_distribution = scipy.stats.norm(
    loc=ecmwf_val_w34_debiased_mean.t2m.values,
    scale=ecmwf_val_w34_debiased_std.t2m.values,
)

In [None]:
# Probability of falling below the lower threshold: the forecast CDF evaluated
# at quantile index 0.
p_below = forecast_distribution.cdf(thresholds.isel(quantile=0).t2m.data)

In [None]:
# Make sure `p_below` is a plain NumPy array. This cell used to read IPython's
# `_` (the last displayed output). On a top-to-bottom run that is not the CDF
# result, because the cell computing `p_below` is an assignment and displays
# nothing, so `p_below` was overwritten with an unrelated object.
p_below = np.asarray(p_below)

In [None]:
# Does the below-normal probability contain any NaN? NumPy arrays have no
# `.isnan()` method, so the `np.isnan` function is used instead.
np.isnan(p_below).any()

In [None]:
# Number of NaN entries in the below-normal probability.
np.isnan(p_below).sum()

In [None]:
# Shape of the below-normal probability array.
p_below.shape

In [None]:
# Image of the below-normal probability at index [0, 30] of the two leading
# axes, with a colorbar.
fig, ax = plt.subplots()
# Note: despite its name, `cax` is the image returned by imshow; it is passed
# to colorbar as the mappable.
cax = ax.imshow(p_below[0,30])
fig.colorbar(cax, ax=ax)

In [None]:
# Forecast CDF evaluated at the upper threshold (quantile index 1), i.e. the
# probability of being at or below it. Despite the name this is not yet the
# above-normal probability; `p_above` is rebound further down.
p_above = forecast_distribution.cdf(thresholds.isel(quantile=1).t2m.data)

In [None]:
# Probability of falling between the two thresholds: CDF(upper) - CDF(lower).
# Relies on `p_above` still holding the CDF at the upper threshold here.
p_normal = p_above - p_below

In [None]:
# Shape of the within-normal probability array.
p_normal.shape

In [None]:
# Probability of exceeding the upper threshold. It is computed directly from
# the survival function (1 - CDF) so that this cell is idempotent. The previous
# form, `p_above = 1 - p_above`, flipped the value back to the CDF every time
# the cell was re-run.
p_above = forecast_distribution.sf(thresholds.isel(quantile=1).t2m.data)

In [None]:
# Sanity check on the [0, 0] slice: smallest value of the summed category
# probabilities over the cells where p_above is not NaN. It should be close to 1.
(p_above + p_normal + p_below)[0, 0][~np.isnan(p_above)[0, 0]].min()

In [None]:
# Image of the within-normal probability at index [0, 0] of the two leading axes.
fig, ax = plt.subplots()
ax.imshow(p_normal[0, 0])

In [None]:
# Stack the three category probabilities along a new leading axis, in the
# order below / within / above.
forecast = np.stack((p_below, p_normal, p_above), axis=0)

In [None]:
# Shape of the stacked forecast; the leading axis has one entry per category.
forecast.shape

In [None]:
# Wrap the stacked probabilities in a labelled DataArray. The leading axis
# produced by the stacking step becomes the `category` dimension.
forecast = xr.DataArray(
    forecast,
    dims=['category', 'forecast_year', 'forecast_dayofyear', 'latitude', 'longitude'],
    coords={
        'category': xr.DataArray(
            ['below normal', 'within normal', 'above normal'],
            dims='category',
        ),
    },
)

In [None]:
# Attach coordinates taken from the observation datasets: day of year and the
# grid from the training observations, forecast_time from the validation
# observations.
forecast = forecast.assign_coords(
    forecast_dayofyear=obs_train_w34.forecast_dayofyear,
    latitude=obs_train_w34.latitude,
    longitude=obs_train_w34.longitude,
    forecast_time=obs_val_w34.forecast_time,
)

In [None]:
# Package the probabilistic forecast as a dataset with a single `t2m` variable.
dataset = xr.Dataset({'t2m': forecast})

In [None]:
# Inspect the final forecast dataset.
dataset

In [None]:
# Plot the "above normal" probability (category index 2) for the first
# forecast day of year.
dataset.t2m.isel({'category': 2, 'forecast_dayofyear': 0}).plot()