In [None]:
# Load the nb_black IPython extension for this notebook session.
%load_ext nb_black

In [1]:
# Import some python libraries
%matplotlib inline

import numpy as np
import xarray as xr
import matplotlib.pyplot as plt



In [None]:
from time import sleep

from dask.distributed import Client
from dask_gateway import Gateway

# Configure a Dask Gateway cluster with worker_cores set to 4.
gateway = Gateway()
options = gateway.cluster_options()
options.worker_cores = 4
cluster = gateway.new_cluster(cluster_options=options)

# Scale the cluster to 48, then pause 10 seconds before the next cell
# connects a client to it.
cluster.scale(48)
sleep(10)

In [None]:
# Connect a distributed client to the gateway cluster, then call restart() on it.
client = Client(cluster)
client.restart()

---

# Introduction

This notebook demonstrates the performance of `climpred` on large datasets.
Here, we run `bootstrap_perfect_model` and `bootstrap_hindcast` with `bootstrap`
resampling iterations to calculate a p-value for whether initialized skill surpasses uninitialized skill.

---

# perfect_model

## fake data

In [None]:
def gen_pm(degree=5, chunking_dim='lon'):
    """Create random perfect-model test data: an initialized ensemble and a control run.

    Parameters
    ----------
    degree : int
        Grid spacing in degrees; the grid has ``360 // degree`` longitude
        points and ``180 // degree`` latitude points.
    chunking_dim : str or None
        Dimension along which both arrays are chunked with ``'auto'`` chunk
        sizes. With ``None`` this step is skipped, leaving the ensemble
        unchunked and the control chunked along ``time`` only.

    Returns
    -------
    tuple of xr.DataArray
        ``(ds, control)``: the ensemble ``var`` with dims
        ``(member, init, lead, lon, lat)`` and the control ``var`` with dims
        ``(time, lon, lat)``, both filled by ``np.random.random``.
    """
    # Fixed sizes of the synthetic experiment.
    n_member, n_init, n_lead = 10, 12, 5
    n_lon = 360 // degree
    n_lat = 180 // degree
    first_year, last_year = 3000, 3300
    n_time = last_year - first_year

    # Yearly control time axis and initializations every 10 years, both
    # starting at ``first_year`` on a 'noleap' calendar.
    shared = dict(start=str(first_year), calendar='noleap')
    control_times = xr.cftime_range(periods=n_time, freq='YS', **shared)
    init_dates = xr.cftime_range(periods=n_init, freq='10YS', **shared)
    lead_axis = np.arange(1, 1 + n_lead)
    member_axis = np.arange(1, 1 + n_member)

    lon_axis = xr.DataArray(
        np.linspace(0.5, 359.5, n_lon),
        dims=('lon',),
        attrs={'units': 'degrees east', 'long_name': 'longitude'},
    )
    lat_axis = xr.DataArray(
        np.linspace(-89.5, 89.5, n_lat),
        dims=('lat',),
        attrs={'units': 'degrees north', 'long_name': 'latitude'},
    )

    # Initialized ensemble.
    ensemble = xr.Dataset()
    ensemble['var'] = xr.DataArray(
        np.random.random((n_member, n_init, n_lead, n_lon, n_lat)),
        coords={
            'member': member_axis,
            'init': init_dates,
            'lon': lon_axis,
            'lat': lat_axis,
            'lead': lead_axis,
        },
        dims=('member', 'init', 'lead', 'lon', 'lat'),
        name='var',
    )

    # Control run, chunked along time right away.
    reference = xr.Dataset()
    reference['var'] = xr.DataArray(
        np.random.random((n_time, n_lon, n_lat)),
        coords={'lon': lon_axis, 'lat': lat_axis, 'time': control_times},
        dims=('time', 'lon', 'lat'),
        name='var',
        attrs={'units': 'var units', 'description': 'a description'},
    )
    reference = reference['var'].chunk({'time': 'auto'})

    # Label the lead coordinate before reducing the Dataset to its variable.
    ensemble.lead.attrs['units'] = 'years'
    ensemble = ensemble['var']

    if chunking_dim is None:
        return ensemble, reference

    rechunk = {chunking_dim: 'auto'}
    return ensemble.chunk(rechunk), reference.chunk(rechunk)

ds, control = gen_pm()

## Skill

In [None]:
from climpred.prediction import compute_perfect_model

In [None]:
# Metric and comparison keywords shared by the perfect-model skill calls below.
cp_kwargs = dict(metric='acc', comparison='m2e')

In [None]:
# Time the perfect-model skill call; metric and comparison come from cp_kwargs.
%time skill = compute_perfect_model(ds, control, **cp_kwargs)

In [None]:
# Time evaluating the skill result with .compute().
%time skillc = skill.compute()

## Bootstrap significant skill

In [None]:
from climpred.bootstrap import bootstrap_perfect_model

In [None]:
# Value passed as the `bootstrap` keyword to the bootstrapping call below.
bootstrap = 100

In [None]:
%time bskill = bootstrap_hindcast(hind, hist, obsd, bootstrap=bootstrap, **cp_kwargs)

In [None]:
improved_by_init = bskill.sel(results='skill',kind='init').where(bskill.sel(results='p',kind='uninit') <= 0.05)
%time improved_by_init_c = improved_by_init.compute()

# Hindcast

## fake data

In [None]:
def gen_hind(degree=5, nmember=34, nlead=10, chunking_dim='lon'):
    """Create random hindcast test data: initialized, uninitialized and observations.

    Parameters
    ----------
    degree : int
        Grid spacing in degrees; the grid has ``360 // degree`` longitude
        points and ``180 // degree`` latitude points.
    nmember : int
        Number of ensemble members in the hindcast and uninitialized arrays.
    nlead : int
        Number of lead steps in the hindcast.
    chunking_dim : str or None
        Dimension along which all three datasets are chunked with ``'auto'``
        chunk sizes; ``None`` leaves them unchunked.

    Returns
    -------
    tuple of xr.DataArray
        ``(hind, uninit, observations)``: the hindcast ``var`` with dims
        ``(member, init, lead, lon, lat)``, the uninitialized ``var`` with
        dims ``(time, lon, lat, member)`` and the observations ``var`` with
        dims ``(time, lon, lat)``, all filled by ``np.random.random``.
    """
    hind = xr.Dataset()
    observations = xr.Dataset()
    uninit = xr.Dataset()
    nx = 360 // degree
    ny = 180 // degree
    init_start = 1960
    init_end = 2015
    ninit = init_end - init_start

    leads = np.arange(1, 1 + nlead)
    members = np.arange(1, 1 + nmember)
    # One initialization per year from init_start through init_end - 1,
    # which gives exactly ninit dates; also used as the time axis of the
    # observations and the uninitialized ensemble.
    inits = xr.cftime_range(
        start=str(init_start),
        end=str(init_end - 1),
        freq='YS',
    )

    lons = xr.DataArray(
        np.linspace(0.5, 359.5, nx),
        dims=('lon',),
        attrs={'units': 'degrees east', 'long_name': 'longitude'},
    )
    lats = xr.DataArray(
        np.linspace(-89.5, 89.5, ny),
        dims=('lat',),
        attrs={'units': 'degrees north', 'long_name': 'latitude'},
    )
    hind['var'] = xr.DataArray(
        np.random.random((nmember, ninit, nlead, nx, ny)),
        coords={
            'member': members,
            'init': inits,
            'lon': lons,
            'lat': lats,
            'lead': leads,
        },
        dims=('member', 'init', 'lead', 'lon', 'lat'),
        name='var',
    )

    observations['var'] = xr.DataArray(
        np.random.random((ninit, nx, ny)),
        coords={'lon': lons, 'lat': lats, 'time': inits},
        dims=('time', 'lon', 'lat'),
        name='var',
        attrs={'units': 'var units', 'description': 'a description'},
    )
    uninit['var'] = xr.DataArray(
        np.random.random((ninit, nx, ny, nmember)),
        coords={'lon': lons, 'lat': lats, 'time': inits, 'member': members},
        dims=('time', 'lon', 'lat', 'member'),
        name='var',
    )

    if chunking_dim is not None:
        observations = observations.chunk({chunking_dim: 'auto'})
        uninit = uninit.chunk({chunking_dim: 'auto'})
        hind = hind.chunk({chunking_dim: 'auto'})

    observations = observations['var']
    uninit = uninit['var']
    hind = hind['var']
    hind.lead.attrs['units'] = 'years'

    # Return the local `observations`; the previous `obs` was not defined in
    # this function and raised NameError on a fresh kernel.
    return hind, uninit, observations

hind,hist,obs = gen_hind()

## Skill

In [None]:
from climpred.prediction import compute_hindcast

In [None]:
# Metric and comparison keywords shared by the hindcast skill calls below.
cp_kwargs = dict(metric='acc', comparison='e2r')

In [None]:
# Time the hindcast skill call against the observations; metric and comparison come from cp_kwargs.
%time skill = compute_hindcast(hind, obs, **cp_kwargs)

In [None]:
# Time evaluating the hindcast skill result with .compute().
%time skillc = skill.compute()

## Bootstrap significant skill

In [None]:
from climpred.bootstrap import bootstrap_hindcast

In [None]:
# Value passed as the `bootstrap` keyword to bootstrap_hindcast below.
bootstrap = 100

In [None]:
# Time the bootstrapped hindcast skill call on the hindcast (hind),
# uninitialized (hist) and observation (obs) arrays.
%time bskill = bootstrap_hindcast(hind, hist, obs, bootstrap=bootstrap, **cp_kwargs)

In [None]:
improved_by_init = bskill.sel(results='skill',kind='init').where(bskill.sel(results='p',kind='uninit') <= 0.05)
%time improved_by_init_c = improved_by_init.compute()

# Close down

In [None]:
# Close the client first, then the gateway cluster it was connected to.
client.close()
cluster.close()