# What are dask and xarray?
Designed to be run on 4 cores on Gadi. Requires access to the cj50 project.

In [None]:
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import time

## xarray labels stuff

In [None]:
# Open a single monthly-mean file and keep only its surface_temp variable.
sst = xr.open_dataset(
    '/g/data/cj50/access-om2/raw-output/access-om2-01/01deg_jra55v140_iaf/output000/ocean/ocean-2d-surface_temp-1-monthly-mean-ym_1958_01.nc'
)['surface_temp']
sst

In [None]:
# Reduce over a dimension by its label rather than by axis number.
sst.mean(dim='time')

### xarray is a wrapper for a numpy or dask array

In [None]:
# Twelve evenly spaced samples of one full sine cycle, rounded to 2 decimals.
np_array = np.sin(np.linspace(0, 2 * np.pi, num=12)).round(decimals=2)
np_array


In [None]:
# Quick look at the samples: a small 2x2 figure with a marker on every point.
plt.figure(figsize=(2, 2))
plt.plot(np_array, marker='o')

In [None]:
# xarray wrapped around numpy: the single axis is labelled 'time'
# and given the coordinate values 1..12.
xr_array = xr.DataArray(
    data=np_array,
    coords={'time': np.arange(1, 13, dtype=int)},
    dims=('time',),
)
xr_array

In [None]:
# xarray wrapped around dask: same data, split into chunks of 3 along 'time'.
xr_array_dask = xr_array.chunk(chunks={'time': 3})
xr_array_dask

## What does dask do? (flip back to slides)

In [None]:
# Shut down any dask client left over from an earlier run of this notebook.
# On a fresh kernel `client` has not been created yet, so there is nothing
# to close; without this guard the cell raises NameError on a top-to-bottom run.
try:
    client.close()
except NameError:
    pass

In [None]:
# Start a local dask cluster with one thread per worker and memory_limit=0
# (per the dask docs, 0 means no per-worker memory limit), then switch on
# the client's `amm` (Active Memory Manager in dask.distributed).
from dask.distributed import Client

client = Client(memory_limit=0, threads_per_worker=1)
client.amm.start()
client

In [None]:
# Mean of the numpy-backed array over its 'time' dimension.
xr_array.mean(dim='time')

In [None]:
# Same reduction on the chunked (dask-backed) array; no .load() is called here.
xr_array_dask.mean(dim='time')

In [None]:
# Draw the task graph behind the chunked mean (.data is the wrapped array).
xr_array_dask.mean(dim='time').data.visualize()

In [None]:
# Run the computation and pull the result into memory.
xr_array_dask.mean(dim='time').load()

## Using ACCESS-OM2-01 data

In [None]:
# Open every matching monthly-mean 3D temperature file as one dataset,
# without passing an explicit `chunks` argument.
thetao = xr.open_mfdataset(
    '/g/data/cj50/access-om2/raw-output/access-om2-01/01deg_jra55v140_iaf/output00*/ocean/ocean-3d-temp-1-monthly-mean*.nc',
    parallel=True,
)

In [None]:
# Show the temp variable's repr.
thetao['temp']

In [None]:
# Mean over xt_ocean of the first st_ocean level, restricted to
# yt_ocean indices 600-999. No .load() is called here.
(
    thetao['temp']
    .isel(st_ocean=0, yt_ocean=slice(600, 1000))
    .mean(dim='xt_ocean')
)

## (launch dashboard)

In [None]:
# Time the load of the xt_ocean mean (first st_ocean level, yt_ocean indices 600-999)
# using whatever chunking thetao currently has.
%time thetao.temp.isel(st_ocean=0,yt_ocean=slice(600,1000)).mean('xt_ocean').load()

In [None]:
# Same timed reduction, but rechunk first with xt_ocean set to -1
# (a single chunk along xt_ocean) before subsetting and averaging.
%time thetao.temp.chunk({'xt_ocean':-1}).isel(st_ocean=0,yt_ocean=slice(600,1000)).mean('xt_ocean').load()

#### Getting the cell above to run without crashing was tricky, and I had to subset a lot. Part of the problem is the tiny chunks.

In [None]:
# Re-open the same files, this time asking for chunks of 12 along time
# and -1 (no splitting) along xt_ocean.
thetao = xr.open_mfdataset(
    '/g/data/cj50/access-om2/raw-output/access-om2-01/01deg_jra55v140_iaf/output00*/ocean/ocean-3d-temp-1-monthly-mean*.nc',
    parallel=True,
    chunks={'time': 12, 'xt_ocean': -1},
)

In [None]:
# Time the same xt_ocean-mean load again on the currently opened thetao,
# for comparison with the earlier timings.
%time thetao.temp.isel(st_ocean=0,yt_ocean=slice(600,1000)).mean('xt_ocean').load()

In [None]:
# Mean over yt_ocean of the first st_ocean level, loaded into memory
# (no yt_ocean subsetting this time).
thetao['temp'].isel(st_ocean=0).mean(dim='yt_ocean').load()

In [None]:
# WARNING: likely to crash the kernel and take ages - proceed at your own risk.
# Every dimension is given a chunk size of -1, i.e. no dimension is split up.
thetao = xr.open_mfdataset(
    '/g/data/cj50/access-om2/raw-output/access-om2-01/01deg_jra55v140_iaf/output00*/ocean/ocean-3d-temp-1-monthly-mean*.nc',
    parallel=True,
    chunks={'time': -1, 'st_ocean': -1, 'yt_ocean': -1, 'xt_ocean': -1},
)

In [None]:
# Time the same xt_ocean-mean load on the currently opened thetao.
# NOTE(review): if thetao was just re-opened with all chunks set to -1, expect this to be slow or to fail.
%time thetao.temp.isel(st_ocean=0,yt_ocean=slice(600,1000)).mean('xt_ocean').load()