In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%matplotlib inline
from matplotlib import pyplot as plt

In [3]:
import sys
import os
from subprocess import call 
import pathlib

In [4]:
import numpy as np
import pandas as pd
import xarray as xr

In [5]:
from dask.distributed import Client
from dask.diagnostics import ProgressBar

In [6]:
# Make ../code/ importable (the `import src` that follows presumably resolves
# from there); the guard keeps sys.path free of duplicates when the cell is re-run.
if "../code/" not in sys.path:
    sys.path.append("../code/")

In [7]:
import src

### parameters for papermill

In [8]:
# Default values; papermill is expected to override them at run time.
domain = 'NZ'  # sub-directory name under ipath / opath; 'Ninos' and 'IOD' trigger the regional-index branches
ndays = 1  # window (in time steps) of the trailing running mean applied to SST, only applied when > 1
roll = 15  # size of the centered rolling window stacked along the 'roll' dimension
climatology = [1991, 2020]  # first and last year of the climatological period (used as time-slice bounds)
quantiles = [0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99]  # quantile levels computed for each day of year
ipath = '/media/nicolasf/END19101/data/OISST/daily/'  # root of the input files, one sub-directory per domain. NOTE(review): machine-specific absolute path
opath = '../outputs/'  # root of the outputs, one sub-directory per domain

In [9]:
# Narrow both root directories down to the sub-directory of the selected domain.
ipath = pathlib.Path(ipath) / domain
opath = pathlib.Path(opath) / domain

In [10]:
# Create the domain output directory; parents=True also creates the root
# output directory when it does not exist yet (e.g. on a fresh checkout),
# instead of raising FileNotFoundError.
opath.mkdir(parents=True, exist_ok=True)

### create cluster

In [11]:
# Start a local dask cluster and connect a client to it.
client = Client(
    n_workers=8,
    threads_per_worker=4,
    memory_limit="4GB",  # limit applied to each worker
    local_directory="./dask",  # scratch directory used by the workers
)

In [12]:
# Display the client summary (dashboard link, workers, threads, memory).
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 8
Total threads: 32,Total memory: 29.80 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:34363,Workers: 8
Dashboard: http://127.0.0.1:8787/status,Total threads: 32
Started: Just now,Total memory: 29.80 GiB

0,1
Comm: tcp://127.0.0.1:37975,Total threads: 4
Dashboard: http://127.0.0.1:35737/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:39189,
Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-tji_bt8_,Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-tji_bt8_

0,1
Comm: tcp://127.0.0.1:42191,Total threads: 4
Dashboard: http://127.0.0.1:39045/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:40221,
Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-n0x5rc5n,Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-n0x5rc5n

0,1
Comm: tcp://127.0.0.1:43165,Total threads: 4
Dashboard: http://127.0.0.1:38507/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:44409,
Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-lis_c8wg,Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-lis_c8wg

0,1
Comm: tcp://127.0.0.1:33455,Total threads: 4
Dashboard: http://127.0.0.1:46865/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:38083,
Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-30txb_kf,Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-30txb_kf

0,1
Comm: tcp://127.0.0.1:35751,Total threads: 4
Dashboard: http://127.0.0.1:43621/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:45305,
Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-aykpc10q,Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-aykpc10q

0,1
Comm: tcp://127.0.0.1:43367,Total threads: 4
Dashboard: http://127.0.0.1:43759/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:44821,
Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-9675mt8x,Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-9675mt8x

0,1
Comm: tcp://127.0.0.1:45597,Total threads: 4
Dashboard: http://127.0.0.1:33903/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:46301,
Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-6_t757yz,Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-6_t757yz

0,1
Comm: tcp://127.0.0.1:40585,Total threads: 4
Dashboard: http://127.0.0.1:38925/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:32835,
Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-sjes113h,Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-sjes113h


In [13]:
# '../code/' is normally already on sys.path at this point (it is appended
# earlier in the notebook); the guard avoids stacking a duplicate entry.
if '../code/' not in sys.path:
    sys.path.append('../code/')

In [14]:
import src

In [15]:
# Collect the input files (the '????' wildcard matches the 4-character year
# in the file name) in sorted order, and fail early with an explicit message
# when nothing matches rather than erroring later on indexing or opening.
lfiles = sorted(ipath.glob("sst.day.mean.????.v2.nc"))

if not lfiles:
    raise FileNotFoundError(f"no 'sst.day.mean.????.v2.nc' files found in {ipath}")

In [16]:
# Order the files by path, i.e. by the year embedded in the file name.
lfiles = sorted(lfiles)

In [17]:
# First file in sorted order (lowest year in the file name).
lfiles[0]

PosixPath('/media/nicolasf/END19101/data/OISST/daily/NZ/sst.day.mean.1981.v2.nc')

In [18]:
# Last file in sorted order (highest year in the file name).
lfiles[-1]

PosixPath('/media/nicolasf/END19101/data/OISST/daily/NZ/sst.day.mean.2022.v2.nc')

In [19]:
# Open all the files as one dask-backed dataset.
dset = xr.open_mfdataset(
    lfiles,
    parallel=True,
    combine_attrs="drop_conflicts",
    compat="override",
    coords=["time"],
)

In [20]:
# Inspect the freshly opened dataset (shape and chunking).
dset

Unnamed: 0,Array,Chunk
Bytes,328.80 MiB,8.04 MiB
Shape,"(14964, 80, 72)","(366, 80, 72)"
Count,126 Tasks,42 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 328.80 MiB 8.04 MiB Shape (14964, 80, 72) (366, 80, 72) Count 126 Tasks 42 Chunks Type float32 numpy.ndarray",72  80  14964,

Unnamed: 0,Array,Chunk
Bytes,328.80 MiB,8.04 MiB
Shape,"(14964, 80, 72)","(366, 80, 72)"
Count,126 Tasks,42 Chunks
Type,float32,numpy.ndarray


### if the domain is 'Ninos' or 'IOD', first calculate the regional averages

In [21]:
# Only the index domains are processed here: the SST field is replaced by
# the output of the matching `src` helper; any other domain is left as is.
if domain == 'Ninos':
    dset['sst'] = src.calculates_ninos(dset['sst'], nino='all')
elif domain == 'IOD':
    dset['sst'] = src.calculates_IOD_nodes(dset['sst'], IOD_node='all')

In [22]:
# Inspect the dataset after the optional regional averaging.
dset

Unnamed: 0,Array,Chunk
Bytes,328.80 MiB,8.04 MiB
Shape,"(14964, 80, 72)","(366, 80, 72)"
Count,126 Tasks,42 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 328.80 MiB 8.04 MiB Shape (14964, 80, 72) (366, 80, 72) Count 126 Tasks 42 Chunks Type float32 numpy.ndarray",72  80  14964,

Unnamed: 0,Array,Chunk
Bytes,328.80 MiB,8.04 MiB
Shape,"(14964, 80, 72)","(366, 80, 72)"
Count,126 Tasks,42 Chunks
Type,float32,numpy.ndarray


### remove the 29th of February (convert to a 'noleap' calendar)

In [23]:
# Switch to a 365-day calendar, so that every year has the same days of year.
dset = dset.convert_calendar(calendar='noleap')

### calculate the `ndays` running averages (only when `ndays` > 1)

In [24]:
if ndays > 1:

    # Trailing running mean over `ndays` time steps. Rolling reductions take
    # no dimension argument (a positional value would be read as `keep_attrs`),
    # the rolled dimension is already fixed by the `rolling` call.
    dset['sst'] = dset['sst'].rolling({'time': ndays}, center=False, min_periods=ndays).mean()

    # With min_periods=ndays the first `ndays - 1` steps have an incomplete
    # window and are NaN: drop exactly those, and keep the first valid step
    # (index `ndays - 1`).
    dset = dset.isel(time=slice(ndays - 1, None))

In [25]:
# Inspect the dataset after the calendar conversion and the optional running mean.
dset

Unnamed: 0,Array,Chunk
Bytes,328.58 MiB,8.02 MiB
Shape,"(14954, 80, 72)","(365, 80, 72)"
Count,295 Tasks,42 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 328.58 MiB 8.02 MiB Shape (14954, 80, 72) (365, 80, 72) Count 295 Tasks 42 Chunks Type float32 numpy.ndarray",72  80  14954,

Unnamed: 0,Array,Chunk
Bytes,328.58 MiB,8.02 MiB
Shape,"(14954, 80, 72)","(365, 80, 72)"
Count,295 Tasks,42 Chunks
Type,float32,numpy.ndarray


### now expand the dataset along a new `roll` dimension holding each centered `roll`-step window

In [26]:
# Work on a (shallow) copy holding only the SST variable.
dset_roll = dset[['sst']].copy(deep=False)

In [27]:
# Stack each centered window of `roll` time steps along a new 'roll' dimension.
dset_roll = dset_roll.rolling(
    dict(time=roll),
    center=True,
    min_periods=roll,
).construct(window_dim='roll')

### selects the climatological period 

In [28]:
# Label-based time selection bounded by the first and last climatological years.
clim_period = slice(str(climatology[0]), str(climatology[1]))
clim = dset_roll.sel(time=clim_period)

In [29]:
# Inspect the windowed data restricted to the climatological period.
clim

Unnamed: 0,Array,Chunk
Bytes,3.52 GiB,120.30 MiB
Shape,"(10950, 80, 72, 15)","(365, 80, 72, 15)"
Count,630 Tasks,31 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.52 GiB 120.30 MiB Shape (10950, 80, 72, 15) (365, 80, 72, 15) Count 630 Tasks 31 Chunks Type float32 numpy.ndarray",10950  1  15  72  80,

Unnamed: 0,Array,Chunk
Bytes,3.52 GiB,120.30 MiB
Shape,"(10950, 80, 72, 15)","(365, 80, 72, 15)"
Count,630 Tasks,31 Chunks
Type,float32,numpy.ndarray


### transpose, then rechunk (a single chunk along `time` and `roll`)

In [30]:
# Index domains carry a single extra dimension, named after the index;
# every other domain is a lat / lon grid.
index_dim = {'Ninos': 'nino', 'IOD': 'IOD'}.get(domain)

if index_dim is None:
    # gridded case: whole 'time' and 'roll' axes in each chunk, small spatial tiles
    clim = clim.transpose('time', 'roll', 'lat', 'lon')
    clim = clim.chunk({'time': -1, 'roll': -1, 'lat': 2, 'lon': 20})
else:
    # index case: chunk size along the index dimension equals its length
    clim = clim.transpose('time', 'roll', index_dim)
    clim = clim.chunk({'time': -1, 'roll': -1, index_dim: len(clim[index_dim])})

In [31]:
# Check the new dimension order and chunk layout.
clim

Unnamed: 0,Array,Chunk
Bytes,3.52 GiB,25.06 MiB
Shape,"(10950, 15, 80, 72)","(10950, 15, 2, 20)"
Count,1850 Tasks,160 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.52 GiB 25.06 MiB Shape (10950, 15, 80, 72) (10950, 15, 2, 20) Count 1850 Tasks 160 Chunks Type float32 numpy.ndarray",10950  1  72  80  15,

Unnamed: 0,Array,Chunk
Bytes,3.52 GiB,25.06 MiB
Shape,"(10950, 15, 80, 72)","(10950, 15, 2, 20)"
Count,1850 Tasks,160 Chunks
Type,float32,numpy.ndarray


In [32]:
# Quantiles for each day of year, pooled over the 'time' and 'roll' dimensions.
clim_q = clim.groupby(clim['time'].dt.dayofyear).quantile(
    q=quantiles,
    dim=['time', 'roll'],
)

In [33]:
# Mean for each day of year, pooled over the 'time' and 'roll' dimensions.
clim_ave = clim.groupby(clim['time'].dt.dayofyear).mean(
    dim=['time', 'roll'],
)

In [34]:
# Standard deviation for each day of year, pooled over the 'time' and 'roll' dimensions.
clim_std = clim.groupby(clim['time'].dt.dayofyear).std(
    dim=['time', 'roll'],
)

In [35]:
# Evaluate the three lazy results one after the other (quantiles, mean, std).
# NOTE(review): dask.diagnostics.ProgressBar is documented for the local
# schedulers; with the distributed client created above it may display
# nothing - confirm.
with ProgressBar():
    clim_q, clim_ave, clim_std = (
        lazy.compute() for lazy in (clim_q, clim_ave, clim_std)
    )

In [36]:
# The quantile dataset becomes the container for all statistics:
# give its variable an explicit name.
clim_q = clim_q.rename(sst='quantiles')

In [37]:
# Inspect the day-of-year mean.
clim_ave

In [38]:
# Store the day-of-year mean next to the quantiles.
clim_q = clim_q.assign(average=clim_ave['sst'])

In [39]:
# Inspect the combined dataset (quantiles plus average).
clim_q

In [40]:
# Store the day-of-year standard deviation next to the quantiles.
clim_q = clim_q.assign(std=clim_std['sst'])

### save to Zarr

In [41]:
# Write the climatology (quantiles, average, std) to a Zarr store named after
# the domain, the running-mean length and the window size.
# mode='w' replaces a store left by a previous run; the default mode raises
# when the target already exists, which breaks re-runs of the notebook.
clim_q.to_zarr(opath.joinpath(f'{domain}_OISST_{ndays}days_climatology_{roll}_window.zarr'), mode='w')

<xarray.backends.zarr.ZarrStore at 0x7fb3543f54a0>