In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%matplotlib inline
from matplotlib import pyplot as plt

In [3]:
import sys
import os
from subprocess import call 
import pathlib

In [4]:
import numpy as np
import pandas as pd
import xarray as xr

In [5]:
from dask.distributed import Client
from dask.diagnostics import ProgressBar

In [6]:
sys.path.append("../code/")

In [7]:
import src

### parameters for papermill

In [8]:
domain = 'Ninos'
ndays = 30
roll = 15
# climatology = [1991, 2020]
climatology = [1961, 1990]
quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]

### create cluster

In [9]:
client = Client(
    n_workers=6, threads_per_worker=4, memory_limit="8GB", local_directory="./dask"
)

In [10]:
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 6
Total threads: 24,Total memory: 44.70 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:40787,Workers: 6
Dashboard: http://127.0.0.1:8787/status,Total threads: 24
Started: Just now,Total memory: 44.70 GiB

0,1
Comm: tcp://127.0.0.1:37951,Total threads: 4
Dashboard: http://127.0.0.1:44599/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:33277,
Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-1ivd6puf,Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-1ivd6puf

0,1
Comm: tcp://127.0.0.1:46467,Total threads: 4
Dashboard: http://127.0.0.1:46741/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:43805,
Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-zy6vgns3,Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-zy6vgns3

0,1
Comm: tcp://127.0.0.1:34221,Total threads: 4
Dashboard: http://127.0.0.1:43377/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:41079,
Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-cvao2gjq,Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-cvao2gjq

0,1
Comm: tcp://127.0.0.1:40889,Total threads: 4
Dashboard: http://127.0.0.1:43529/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:36383,
Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-02nv05p2,Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-02nv05p2

0,1
Comm: tcp://127.0.0.1:35337,Total threads: 4
Dashboard: http://127.0.0.1:42811/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:45107,
Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-tgclovha,Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-tgclovha

0,1
Comm: tcp://127.0.0.1:36179,Total threads: 4
Dashboard: http://127.0.0.1:44465/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:36771,
Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-3937q7e4,Local directory: /home/nicolasf/operational/OISST_indices/notebooks/dask/dask-worker-space/worker-3937q7e4


In [11]:
sys.path.append('../code/')

In [12]:
import src

In [13]:
opath = pathlib.Path(f'/media/nicolasf/END19101/data/OISST/daily/{domain}')

In [14]:
lfiles = list(opath.glob("sst.day.mean.????.v2.nc"))

In [15]:
lfiles.sort()

In [16]:
lfiles[0]

PosixPath('/media/nicolasf/END19101/data/OISST/daily/Ninos/sst.day.mean.1981.v2.nc')

In [17]:
lfiles[-1]

PosixPath('/media/nicolasf/END19101/data/OISST/daily/Ninos/sst.day.mean.2022.v2.nc')

In [18]:
dset = xr.open_mfdataset(lfiles, parallel=True, combine_attrs="drop_conflicts", compat="override", coords=['time'])

In [19]:
dset

Unnamed: 0,Array,Chunk
Bytes,1.20 GiB,30.16 MiB
Shape,"(14954, 60, 360)","(366, 60, 360)"
Count,126 Tasks,42 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.20 GiB 30.16 MiB Shape (14954, 60, 360) (366, 60, 360) Count 126 Tasks 42 Chunks Type float32 numpy.ndarray",360  60  14954,

Unnamed: 0,Array,Chunk
Bytes,1.20 GiB,30.16 MiB
Shape,"(14954, 60, 360)","(366, 60, 360)"
Count,126 Tasks,42 Chunks
Type,float32,numpy.ndarray


### if regions is 'Ninos' or 'IOD', then first calculates the regional averages

In [20]:
if domain == 'Ninos':
    
    dset['sst'] = src.calculates_ninos(dset['sst'], nino='all')
    
elif domain == 'IOD': 
    
    dset['sst']= src.calculates_IOD_nodes(dset['sst'], IOD_node='all')

In [21]:
dset

Unnamed: 0,Array,Chunk
Bytes,292.07 kiB,1.43 kiB
Shape,"(5, 14954)","(1, 366)"
Count,1008 Tasks,210 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 292.07 kiB 1.43 kiB Shape (5, 14954) (1, 366) Count 1008 Tasks 210 Chunks Type float32 numpy.ndarray",14954  5,

Unnamed: 0,Array,Chunk
Bytes,292.07 kiB,1.43 kiB
Shape,"(5, 14954)","(1, 366)"
Count,1008 Tasks,210 Chunks
Type,float32,numpy.ndarray


### remove the 29th of Feb

In [22]:
# standard_calendar = pd.date_range(start=first_day, end=last_day, freq="D")

In [23]:
dset = dset.convert_calendar('noleap')

### calculates the rolling averages if needed 

In [24]:
if ndays > 1: 
    
    dset['sst'] = dset['sst'].rolling({'time':ndays}, center=False, min_periods=ndays).mean('time')
    
    dset = dset.isel(time=slice(ndays+1,None))

In [25]:
dset

Unnamed: 0,Array,Chunk
Bytes,582.54 kiB,2.85 kiB
Shape,"(5, 14913)","(1, 365)"
Count,9169 Tasks,210 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 582.54 kiB 2.85 kiB Shape (5, 14913) (1, 365) Count 9169 Tasks 210 Chunks Type float64 numpy.ndarray",14913  5,

Unnamed: 0,Array,Chunk
Bytes,582.54 kiB,2.85 kiB
Shape,"(5, 14913)","(1, 365)"
Count,9169 Tasks,210 Chunks
Type,float64,numpy.ndarray


### now expands the dataset along a dummy dimension 

In [26]:
dset_roll = dset[['sst']].copy()

In [27]:
dset_roll = dset_roll.rolling({'time':roll}, center=True, min_periods=roll).construct(window_dim='roll')

### selects the climatological period 

In [28]:
clim = dset_roll.sel(time=slice(str(climatology[0]), str(climatology[1])))

In [29]:
clim

Unnamed: 0,Array,Chunk
Bytes,1.93 MiB,42.77 kiB
Shape,"(5, 3376, 15)","(1, 365, 15)"
Count,11751 Tasks,50 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.93 MiB 42.77 kiB Shape (5, 3376, 15) (1, 365, 15) Count 11751 Tasks 50 Chunks Type float64 numpy.ndarray",15  3376  5,

Unnamed: 0,Array,Chunk
Bytes,1.93 MiB,42.77 kiB
Shape,"(5, 3376, 15)","(1, 365, 15)"
Count,11751 Tasks,50 Chunks
Type,float64,numpy.ndarray


### transpose, then rechunks 

In [30]:
if domain == 'Ninos': 
    
    clim = clim.transpose(*['time','roll','nino'])
    clim = clim.chunk({'time':-1, 'roll':-1, 'nino':len(clim['nino'])})
    
elif domain == 'IOD': 

    clim = clim.transpose(*['time','roll','IOD'])
    clim = clim.chunk({'time':-1, 'roll':-1, 'IOD':len(clim['IOD'])})
    
else: 
    
    clim = clim.transpose(*['time','roll','lat','lon'])
    clim = clim.chunk({'time':-1, 'roll':-1, 'lat':2, 'lon':20})

In [31]:
clim

Unnamed: 0,Array,Chunk
Bytes,1.93 MiB,1.93 MiB
Shape,"(3376, 15, 5)","(3376, 15, 5)"
Count,11802 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.93 MiB 1.93 MiB Shape (3376, 15, 5) (3376, 15, 5) Count 11802 Tasks 1 Chunks Type float64 numpy.ndarray",5  15  3376,

Unnamed: 0,Array,Chunk
Bytes,1.93 MiB,1.93 MiB
Shape,"(3376, 15, 5)","(3376, 15, 5)"
Count,11802 Tasks,1 Chunks
Type,float64,numpy.ndarray


In [32]:
clim_q = clim.groupby(clim.time.dt.dayofyear).quantile(quantiles, dim=['time','roll'])

In [33]:
clim_ave = clim.groupby(clim.time.dt.dayofyear).mean(dim=['time','roll'])

In [34]:
clim_std = clim.groupby(clim.time.dt.dayofyear).std(dim=['time','roll'])

In [35]:
with ProgressBar(): 
    clim_q = clim_q.compute()
    clim_ave = clim_ave.compute()
    clim_std = clim_std.compute()

In [36]:
clim_q = clim_q.rename({'sst':'quantiles'})

In [37]:
clim_ave

In [38]:
clim_q['average'] = clim_ave['sst']

In [39]:
clim_q

In [40]:
clim_q['std'] = clim_std['sst']

In [41]:
opath = pathlib.Path(f'../outputs/{domain}')

In [42]:
opath.mkdir(exist_ok=True)

### saves to ZARR

In [43]:
clim_q.to_zarr(opath.joinpath(f'{domain}_OISST_{ndays}days_climatology_{roll}_window_1961_1990.zarr'))

<xarray.backends.zarr.ZarrStore at 0x7f4f481da120>