In [1]:
# Enable IPython's autoreload extension. Mode 2 re-imports all loaded modules
# before each cell executes, so edits to local modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
%matplotlib inline
import cmocean
from matplotlib import pyplot as plt

In [3]:
import os
import pathlib
import sys
from subprocess import call

In [4]:
from datetime import datetime

In [5]:
from datetime import datetime, timedelta, timezone

In [6]:
import numpy as np
import pandas as pd
import xarray as xr

In [7]:
# Add the sibling code directory to the import search path (presumably where
# the `src` package imported below lives -- confirm against the repo layout).
# Guarded so that re-running this cell does not keep appending duplicates.
if "../code/" not in sys.path:
    sys.path.append("../code/")

In [8]:
import src

### parameters for papermill 

In [28]:
# This cell is labelled as the papermill parameters cell, so the values below
# are defaults (presumably overridden at execution time -- keep them as plain
# `name = value` assignments).

# Candidate region names; `region` below is one of them.
regions = ["NZ", "Ninos", "IOD"]
# Region processed by this run; used as a sub-directory of `dpath` and
# `clim_path`, and as the prefix of the climatology zarr store name.
region = "NZ"
# Root directory of the daily OISST files (yearly `sst.day.mean.<year>.v2.nc`
# files are read from `<dpath>/<region>`).
dpath = "/media/nicolasf/END19101/data/OISST/daily"
# Root directory of the climatology outputs (zarr store read from
# `<clim_path>/<region>`).
clim_path = "/home/nicolasf/operational/OISST_indices/outputs/"
# Directory holding the `*_buffered_50km.shp` shapefiles.
shapes_path = "/home/nicolasf/operational/OISST_indices/data/shapefiles/"
# Aggregation length in days; appears in the climatology store name
# (`..._{ndays_agg}days_climatology_...`).
ndays_agg = 1
# Number of days before the current date used to compute `first_day`.
ndays_back = 180
# Climatological quantile selected as the threshold when deriving the mask.
quantile = 0.9
# NOTE(review): not referenced in the visible part of the notebook -- confirm
# its meaning against the cells that use it.
lag = 0

### get the pathlib paths for the data and the climatology 

In [29]:
# Narrow the data and climatology roots to the selected region's
# sub-directory; the shapefile directory is only wrapped in a Path.
# NOTE: `dpath` and `clim_path` are rebound in place, so re-running this cell
# without first re-running the parameters cell appends `region` a second time.
dpath = pathlib.Path(dpath) / region
clim_path = pathlib.Path(clim_path) / region
shapes_path = pathlib.Path(shapes_path)

### get the current date 

In [11]:
# Current time in UTC as a *naive* datetime. `datetime.utcnow()` is deprecated
# since Python 3.12; this is the equivalent replacement. The tzinfo is
# stripped so the value stays naive, exactly as before, and any code that
# compares it with other naive datetimes keeps working.
current_date = datetime.now(timezone.utc).replace(tzinfo=None)

### get the first day of the period to extract 

In [12]:
# Start of the extraction window: `ndays_back` days before the current date
# (the first positional argument of `timedelta` is the number of days).
first_day = current_date - timedelta(ndays_back)

## get the years to read 

In [13]:
# Every calendar year touched by the [first_day, current_date] window.
# `np.arange` already yields sorted, distinct integers, so no extra
# de-duplication step is required.
years_to_get = np.arange(first_day.year, current_date.year + 1)

In [14]:
# One yearly OISST file per year in the window, inside the region directory.
lfiles = [dpath / f"sst.day.mean.{yr}.v2.nc" for yr in years_to_get]

## opens the near realtime dataset 

In [15]:
# Open the yearly files as a single dataset, combined along their coordinates;
# `parallel=True` asks xarray to open the files in parallel.
dset = xr.open_mfdataset(
    paths=lfiles,
    combine="by_coords",
    parallel=True,
)

In [16]:
# Inspect the freshly opened dataset (rich repr).
dset

Unnamed: 0,Array,Chunk
Bytes,4.83 MiB,4.83 MiB
Shape,"(220, 80, 72)","(220, 80, 72)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.83 MiB 4.83 MiB Shape (220, 80, 72) (220, 80, 72) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",72  80  220,

Unnamed: 0,Array,Chunk
Bytes,4.83 MiB,4.83 MiB
Shape,"(220, 80, 72)","(220, 80, 72)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray


### get rid of the 29th of Feb if present 

In [17]:
# Convert the time axis to a 365-day ("noleap") calendar, which removes any
# 29 February so that day-of-year lines up with the 365-day climatology.
dset = dset.convert_calendar(calendar="noleap")

### opens the climatology 

In [18]:
# The climatology is a zarr store named after the region and the aggregation
# length, located in the region's climatology directory.
clim_store = clim_path / f"{region}_OISST_{ndays_agg}days_climatology_15_window.zarr"
clim = xr.open_zarr(clim_store)

In [19]:
# Inspect the climatology dataset (rich repr).
clim

Unnamed: 0,Array,Chunk
Bytes,8.02 MiB,514.69 kiB
Shape,"(80, 72, 365)","(20, 36, 183)"
Count,17 Tasks,16 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 8.02 MiB 514.69 kiB Shape (80, 72, 365) (20, 36, 183) Count 17 Tasks 16 Chunks Type float32 numpy.ndarray",365  72  80,

Unnamed: 0,Array,Chunk
Bytes,8.02 MiB,514.69 kiB
Shape,"(80, 72, 365)","(20, 36, 183)"
Count,17 Tasks,16 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,80.20 MiB,1.01 MiB
Shape,"(365, 5, 80, 72)","(92, 2, 20, 36)"
Count,97 Tasks,96 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 80.20 MiB 1.01 MiB Shape (365, 5, 80, 72) (92, 2, 20, 36) Count 97 Tasks 96 Chunks Type float64 numpy.ndarray",365  1  72  80  5,

Unnamed: 0,Array,Chunk
Bytes,80.20 MiB,1.01 MiB
Shape,"(365, 5, 80, 72)","(92, 2, 20, 36)"
Count,97 Tasks,96 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8.02 MiB,514.69 kiB
Shape,"(80, 72, 365)","(20, 36, 183)"
Count,17 Tasks,16 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 8.02 MiB 514.69 kiB Shape (80, 72, 365) (20, 36, 183) Count 17 Tasks 16 Chunks Type float32 numpy.ndarray",365  72  80,

Unnamed: 0,Array,Chunk
Bytes,8.02 MiB,514.69 kiB
Shape,"(80, 72, 365)","(20, 36, 183)"
Count,17 Tasks,16 Chunks
Type,float32,numpy.ndarray


### calculate the anomalies with respect to the 1991-2020 average

In [20]:
# Group the observations by day of year and subtract the matching
# climatological "average" field from every group.
day_of_year = dset.time.dt.dayofyear
anoms = dset.groupby(day_of_year) - clim["average"]

### repeat the climatology over the time dimension 

In [21]:
# Pick, for every time step of `dset`, the climatology entry with the same
# day of year, so the climatology lines up with the observations along time.
clim_repeat = clim.sel(indexers={"dayofyear": dset.time.dt.dayofyear})

In [22]:
# Inspect the climatology after it has been indexed by the dataset's days of year.
clim_repeat

Unnamed: 0,Array,Chunk
Bytes,4.83 MiB,514.69 kiB
Shape,"(80, 72, 220)","(20, 36, 183)"
Count,33 Tasks,16 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.83 MiB 514.69 kiB Shape (80, 72, 220) (20, 36, 183) Count 33 Tasks 16 Chunks Type float32 numpy.ndarray",220  72  80,

Unnamed: 0,Array,Chunk
Bytes,4.83 MiB,514.69 kiB
Shape,"(80, 72, 220)","(20, 36, 183)"
Count,33 Tasks,16 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,48.34 MiB,1.01 MiB
Shape,"(220, 5, 80, 72)","(92, 2, 20, 36)"
Count,169 Tasks,72 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 48.34 MiB 1.01 MiB Shape (220, 5, 80, 72) (92, 2, 20, 36) Count 169 Tasks 72 Chunks Type float64 numpy.ndarray",220  1  72  80  5,

Unnamed: 0,Array,Chunk
Bytes,48.34 MiB,1.01 MiB
Shape,"(220, 5, 80, 72)","(92, 2, 20, 36)"
Count,169 Tasks,72 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.83 MiB,514.69 kiB
Shape,"(80, 72, 220)","(20, 36, 183)"
Count,33 Tasks,16 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.83 MiB 514.69 kiB Shape (80, 72, 220) (20, 36, 183) Count 33 Tasks 16 Chunks Type float32 numpy.ndarray",220  72  80,

Unnamed: 0,Array,Chunk
Bytes,4.83 MiB,514.69 kiB
Shape,"(80, 72, 220)","(20, 36, 183)"
Count,33 Tasks,16 Chunks
Type,float32,numpy.ndarray


### derive the mask 

In [23]:
# Build the exceedance mask: 1 where SST is at or above the selected
# climatological quantile, NaN everywhere else.
sst = dset["sst"]
threshold = clim_repeat["quantiles"].sel(quantile=quantile)
# Step 1: keep the SST values that reach the threshold, NaN otherwise.
mask = sst.where(sst >= threshold)
# Step 2: leave the NaNs untouched and set every remaining value to 1.
mask = mask.where(np.isnan(mask), other=1)

In [None]:
# read the buffered (50 km) region shapefiles and reproject them to EPSG:4326

In [26]:
import regionmask

In [27]:
import geopandas as gpd

In [37]:
# Read the six buffered (50 km) region shapefiles and reproject each one to
# EPSG:4326. A single loop replaces six copy-pasted read/reproject lines, so
# the file-name pattern and the target CRS are stated in exactly one place.
# The six module-level names are kept unchanged for downstream cells.
NNI, WNI, ENI, NSI, WSI, ESI = (
    gpd.read_file(shapes_path.joinpath(f"{name}_buffered_50km.shp")).to_crs("EPSG:4326")
    for name in ("NNI", "WNI", "ENI", "NSI", "WSI", "ESI")
)

In [38]:
# Re-inspect the dataset after the calendar conversion.
dset

Unnamed: 0,Array,Chunk
Bytes,4.83 MiB,4.83 MiB
Shape,"(220, 80, 72)","(220, 80, 72)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.83 MiB 4.83 MiB Shape (220, 80, 72) (220, 80, 72) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",72  80  220,

Unnamed: 0,Array,Chunk
Bytes,4.83 MiB,4.83 MiB
Shape,"(220, 80, 72)","(220, 80, 72)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [39]:
# Inspect the anomalies (observations minus the climatological average).
anoms

Unnamed: 0,Array,Chunk
Bytes,4.83 MiB,514.69 kiB
Shape,"(220, 80, 72)","(183, 20, 36)"
Count,102 Tasks,16 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.83 MiB 514.69 kiB Shape (220, 80, 72) (183, 20, 36) Count 102 Tasks 16 Chunks Type float32 numpy.ndarray",72  80  220,

Unnamed: 0,Array,Chunk
Bytes,4.83 MiB,514.69 kiB
Shape,"(220, 80, 72)","(183, 20, 36)"
Count,102 Tasks,16 Chunks
Type,float32,numpy.ndarray


In [40]:
# Inspect the quantile-exceedance mask (1 where exceeded, NaN elsewhere).
mask

Unnamed: 0,Array,Chunk
Bytes,4.83 MiB,258.75 kiB
Shape,"(220, 80, 72)","(92, 20, 36)"
Count,342 Tasks,24 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.83 MiB 258.75 kiB Shape (220, 80, 72) (92, 20, 36) Count 342 Tasks 24 Chunks Type float32 numpy.ndarray",72  80  220,

Unnamed: 0,Array,Chunk
Bytes,4.83 MiB,258.75 kiB
Shape,"(220, 80, 72)","(92, 20, 36)"
Count,342 Tasks,24 Chunks
Type,float32,numpy.ndarray
