# Import

In [1]:
from dask.distributed import Client

client = Client("tcp://127.0.0.1:36183")

In [2]:
import sys
sys.path.insert(1, '..')

In [3]:
import s3fs
import numpy as np
import xarray as xr
import scipy
import dask
import gsw
import matplotlib.pyplot as plt
import eos_module, filter_module

# Read and prepare Xarray data

In [16]:
from validate_catalog import all_params
params_dict, cat = all_params()

In [17]:
data=[dict() for i in range(6)]
data[0] = dict({'region' : '1', 'season' : 'fma', 'label' : 'GULFSTR FMA'})
data[1] = dict({'region' : '1', 'season' : 'aso', 'label' : 'GULFSTR ASO'})
data[2] = dict({'region' : '2', 'season' : 'fma', 'label' : 'AÇORES FMA'})
data[3] = dict({'region' : '2', 'season' : 'aso', 'label' : 'AÇORES ASO'})
data[4] = dict({'region' : '3', 'season' : 'fma', 'label' : 'WESTMED FMA'})
data[5] = dict({'region' : '3', 'season' : 'aso', 'label' : 'WESTMED ASO'})

In [18]:
x_slice = slice(0,-1)
y_slice = slice(0,-1)

data_slice = dict(time_counter=slice(0,-1,150),y=y_slice,x=x_slice) # take every 150-th time output
coord_slice = dict(y=y_slice,x=x_slice)

nb_of_time_chunks = 4   # number of chunks on time-axis

## Read

In [19]:
%%time
for i in range(len(data)) :
    data[i]['HR'] = cat.eNATL60(region=data[i]['region'],datatype='surface_hourly', season=data[i]['season']).to_dask()\
    .drop(['fmask','qt_oce', 'somecrty','sometauy','sossheig','sowaflup','sozocrtx','sozotaux','umask','vmask'],errors='ignore')\
    .isel(data_slice).chunk({'time_counter' : nb_of_time_chunks, 'x' : -1, 'y' : -1})
    #.resample({'time_counter' : resampling_avg_period}, closed='right', label='right').mean()
    #current_set['x'] = np.arange(len(current_set['x']))
    #current_set['y'] = np.arange(len(current_set['y']))
    data[i]['HR'].attrs['source'] = 'eNATL60'

CPU times: user 875 ms, sys: 30.1 ms, total: 905 ms
Wall time: 2.9 s


## Compute true density

In [28]:
%%time
for i in range(len(data)) :
    for dataKey in ['HR'] :
        # density anomaly (sigma0). denoted as 'true' since is computed on the basis of original (non-filtered) HR data
        data[i][dataKey]['sigma0_true'] = xr.apply_ufunc(gsw.density.sigma0, data[i][dataKey]['sosaline'], data[i][dataKey]['sosstsst'], dask='parallelized')
        # sqr of conservative temperature (to compute the subgrid variance further)
        data[i][dataKey]['sst_sqr'] = data[i][dataKey]['sosstsst']**2
        data[i][dataKey]['sal_sqr'] = data[i][dataKey]['sosaline']**2

CPU times: user 89.7 ms, sys: 1.07 ms, total: 90.8 ms
Wall time: 90.2 ms


## Filter (coarse-grainig)

In [29]:
coarsening_window_size = 15 # to match eORCA025 resolution

In [30]:
@dask.delayed
def coarsen(XdataSet, coarsening_window_size) :
    drop_nan_data = data[i]['HR'].dropna(dim='y', thresh=3).dropna(dim='x', thresh=3)
    area_2d = xr.DataArray((drop_nan_data.e1t * drop_nan_data.e2t), dims=['y','x'])
    wet_mask = xr.DataArray(drop_nan_data.tmask, dims=['y','x'])
    area_masked = area_2d*wet_mask
    data_weighted = area_masked*drop_nan_data
    data_weighted_sums = data_weighted.coarsen(x=coarsening_window_size, y=coarsening_window_size, boundary='trim', \
                                           coord_func={'x':'mean', 'y':'mean', 'lat':'mean', 'lon':'mean'}).sum(skipna=True)
    area_window_sums = area_masked.coarsen(x=coarsening_window_size, y=coarsening_window_size, boundary='trim').sum(skipna=True)
    result = data_weighted_sums/area_window_sums
    result['x'] = np.arange(len(result['x']))
    result['y'] = np.arange(len(result['y']))
    return result

In [31]:
%%time
for i in range(len(data)) :     
    data[i]['HR_coarsened'] = coarsen(data[i]['HR'], coarsening_window_size)

CPU times: user 7.64 ms, sys: 4.01 ms, total: 11.6 ms
Wall time: 12 ms


## Compute additional fields

In [45]:
%%time
for i in range(len(data)) :
    ds = data[i]['HR_coarsened']
    # subgrid temperature and salinity variance
    #ds['var_T'] = ds['sst_sqr'] - ds['sosstsst']**2
    ds.assign(var_T = ds['sst_sqr'] - ds['sosstsst']**2)
    # ds['var_S'] = ds['sal_sqr'] - ds['sosaline']**2
    # # "model" density evaluated with the filtered fields
    # ds['sigma0_model'] = xr.apply_ufunc(gsw.density.sigma0, ds['sosaline'], ds['sosstsst'], dask='parallelized')
    # # density error
    # ds['sigma0_delta'] = ds['sigma0_true'] - ds['sigma0_model']

CPU times: user 2.67 ms, sys: 5.84 ms, total: 8.51 ms
Wall time: 9.35 ms


In [46]:
data[i]['HR_coarsened']['sosaline'].isel(time_counter=0).compute()

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,114 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 13.06 kiB 13.06 kiB Shape (44, 38) (44, 38) Count 114 Tasks 1 Chunks Type float64 numpy.ndarray",38  44,

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,114 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.53 kiB,6.53 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 6.53 kiB 6.53 kiB Shape (44, 38) (44, 38) Count 8 Tasks 1 Chunks Type float32 numpy.ndarray",38  44,

Unnamed: 0,Array,Chunk
Bytes,6.53 kiB,6.53 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 13.06 kiB 13.06 kiB Shape (44, 38) (44, 38) Count 8 Tasks 1 Chunks Type float64 numpy.ndarray",38  44,

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 13.06 kiB 13.06 kiB Shape (44, 38) (44, 38) Count 8 Tasks 1 Chunks Type float64 numpy.ndarray",38  44,

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 13.06 kiB 13.06 kiB Shape (44, 38) (44, 38) Count 8 Tasks 1 Chunks Type float64 numpy.ndarray",38  44,

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 13.06 kiB 13.06 kiB Shape (44, 38) (44, 38) Count 8 Tasks 1 Chunks Type float64 numpy.ndarray",38  44,

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 13.06 kiB 13.06 kiB Shape (44, 38) (44, 38) Count 8 Tasks 1 Chunks Type float64 numpy.ndarray",38  44,

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 13.06 kiB 13.06 kiB Shape (44, 38) (44, 38) Count 8 Tasks 1 Chunks Type float64 numpy.ndarray",38  44,

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 13.06 kiB 13.06 kiB Shape (44, 38) (44, 38) Count 8 Tasks 1 Chunks Type float64 numpy.ndarray",38  44,

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 13.06 kiB 13.06 kiB Shape (44, 38) (44, 38) Count 8 Tasks 1 Chunks Type float64 numpy.ndarray",38  44,

Unnamed: 0,Array,Chunk
Bytes,13.06 kiB,13.06 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.53 kiB,6.53 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 6.53 kiB 6.53 kiB Shape (44, 38) (44, 38) Count 8 Tasks 1 Chunks Type float32 numpy.ndarray",38  44,

Unnamed: 0,Array,Chunk
Bytes,6.53 kiB,6.53 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.53 kiB,6.53 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 6.53 kiB 6.53 kiB Shape (44, 38) (44, 38) Count 8 Tasks 1 Chunks Type float32 numpy.ndarray",38  44,

Unnamed: 0,Array,Chunk
Bytes,6.53 kiB,6.53 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.53 kiB,6.53 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 6.53 kiB 6.53 kiB Shape (44, 38) (44, 38) Count 8 Tasks 1 Chunks Type float32 numpy.ndarray",38  44,

Unnamed: 0,Array,Chunk
Bytes,6.53 kiB,6.53 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.53 kiB,6.53 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 6.53 kiB 6.53 kiB Shape (44, 38) (44, 38) Count 8 Tasks 1 Chunks Type float32 numpy.ndarray",38  44,

Unnamed: 0,Array,Chunk
Bytes,6.53 kiB,6.53 kiB
Shape,"(44, 38)","(44, 38)"
Count,8 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8 B,8.0 B
Shape,(),()
Count,52 Tasks,1 Chunks
Type,datetime64[ns],numpy.ndarray
Array Chunk Bytes 8 B 8.0 B Shape () () Count 52 Tasks 1 Chunks Type datetime64[ns] numpy.ndarray,,

Unnamed: 0,Array,Chunk
Bytes,8 B,8.0 B
Shape,(),()
Count,52 Tasks,1 Chunks
Type,datetime64[ns],numpy.ndarray
