## MUR data reading test

In [1]:
# start a cluster, the key to reading effectively on Cloud
from dask_kubernetes import KubeCluster
from dask.distributed import Client

In [2]:
cluster = KubeCluster(n_workers=20)
client = Client(cluster)
cluster

distributed.scheduler - INFO - Clear task state
distributed.scheduler - INFO -   Scheduler at:    tcp://10.32.2.85:33019
distributed.scheduler - INFO -   dashboard at:                     :8787
distributed.scheduler - INFO - Receive client connection: Client-0523f14a-3f98-11ea-83bb-76a84568c6e2
distributed.core - INFO - Starting established connection


VBox(children=(HTML(value='<h2>KubeCluster</h2>'), HBox(children=(HTML(value='\n<div>\n  <style scoped>\n    .…

In [3]:
import xarray as xr
import fsspec

In [4]:
%%time
ds = xr.open_zarr(fsspec.get_mapper('s3://nasa-eodc/eodc/mursst_zarr/5x1799x3600', anon=True), 
              consolidated=True)

CPU times: user 10.7 s, sys: 718 ms, total: 11.4 s
Wall time: 1min 16s


In [5]:
ds.analysed_sst

In [6]:
ds.analysed_sst.encoding

{'chunks': (5, 1799, 3600),
 'compressor': Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0),
 'filters': None,
 '_FillValue': -32768,
 'scale_factor': 0.001,
 'add_offset': 298.15,
 'dtype': dtype('int16')}

In [7]:
chunk_mb = (5*1799*3600*2)/1e6
print(chunk_mb)

64.764


Global mean at a specific time:

In [9]:
%%time
mean1 = ds.analysed_sst.sel(time='2004-01-01').mean(dim=['lon','lat']).compute()

distributed.scheduler - INFO - Register tcp://10.32.6.3:36957
distributed.scheduler - INFO - Starting worker compute stream, tcp://10.32.6.3:36957
distributed.core - INFO - Starting established connection
distributed.scheduler - INFO - Register tcp://10.32.5.2:34873
distributed.scheduler - INFO - Starting worker compute stream, tcp://10.32.5.2:34873
distributed.core - INFO - Starting established connection
distributed.scheduler - INFO - Register tcp://10.32.6.2:44135
distributed.scheduler - INFO - Starting worker compute stream, tcp://10.32.6.2:44135
distributed.core - INFO - Starting established connection
distributed.scheduler - INFO - Register tcp://10.32.6.4:41573
distributed.scheduler - INFO - Starting worker compute stream, tcp://10.32.6.4:41573
distributed.core - INFO - Starting established connection
distributed.scheduler - INFO - Register tcp://10.32.5.4:45761
distributed.scheduler - INFO - Starting worker compute stream, tcp://10.32.5.4:45761
distributed.core - INFO - Startin

CPU times: user 8.55 s, sys: 1.07 s, total: 9.62 s
Wall time: 4min 43s


Time mean at a specific location:

In [8]:
%%time
mean2 = ds.analysed_sst.sel(lon=-110, lat=0).mean(dim='time').compute()

distributed.scheduler - INFO - Register tcp://10.32.10.2:32895
distributed.scheduler - INFO - Starting worker compute stream, tcp://10.32.10.2:32895
distributed.core - INFO - Starting established connection
distributed.scheduler - INFO - Register tcp://10.32.10.4:39603
distributed.scheduler - INFO - Starting worker compute stream, tcp://10.32.10.4:39603
distributed.core - INFO - Starting established connection
distributed.scheduler - INFO - Register tcp://10.32.10.3:37197
distributed.scheduler - INFO - Starting worker compute stream, tcp://10.32.10.3:37197
distributed.core - INFO - Starting established connection
distributed.scheduler - INFO - Register tcp://10.32.6.3:41363
distributed.scheduler - INFO - Starting worker compute stream, tcp://10.32.6.3:41363
distributed.core - INFO - Starting established connection
distributed.scheduler - INFO - Register tcp://10.32.6.2:34389
distributed.scheduler - INFO - Starting worker compute stream, tcp://10.32.6.2:34389
distributed.core - INFO - S

CPU times: user 18 s, sys: 1.7 s, total: 19.7 s
Wall time: 5min 29s


In [9]:
import hvplot.xarray

In [14]:
%%time
ds.analysed_sst.sel(lon=-140, lat=53).hvplot(grid=True)

CPU times: user 4.91 s, sys: 438 ms, total: 5.34 s
Wall time: 22.7 s


In [13]:
%%time
ds.analysed_sst.sel(time='2005-01-01').hvplot.quadmesh(x='lon', y='lat', geo=True, 
                rasterize=True, cmap='rainbow', tiles='EsriImagery')

CPU times: user 5.12 s, sys: 381 ms, total: 5.5 s
Wall time: 5.45 s


Subset the El Niño/La Niña Region:

In [13]:
ds_e = ds.sel(lon=slice(-180,-70), lat=slice(-25,25))

Difference the monthly mean temperature fields for Jan 2016 (El Niño) and Jan 2014 (normal)

In [14]:
sst_jan2016 = ds_e.analysed_sst.sel(time=slice('2016-01-01','2016-02-01')).mean(dim='time')
sst_jan2014 = ds_e.analysed_sst.sel(time=slice('2014-01-01','2014-02-01')).mean(dim='time')

In [15]:
%%time
(sst_jan2016 - sst_jan2014).hvplot.quadmesh(x='lon', y='lat', geo=True, 
                rasterize=True, cmap='rainbow', tiles='EsriImagery')

CPU times: user 4.44 s, sys: 373 ms, total: 4.81 s
Wall time: 4.74 s


In [None]:
ds.sea_ice_fraction[0,:,:].plot()