# Benchmark a one-year pixel drill

In [None]:
import dask.bag as db
import dask
from dask.diagnostics import ProgressBar

In [None]:
import xarray as xr
import fsspec
import glob
xr.set_options(display_style='text');

In [None]:
# Gather every reference JSON under ./refs whose name starts with "2020" and
# ends with "_night.json"; sorting keeps the file order deterministic.
references = sorted(
    glob.iglob('./refs/2020*_night.json')
)
# Number of reference files found (shown as the cell output).
len(references)

In [None]:
def open_zarr(r):
    """Open a single reference file as an xarray dataset.

    Parameters
    ----------
    r
        Reference handed to fsspec's ``reference://`` filesystem as ``fo``
        (in this notebook, one of the paths collected in ``references``).

    Returns
    -------
    The dataset returned by ``xr.open_zarr`` for the reference mapper.
    """
    # Options for the remote S3 filesystem: anonymous access, with the
    # filesystem instance cache and the listings cache both disabled.
    s3_options = {
        'anon': True,
        'skip_instance_cache': True,
        'use_listings_cache': False,
    }
    store = fsspec.get_mapper(
        "reference://",
        remote_protocol='s3',
        fo=r,
        remote_options=s3_options,
    )
    return xr.open_zarr(store)

In [None]:
# Spread the reference files over 36 bag partitions and open each one with
# open_zarr; nothing runs until compute() is called.
b = db.from_sequence(references, npartitions=36)
zarrs = b.map(open_zarr)

# Run the opens on the multiprocessing scheduler with a progress bar;
# afterwards `zarrs` holds the computed results instead of the lazy bag.
with dask.config.set(scheduler='processes'), ProgressBar():
    zarrs = zarrs.compute()

In [None]:
# Remove the two sea-ice variables from every dataset; errors='ignore' means
# datasets that lack them pass through without raising.
zarrs = [
    dataset.drop_vars(
        ['sea_ice_fraction', 'sea_ice_fraction_dtime_from_sst'],
        errors='ignore',
    )
    for dataset in zarrs
]

In [None]:
## %%time
# Join the per-file datasets into a single dataset along the 'time' dimension.
# NOTE(review): fill_value='' is a string fill for what are presumably numeric
# variables — confirm this is intended.
ds = xr.concat(
    zarrs,
    dim='time',
    coords='minimal',
    compat='override',
    combine_attrs='override',
    fill_value='',
)

In [None]:
# Pixel drill: pick the grid cell nearest to (lat=-32.45, lon=115.55) and load
# its values across the whole concatenated dataset using the threaded
# scheduler, with a progress bar.
with dask.config.set(scheduler='threads'), ProgressBar():
    timeseries = ds.sel(
        lat=-32.45,
        lon=115.55,
        method='nearest',
    ).compute()

In [None]:
# Plot the sea surface temperature variable of the extracted pixel.
timeseries['sea_surface_temperature'].plot()

## Convert To NetCDF

In [None]:
# No target path is given, so nothing is written to a named file here; the
# trailing semicolon suppresses the notebook's display of the return value.
timeseries.to_netcdf(path=None);

## Median over an area in time

In [None]:
ds_crop = ds.sel(latitude=slice(-32.0,-32.5),longitude=slice(115.0,115.5))
ds_crop

In [None]:
with dask.config.set(scheduler='threads'):
    with ProgressBar():
        timeseries = ds_crop.stack(space=['latitude','longitude']).median(dim='space').compute()

In [None]:
timeseries.sst.plot()

In [None]:
# Pretty noisy data from MODIS...