# Test MUR SST Zarr stores



In [None]:
import warnings
# Silence every warning for the rest of the session. The filter is installed
# before the imports below, so warnings raised while importing are hidden too.
warnings.filterwarnings("ignore") 

# NOTE(review): numpy, cartopy, intake and dask are imported here but are not
# referenced by name in the cells visible in this notebook -- confirm whether
# they are still needed.
import xarray as xr
import fsspec
from matplotlib import pyplot as plt
import numpy as np
import cartopy
import cartopy.crs as ccrs
import intake
import dask

xr.set_options(display_style="html")  # render xarray objects with the HTML repr
# Draw matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size as (width, height) in inches. Set after `%matplotlib inline`;
# NOTE(review): some IPython versions reset rcParams when the inline backend is
# activated, so keep this ordering.
plt.rcParams['figure.figsize'] = 12, 6
# Ask the inline backend for high-resolution ("retina") figure output.
%config InlineBackend.figure_format = 'retina' 

# Start Dask gateway

In [None]:
from dask_gateway import Gateway
from dask.distributed import Client

In [None]:
# Gateway() is called without arguments, so the connection settings come from
# the environment's dask-gateway configuration.
gateway = Gateway()
# Request a brand-new cluster from the gateway.
cluster = gateway.new_cluster()
# Adaptive scaling: let the cluster size itself between 1 and 20 workers.
cluster.adapt(minimum=1, maximum=20)
# Attach a distributed client to the cluster; `client` and `cluster` are both
# closed again in the final cell of the notebook.
client = Client(cluster)
# Bare last expression: display the cluster object as the cell output.
cluster

**☝️ Don’t forget to click the link above, or copy it into the Dask panel on the left, to view the scheduler dashboard!**

## original store

In [None]:
%%time
file_location = 's3://mur-sst/zarr-v1'
ds_sst = xr.open_zarr(fsspec.get_mapper(file_location, anon=True),consolidated=True)
ds_sst

### Read entire 10 years of data at 1 point.

Select the ``analysed_sst`` variable over a specific time period, `lat`, and `lon` and load the data into memory.

In [None]:
%%time
sst_timeseries = ds_sst['analysed_sst'].sel(time=slice('2010-01-01','2020-01-01'),
                                            lat=47,
                                            lon=-145
                                           ).load()
sst_timeseries.plot()

### The anomaly is more interesting...  

Use the [.groupby](http://xarray.pydata.org/en/stable/generated/xarray.DataArray.groupby.html#xarray-dataarray-groupby) method to calculate the day-of-year climatology and subtract it from the time series to get the anomaly, then use the [.resample](http://xarray.pydata.org/en/stable/generated/xarray.Dataset.resample.html#xarray-dataset-resample) method to average the anomaly into 1-month bins.

In [None]:
# Day-of-year climatology: the mean over all years for each calendar day.
sst_climatology = sst_timeseries.groupby("time.dayofyear").mean()
# Anomaly: subtract each day's climatological value from the matching samples.
sst_anomaly = sst_timeseries.groupby("time.dayofyear") - sst_climatology
# Monthly mean of the anomaly, using month-start ("1MS") bins.
sst_anomaly_monthly = sst_anomaly.resample(time="1MS").mean()

# Overlay the daily and monthly anomalies and add a black zero reference line.
sst_anomaly.plot()
sst_anomaly_monthly.plot()
plt.axhline(y=0, color="k", linewidth=2)

## NEW store

In [None]:
%%time
file_location = 's3://mur-sst/zarr'
ds_sst = xr.open_zarr(fsspec.get_mapper(file_location, anon=True),consolidated=True)
ds_sst

### Read entire 10 years of data at 1 point.

Select the ``analysed_sst`` variable over a specific time period, `lat`, and `lon` and load the data into memory.

In [None]:
%%time
sst_timeseries = ds_sst['analysed_sst'].sel(time=slice('2010-01-01','2020-01-01'),
                                            lat=47,
                                            lon=-145
                                           ).load()
sst_timeseries.plot()

### The anomaly is more interesting...  

Use the [.groupby](http://xarray.pydata.org/en/stable/generated/xarray.DataArray.groupby.html#xarray-dataarray-groupby) method to calculate the day-of-year climatology and subtract it from the time series to get the anomaly, then use the [.resample](http://xarray.pydata.org/en/stable/generated/xarray.Dataset.resample.html#xarray-dataset-resample) method to average the anomaly into 1-month bins.

In [None]:
# Day-of-year climatology: the mean over all years for each calendar day.
sst_climatology = sst_timeseries.groupby("time.dayofyear").mean()
# Anomaly: subtract each day's climatological value from the matching samples.
sst_anomaly = sst_timeseries.groupby("time.dayofyear") - sst_climatology
# Monthly mean of the anomaly, using month-start ("1MS") bins.
sst_anomaly_monthly = sst_anomaly.resample(time="1MS").mean()

# Overlay the daily and monthly anomalies and add a black zero reference line.
sst_anomaly.plot()
sst_anomaly_monthly.plot()
plt.axhline(y=0, color="k", linewidth=2)

## Close the cluster

In [None]:
# Shut down in the reverse order of creation: disconnect the client first,
# then close the gateway cluster it was attached to.
client.close()
cluster.close()