# Experiment writing to a zarr store on the JASMIN s3 object store.

* Working with `s3fs`, not with `fsspec`
* Cannot write large files (3D zoom level 10) - run out of mem.

In [16]:
from pathlib import Path

import fsspec
import s3fs
import xarray as xr
import zarr

In [8]:
dsT = xr.open_dataset('/gws/nopw/j04/hrcm/mmuetz/DYAMOND3_example_data/healpix/air_temperature/hpz0/glm.n2560_RAL3p3.apverd_20200120T00.air_temperature.hpz0.nc')


  _set_context_ca_bundle_path(ca_bundle_path)
  dsT = xr.open_dataset('/gws/nopw/j04/hrcm/mmuetz/DYAMOND3_example_data/healpix/air_temperature/hpz0/glm.n2560_RAL3p3.apverd_20200120T00.air_temperature.hpz0.nc')


In [9]:
dsT

In [2]:
s3cfg = dict([l.split(' = ') for l in Path('/home/users/mmuetz/.s3cfg').read_text().split('\n') if l])

In [3]:
# Internal access allowed in notebooks - no https and s3-ext.
jasmin_s3 = s3fs.S3FileSystem(
    anon=False, secret=s3cfg['secret_key'],
    key=s3cfg['access_key'],
    client_kwargs={'endpoint_url': 'http://hackathon-o.s3.jc.rl.ac.uk'}
)
jasmin_s3.ls('data')

In [12]:
store= s3fs.S3Map(root='s3://data/hpz0/glm.n2560_RAL3p3.apverd_20200120T00.air_temperature.hpz0.zarr', s3=jasmin_s3, check=False)
task = dsT.to_zarr(store=store)

In [14]:
dsT2 = xr.open_zarr(store=store)

  dsT2 = xr.open_zarr(store=store)


In [15]:
dsT2

Unnamed: 0,Array,Chunk
Bytes,40 B,40 B
Shape,"(5,)","(5,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,timedelta64[ns] numpy.ndarray,timedelta64[ns] numpy.ndarray
"Array Chunk Bytes 40 B 40 B Shape (5,) (5,) Dask graph 1 chunks in 2 graph layers Data type timedelta64[ns] numpy.ndarray",5  1,

Unnamed: 0,Array,Chunk
Bytes,40 B,40 B
Shape,"(5,)","(5,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,timedelta64[ns] numpy.ndarray,timedelta64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.72 kiB,11.72 kiB
Shape,"(5, 25, 12)","(5, 25, 12)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 11.72 kiB 11.72 kiB Shape (5, 25, 12) (5, 25, 12) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",12  25  5,

Unnamed: 0,Array,Chunk
Bytes,11.72 kiB,11.72 kiB
Shape,"(5, 25, 12)","(5, 25, 12)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [17]:
dsT2.air_temperature.sum().compute()

In [25]:
# Try writing a much larger dataset (zoom level 10).
dsT = xr.open_dataset('/gws/nopw/j04/hrcm/mmuetz/DYAMOND3_example_data/healpix/air_temperature/hpz10/glm.n2560_RAL3p3.apverd_20200120T00.air_temperature.hpz10.nc')


  dsT = xr.open_dataset('/gws/nopw/j04/hrcm/mmuetz/DYAMOND3_example_data/healpix/air_temperature/hpz10/glm.n2560_RAL3p3.apverd_20200120T00.air_temperature.hpz10.nc')


In [None]:
# Runs out of mem with 24G, even though compute=False.
store= s3fs.S3Map(root='s3://data/hpz10/glm.n2560_RAL3p3.apverd_20200120T00.air_temperature.hpz10.zarr', s3=jasmin_s3, check=False)
task = dsT.to_zarr(store=store, compute=False)

## experiment with `fsspec`

* Basic access to s3 store works...
* But, I cannot get this working with `xarray` reading from a store that I've set up using `fsspec`

In [5]:
fs = fsspec.filesystem(
    's3',
    key=s3cfg['access_key'],
    secret=s3cfg['secret_key'],
    client_kwargs={'endpoint_url': 'http://hackathon-o.s3.jc.rl.ac.uk'},  # Works locally withing JASMIN/Notebook service.
)

In [6]:
fs.ls('data')

['data/20200101T0000Z_ph2496.pp', 'data/zarr_example.zarr']

In [19]:
s3_store = fsspec.get_mapper(
    "s3://data/hpz0/glm.n2560_RAL3p3.apverd_20200120T00.air_temperature.hpz0.zarr",
    storage_options={
        'key': s3cfg['access_key'],
        'secret': s3cfg['secret_key'],        
        "client_kwargs": {"endpoint_url": "http://hackathon-o.s3.jc.rl.ac.uk"}  # Adjust for your S3-compatible storage
    }
)

In [23]:
s3_store = fsspec.get_mapper("s3://data/hpz0/glm.n2560_RAL3p3.apverd_20200120T00.air_temperature.hpz0.zarr", fs=fs)

In [24]:
dsT3 = xr.open_zarr(s3_store)

TypeError: AioSession.__init__() got an unexpected keyword argument 'fs'