# Xarray/Zarr/Icechunk on S3

You will need to run this notebook in a `conda` environment created from `environment.yml`.

In [1]:
import math

import numpy as np
import zarr

from icechunk import IcechunkStore, Storage

In [4]:
# Location of the Icechunk repository on S3. `bucket` must be passed as a
# keyword: the positional string "bucket=icechunk-test" would be taken
# literally as the bucket name.
# NOTE(review): credentials are presumably resolved from the environment by
# `s3_from_env` — confirm against the icechunk docs.
s3_storage = Storage.s3_from_env(
    bucket="icechunk-test",
    prefix="oscar-demo-repository",
)

## Create a new Zarr store backed by Icechunk

This example uses an S3 store.

In [3]:
config = CONFIGS["s3"]
store = await IcechunkStore.create(
    storage=s3_storage,
    mode="w",
)

ValueError: Error initializing repository: ref error: `storage error `ObjectStore(Generic { store: "S3", source: Reqwest { retries: 10, max_retries: 10, elapsed: 7.069211292s, retry_timeout: 180s, source: reqwest::Error { kind: Request, url: Url { scheme: "http", cannot_be_a_base: false, username: "", password: None, host: Some(Ipv4(169.254.169.254)), port: None, path: "/latest/api/token", query: None, fragment: None }, source: hyper_util::client::legacy::Error(Connect, ConnectError("tcp connect error", Os { code: 64, kind: Uncategorized, message: "Host is down" })) } } })``

## Real data

In [4]:
import xarray as xr

In [5]:
import fsspec

# S3 filesystem handle; used below to open the sample NetCDF file.
fs = fsspec.filesystem("s3")

In [6]:
# Open the OSCAR 2018 velocity NetCDF file directly from S3 as a dask-backed
# dataset (the repr below shows each variable as a single dask chunk).
oscar_file = fs.open("s3://earthmover-sample-data/netcdf/oscar_vel2018.nc")
oscar = xr.open_dataset(oscar_file, engine="h5netcdf", chunks={})
oscar

Unnamed: 0,Array,Chunk
Bytes,317.33 MiB,317.33 MiB
Shape,"(72, 1, 481, 1201)","(72, 1, 481, 1201)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 317.33 MiB 317.33 MiB Shape (72, 1, 481, 1201) (72, 1, 481, 1201) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",72  1  1201  481  1,

Unnamed: 0,Array,Chunk
Bytes,317.33 MiB,317.33 MiB
Shape,"(72, 1, 481, 1201)","(72, 1, 481, 1201)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,317.33 MiB,317.33 MiB
Shape,"(72, 1, 481, 1201)","(72, 1, 481, 1201)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 317.33 MiB 317.33 MiB Shape (72, 1, 481, 1201) (72, 1, 481, 1201) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",72  1  1201  481  1,

Unnamed: 0,Array,Chunk
Bytes,317.33 MiB,317.33 MiB
Shape,"(72, 1, 481, 1201)","(72, 1, 481, 1201)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,317.33 MiB,317.33 MiB
Shape,"(72, 1, 481, 1201)","(72, 1, 481, 1201)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 317.33 MiB 317.33 MiB Shape (72, 1, 481, 1201) (72, 1, 481, 1201) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",72  1  1201  481  1,

Unnamed: 0,Array,Chunk
Bytes,317.33 MiB,317.33 MiB
Shape,"(72, 1, 481, 1201)","(72, 1, 481, 1201)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,317.33 MiB,317.33 MiB
Shape,"(72, 1, 481, 1201)","(72, 1, 481, 1201)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 317.33 MiB 317.33 MiB Shape (72, 1, 481, 1201) (72, 1, 481, 1201) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",72  1  1201  481  1,

Unnamed: 0,Array,Chunk
Bytes,317.33 MiB,317.33 MiB
Shape,"(72, 1, 481, 1201)","(72, 1, 481, 1201)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [7]:
# Create the root Zarr group on the Icechunk store; `overwrite=True` is
# passed so an existing root is replaced rather than raising.
group = zarr.group(store=store, overwrite=True)
group

Group(_async_group=<AsyncGroup <icechunk.IcechunkStore object at 0x7fedf05205f0>>)

In [8]:
import time

for var in oscar:
    print(var)
    tic = time.time()
    group.create_array(
        name=var,
        shape=oscar[var].shape,
        fill_value=-1234567,
        dtype=oscar[var].dtype,
        data=oscar[var],
        exists_ok=True,
    )
    print(await store.commit(f"wrote {var}"))
    print(f"commited; {time.time() - tic} seconds")

u


[icechunk/src/storage/caching.rs:190:9] "inserting" = "inserting"
[icechunk/src/storage/caching.rs:190:9] &id = ae4702e9b678be1ece58938828b1d404


TXJD559XPAJKFM5C2WQCDB19BM
commited; 167.40236234664917 seconds
um


[icechunk/src/storage/caching.rs:190:9] "inserting" = "inserting"
[icechunk/src/storage/caching.rs:190:9] &id = 20b7393dcd7b80c92d062fb1c415043d


YVM2BH0PN53YG16C7P8H0CEH5G
commited; 176.54246497154236 seconds
v


[icechunk/src/storage/caching.rs:190:9] "inserting" = "inserting"
[icechunk/src/storage/caching.rs:190:9] &id = 921a6c826982dc2197597928862e6e88


BRZYX6Y1E0KFVDJNCV7HSMHN04
commited; 168.94983386993408 seconds
vm
Y9P6P8BYRXEN5JCD0XNK1PV79M
commited; 171.89381194114685 seconds


[icechunk/src/storage/caching.rs:190:9] "inserting" = "inserting"
[icechunk/src/storage/caching.rs:190:9] &id = 89584c43e3e45f7a04da192a189bb0bb


## Open store

1. Why is zarr writing anything?
2. Why is icechunk letting zarr write something with mode="r"?
3. Why aren't we checking out main?
4. Can't read data back.

In [4]:
# Re-open the existing repository at the same S3 location, this time
# requesting read-only mode. This rebinds `store`.
store = await IcechunkStore.open_existing(
    storage=s3_storage,
    mode="r",
)
store

<icechunk.IcechunkStore at 0x7fd73f601610>

In [5]:
# Display the store handle again (same object as the previous cell's output).
store

<icechunk.IcechunkStore at 0x7fd73f601610>

In [6]:
# Open the root group of the re-opened store.
# NOTE(review): no `mode` is passed here, so zarr's default open mode applies;
# that default may allow creating/writing the group — confirm, given the
# questions above about unexpected writes on a read-only store.
root_group = zarr.open_group(store=store)

In [7]:
# List the group's members before any snapshot is checked out
# (the recorded output is empty).
root_group.members()

()

In [8]:
# NOTE(review): presumably discards any uncommitted changes held by the
# store session before checking out a snapshot — confirm.
await store.reset()

In [9]:
# Check out the commit id that was printed after writing the last variable ("vm").
await store.checkout("Y9P6P8BYRXEN5JCD0XNK1PV79M")

In [10]:
# List members again after the checkout; the four written arrays now appear.
root_group.members()

(('u',
  <Array <icechunk.IcechunkStore object at 0x7f55ac327fb0>/u shape=(72, 1, 481, 1201) dtype=float64>),
 ('vm',
  <Array <icechunk.IcechunkStore object at 0x7f55ac327fb0>/vm shape=(72, 1, 481, 1201) dtype=float64>),
 ('um',
  <Array <icechunk.IcechunkStore object at 0x7f55ac327fb0>/um shape=(72, 1, 481, 1201) dtype=float64>),
 ('v',
  <Array <icechunk.IcechunkStore object at 0x7f55ac327fb0>/v shape=(72, 1, 481, 1201) dtype=float64>))

In [12]:
# Read one 2-D slice of "u": index 0 on the first two axes, all of the last two.
# NOTE(review): in the recorded run this read panics with
# "there is no reactor running" (see output below); it is still unresolved.
u = root_group["u"][0, 0, :, :]

thread '<unnamed>' panicked at /rustc/9b00956e56009bab2aa15d7bff10916599e3d6d6/library/core/src/ops/function.rs:250:5:
there is no reactor running, must be called from the context of a Tokio 1.x runtime
thread '<unnamed>' panicked at /rustc/9b00956e56009bab2aa15d7bff10916599e3d6d6/library/core/src/ops/function.rs:250:5:
there is no reactor running, must be called from the context of a Tokio 1.x runtime
thread '<unnamed>' panicked at /rustc/9b00956e56009bab2aa15d7bff10916599e3d6d6/library/core/src/ops/function.rs:250:5:
there is no reactor running, must be called from the context of a Tokio 1.x runtime
thread '<unnamed>' panicked at /rustc/9b00956e56009bab2aa15d7bff10916599e3d6d6/library/core/src/ops/function.rs:250:5:
there is no reactor running, must be called from the context of a Tokio 1.x runtime
thread '<unnamed>' panicked at /rustc/9b00956e56009bab2aa15d7bff10916599e3d6d6/library/core/src/ops/function.rs:250:5:
there is no reactor running, must be called from the context of a Tok

PanicException: there is no reactor running, must be called from the context of a Tokio 1.x runtime