In [1]:
import zarr # Check version
import numpy as np

zarr.__version__

'2.6.1'

In [2]:
ncar_data = zarr.open("sciserver-experimental/ariel/tests")

In [3]:
ncar_data.info

0,1
Name,/
Type,zarr.hierarchy.Group
Read-only,False
Store type,zarr.storage.DirectoryStore
No. members,6
No. arrays,6
No. groups,0
Arrays,"e, p, t, u, v, w"


In [4]:
ncar_data['e'].info

0,1
Name,/e
Type,zarr.core.Array
Data type,float32
Shape,"(2048, 2048, 2048)"
Chunk shape,"(256, 256, 256)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,zarr.storage.DirectoryStore
No. bytes,34359738368 (32.0G)


<font color="orange">ok, the data is compressed. Let's test access time</font>

In [5]:
chunk_size = (256, 256, 256)
print("Chunk Size: ", np.product(np.array(chunk_size)) / (1024**2), " MB")

Chunk Size:  16.0  MB


## Note if you're using 256^3, you're exactly matching chunk size

In [6]:
a = np.empty(shape=(chunk_size))

## Cold Cache - Energy (2.5 Storage Ratio)

In [12]:
%%timeit -o -n 1 -r 1

# _ = ncar_data['t'][:256,:256,:256]
_ = ncar_data['e'][256:512,256:512,256:512]

364 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


<TimeitResult : 364 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

<font color="orange">Make sure to only access across chunks if you want to</font>

## Warm Access - Energy

In [21]:
%%timeit -o

# _ = ncar_data['t'][:256,:256,:256]
_ = ncar_data['e'][256:512,256:512,256:512]

71.8 ms ± 1.18 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 71.8 ms ± 1.18 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

In [22]:
# _ gets previous output
_.all_runs

[0.733253882965073,
 0.7138614639407024,
 0.7114443189930171,
 0.708593490999192,
 0.7022965310607105,
 0.7235285399947315,
 0.7359849340282381]

## Cold Cache - Temp (1.8 Storage Ratio)

In [3]:
%%timeit -o -n 1 -r 1

a = ncar_data['t'][1024:1280,768:1024,512:768]

728 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


<TimeitResult : 728 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

## Warm access - Temp

In [4]:
%%timeit -o

a = ncar_data['t'][1024:1280,768:1024,512:768]

40.1 ms ± 1.57 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 40.1 ms ± 1.57 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

## Cold access - w (Storage Ratio 1.3)

In [5]:
%%timeit -o -n 1 -r 1

_ = ncar_data['w'][:256,:256,:256]

434 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


<TimeitResult : 434 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

## Warm access - w

In [6]:
%%timeit -o

_ = ncar_data['w'][:256,:256,:256]

78.8 ms ± 1.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 78.8 ms ± 1.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

In [53]:
# Need to write uncompressed data - ignore in later runs
# fsstore = zarr.storage.FSStore('ncar_zarr_uncompressed/e')

In [59]:
# https://zarr.readthedocs.io/en/stable/api/creation.html#zarr.creation.array

# e_arr = zarr.creation.array(ncar_data['e'], compressor=None)

In [7]:
# e_arr.info

In [69]:
# # https://github.com/zarr-developers/zarr-python/issues/472
# zarr.save_array("ncar_zarr_uncompressed/e", e_arr, compressor=None)

In [8]:
ncar_e_uncompressed = zarr.open("ncar_zarr_uncompressed/e")

In [9]:
ncar_e_uncompressed.info

0,1
Type,zarr.core.Array
Data type,float32
Shape,"(2048, 2048, 2048)"
Chunk shape,"(256, 256, 256)"
Order,C
Read-only,False
Compressor,
Store type,zarr.storage.DirectoryStore
No. bytes,34359738368 (32.0G)
No. bytes stored,34359738622 (32.0G)


### Cold access - Energy Uncompressed

<font color="orange">Timeit runs multiple times. Filesystem should be able to cache better</font>

In [10]:
%%timeit -o -n 1 -r 1

_ = ncar_e_uncompressed[:256,:256,:256]

504 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


<TimeitResult : 504 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

## Warm access - Energy Uncompressed

In [11]:
%%timeit -o

_ = ncar_e_uncompressed[:256,:256,:256]

136 ms ± 586 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 136 ms ± 586 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)>

In [12]:
_.all_runs

[1.3577309621032327,
 1.3584185810759664,
 1.364055837970227,
 1.3720491729909554,
 1.3634938159957528,
 1.3642271049320698,
 1.374529485940002]

## t-var. Uncompressed - Cold

In [120]:
# https://github.com/zarr-developers/zarr-python/issues/472
# zarr.save_array("ncar_zarr_uncompressed/t", ncar_data['t'], compressor=None)

In [13]:
ncar_e_uncompressed = zarr.open("ncar_zarr_uncompressed/t")
ncar_e_uncompressed.info

0,1
Type,zarr.core.Array
Data type,float32
Shape,"(2048, 2048, 2048)"
Chunk shape,"(256, 256, 256)"
Order,C
Read-only,False
Compressor,
Store type,zarr.storage.DirectoryStore
No. bytes,34359738368 (32.0G)
No. bytes stored,34359738622 (32.0G)


In [14]:
%%timeit -o -n 1 -r 1

_ = ncar_e_uncompressed[:256,:256,:256]

397 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


<TimeitResult : 397 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

## Warm access - Temp Uncompressed

In [15]:
%%timeit -o

_ = ncar_e_uncompressed[:256,:256,:256]

138 ms ± 2.48 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 138 ms ± 2.48 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

## w-var Uncompressed. Cold

In [110]:
# # https://github.com/zarr-developers/zarr-python/issues/472
# zarr.save_array("ncar_zarr_uncompressed/w", ncar_data['w'], compressor=None)

In [16]:
ncar_e_uncompressed = zarr.open("ncar_zarr_uncompressed/w")
ncar_e_uncompressed.info

0,1
Type,zarr.core.Array
Data type,float32
Shape,"(2048, 2048, 2048)"
Chunk shape,"(256, 256, 256)"
Order,C
Read-only,False
Compressor,
Store type,zarr.storage.DirectoryStore
No. bytes,34359738368 (32.0G)
No. bytes stored,34359738622 (32.0G)


In [17]:
%%timeit -o -n 1 -r 1

_ = ncar_e_uncompressed[:256,:256,:256]

382 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


<TimeitResult : 382 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)>

## W-var Warm

In [18]:
%%timeit -o

_ = ncar_e_uncompressed[:256,:256,:256]

136 ms ± 141 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 136 ms ± 141 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)>

# Results overall - milliseconds


Uncompressed:

t - 397, 138
w - 382 , 136