In [1]:
import zarr # Check version
import numpy as np

zarr.__version__

'2.6.1'

In [2]:
ncar_data = zarr.open("sciserver-experimental/ariel/tests")

In [3]:
ncar_data.info

0,1
Name,/
Type,zarr.hierarchy.Group
Read-only,False
Store type,zarr.storage.DirectoryStore
No. members,6
No. arrays,6
No. groups,0
Arrays,"e, p, t, u, v, w"


In [4]:
ncar_data['e'].info

0,1
Name,/e
Type,zarr.core.Array
Data type,float32
Shape,"(2048, 2048, 2048)"
Chunk shape,"(256, 256, 256)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,zarr.storage.DirectoryStore
No. bytes,34359738368 (32.0G)


<font color="orange">ok, the data is compressed. Let's test access time</font>

In [5]:
chunk_size = (256, 256, 256)
print("Chunk Size: ", np.product(np.array(chunk_size)) / (1024**2), " MB")

Chunk Size:  16.0  MB


## Note if you're using 256^3, you're exactly matching chunk size

In [6]:
ncar_data['e'][0,0,0]

0.03261032

In [7]:
a = np.empty(shape=(chunk_size))

## Test access times of 2 different chunks - Energy field

<font color="red">Randal: Why are these 2 runtimes so different? Is this a compression thing?</font>

In [17]:
%%timeit -o

a = ncar_data['e'][:256,:256,:256]

112 ms ± 3.18 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 112 ms ± 3.18 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

In [9]:
# _ gets previous output
_.all_runs

[0.8684709370136261,
 0.8745953650213778,
 0.8786141079617664,
 0.9059019819833338,
 0.8999651320045814,
 0.8470146149629727,
 0.867334556998685]

<font color="orange">Make sure to only access across chunks if you want to</font>

In [10]:
%%timeit -o

a = ncar_data['e'][1024:1280,1024:1280,1024:1280]

28 ms ± 1.35 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 28 ms ± 1.35 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

In [11]:
_.all_runs

[0.2539135670522228,
 0.2804607640719041,
 0.2766759099904448,
 0.29683197592385113,
 0.2937821949599311,
 0.28643092105630785,
 0.2713625560281798]

In [12]:
%%timeit -o

a = ncar_data['e'][1024:1280,512:768,512:768]

27.8 ms ± 1.26 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 27.8 ms ± 1.26 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

In [13]:
256*7

1792

In [14]:
%%timeit -o

a = ncar_data['e'][1024:1280,768:1024,512:768]

26.8 ms ± 933 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 26.8 ms ± 933 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)>

In [15]:
%%timeit -o

a = ncar_data['e'][1792:2048,1792:2048,1792:2048]

27.5 ms ± 1.08 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 27.5 ms ± 1.08 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

<font color="red">Doesn't seem to be a caching thing - first run is as fast as the others</font>

In [16]:
_.all_runs

[0.2818707380210981,
 0.2593551389873028,
 0.2626262679696083,
 0.2740209079347551,
 0.26869386492762715,
 0.2898360730614513,
 0.2853171930182725]

### Trying this with Medium compressible variable - t

In [25]:
var = 't'

In [34]:
ncar_data[var].info

0,1
Name,/t
Type,zarr.core.Array
Data type,float32
Shape,"(2048, 2048, 2048)"
Chunk shape,"(256, 256, 256)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,zarr.storage.DirectoryStore
No. bytes,34359738368 (32.0G)


In [31]:
%%timeit -o

a = ncar_data[var][:256,:256,:256]

97.7 ms ± 1.83 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 97.7 ms ± 1.83 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

In [33]:
%%timeit -o

a = ncar_data[var][1024:1280,1024:1280,1024:1280]

49.8 ms ± 2.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 49.8 ms ± 2.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

In [28]:
%%timeit -o

a = ncar_data[var][1024:1280,768:1024,512:768]

44.2 ms ± 972 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


<TimeitResult : 44.2 ms ± 972 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)>

In [29]:
%%timeit -o

a = ncar_data[var][1792:2048,1792:2048,1792:2048]

37.1 ms ± 2.82 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


<TimeitResult : 37.1 ms ± 2.82 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)>

## Least compressible var - w (Storage Ratio 1.3)

In [35]:
var = 'w'

In [36]:
ncar_data[var].info

0,1
Name,/w
Type,zarr.core.Array
Data type,float32
Shape,"(2048, 2048, 2048)"
Chunk shape,"(256, 256, 256)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,zarr.storage.DirectoryStore
No. bytes,34359738368 (32.0G)


In [47]:
%%timeit -o

a = ncar_data[var][:256,:256,:256]

112 ms ± 2.38 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 112 ms ± 2.38 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

In [48]:
%%timeit -o

a = ncar_data[var][1024:1280,1024:1280,1024:1280]

102 ms ± 1.43 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 102 ms ± 1.43 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

In [49]:
%%timeit -o

a = ncar_data[var][1024:1280,768:1024,512:768]

101 ms ± 2.67 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 101 ms ± 2.67 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

In [50]:
%%timeit -o

a = ncar_data[var][1792:2048,1792:2048,1792:2048]

93.1 ms ± 3.56 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 93.1 ms ± 3.56 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

<font color="orange"> Need to create some uncompressed data - skip this on later runs</font>

In [53]:
fsstore = zarr.storage.FSStore('ncar_zarr_uncompressed/e')

In [56]:
ncar_data['e'].shape

(2048, 2048, 2048)

In [59]:
# https://zarr.readthedocs.io/en/stable/api/creation.html#zarr.creation.array

# e_arr = zarr.creation.array(ncar_data['e'], compressor=None)

In [60]:
# e_arr.info

0,1
Type,zarr.core.Array
Data type,float32
Shape,"(2048, 2048, 2048)"
Chunk shape,"(256, 256, 256)"
Order,C
Read-only,False
Compressor,
Store type,builtins.dict
No. bytes,34359738368 (32.0G)
No. bytes stored,34359738622 (32.0G)


In [62]:
# zarr.save("ncar_zarr_uncompressed/e", e_arr)

In [None]:
ncar_e_uncompressed = zarr.open("ncar_zarr_uncompressed/e")

In [None]:
ncar_e_uncompressed.info

### Sequential access of Uncompressed

In [None]:
%%timeit -o -n 1 -r 1

a = ncar_e_uncompressed[:256,:256,:256]

In [None]:
%%timeit -o -n 1 -r 1

a = ncar_data[var][1024:1280,1024:1280,1024:1280]

In [None]:
%%timeit -o -n 1 -r 1

a = ncar_data[var][1024:1280,768:1024,512:768]

In [None]:
%%timeit -o -n 1 -r 1

a = ncar_data[var][1792:2048,1792:2048,1792:2048]