In [12]:
import zarr # Check version
import numpy as np

zarr.__version__

'2.13.2'

In [4]:
ncar_data = zarr.open("sciserver-experimental/ariel/tests")

In [5]:
ncar_data.info

0,1
Name,/
Type,zarr.hierarchy.Group
Read-only,False
Store type,zarr.storage.DirectoryStore
No. members,6
No. arrays,6
No. groups,0
Arrays,"e, p, t, u, v, w"


In [6]:
ncar_data['e'].info

0,1
Name,/e
Type,zarr.core.Array
Data type,float32
Shape,"(2048, 2048, 2048)"
Chunk shape,"(256, 256, 256)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,zarr.storage.DirectoryStore
No. bytes,34359738368 (32.0G)


<font color="orange">ok, the data is compressed. Let's test access time</font>

In [14]:
chunk_size = (256, 256, 256)
print("Chunk Size: ", np.product(np.array(chunk_size)) / (1024**2), " MB")

Chunk Size:  16.0  MB


## Note if you're using 256^3, you're exactly matching chunk size

In [8]:
ncar_data['e'][0,0,0]

0.03261032

In [24]:
a = np.empty(shape=(chunk_size))

## Test access times of 2 different chunks - Energy field

<font color="red">Randal: Why are these 2 runtimes so different? Is this a compression thing?</font>

In [48]:
%%timeit -o

a = ncar_data['e'][:256,:256,:256]

157 ms ± 9.07 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 157 ms ± 9.07 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

In [44]:
# _ gets previous output
_.all_runs

[1.602025124244392,
 1.6301844445988536,
 1.5952623495832086,
 1.4430317925289273,
 1.4496843367815018,
 1.4660517061129212,
 1.4731101002544165]

<font color="orange">Make sure to only access across chunks if you want to</font>

In [50]:
%%timeit -o

a = ncar_data['e'][1024:1280,1024:1280,1024:1280]

53.2 ms ± 12.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 53.2 ms ± 12.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

In [38]:
_.all_runs

[0.4810574548318982,
 0.5348023287951946,
 0.4648024532943964,
 0.4390142047777772,
 0.44178031757473946,
 0.4038350861519575,
 0.4066421911120415]

In [54]:
%%timeit -o

a = ncar_data['e'][1024:1280,512:768,512:768]

39.4 ms ± 1.85 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 39.4 ms ± 1.85 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

In [56]:
256*7

1792

In [55]:
%%timeit -o

a = ncar_data['e'][1024:1280,768:1024,512:768]

43 ms ± 1.66 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 43 ms ± 1.66 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

In [58]:
%%timeit -o

a = ncar_data['e'][1792:2048,1792:2048,1792:2048]

41.7 ms ± 1.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


<TimeitResult : 41.7 ms ± 1.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)>

<font color="red">Doesn't seem to be a caching thing - first run is as fast as the others</font>

In [59]:
_.all_runs

[0.39545581489801407,
 0.42266143672168255,
 0.41311127599328756,
 0.41227464005351067,
 0.4161639241501689,
 0.4185048080980778,
 0.43859389889985323]