## Creating an array

In [1]:
import zarr

In [2]:
z = zarr.zeros((10000, 10000), chunks=(1000, 1000), dtype='i4')

In [3]:
z

<zarr.core.Array (10000, 10000) int32>

## Reading and Writing data

In [4]:
z[:] = 42

In [5]:
import numpy as np

In [6]:
# Overwrite the first row and then the first column with the sequence 0..9999;
# every other element keeps the value 42 assigned in the previous cell.
z[0, :] = np.arange(10000)
z[:, 0] = np.arange(10000)

In [7]:
z

<zarr.core.Array (10000, 10000) int32>

In [8]:
z[0, 0]

0

In [10]:
z[-1, -1]

42

In [11]:
z[0, :]

array([   0,    1,    2, ..., 9997, 9998, 9999], dtype=int32)

In [12]:
z[:]

array([[   0,    1,    2, ..., 9997, 9998, 9999],
       [   1,   42,   42, ...,   42,   42,   42],
       [   2,   42,   42, ...,   42,   42,   42],
       ...,
       [9997,   42,   42, ...,   42,   42,   42],
       [9998,   42,   42, ...,   42,   42,   42],
       [9999,   42,   42, ...,   42,   42,   42]], dtype=int32)

## Persistent arrays

In [13]:
# Open the array stored under data/example.zarr in write mode (mode='w'),
# declaring its shape, chunk shape and dtype up front.
z1 = zarr.open(
    'data/example.zarr',
    mode='w',
    shape=(10000, 10000),
    chunks=(1000, 1000),
    dtype='i4',
)

In [14]:
z1

<zarr.core.Array (10000, 10000) int32>

In [15]:
z1[:] = 42
z1[0, :] = np.arange(10000)
z1[:, 0] = np.arange(10000)

In [16]:
z2 = zarr.open('data/example.zarr', mode='r')

In [18]:
np.all(z1[:] == z2[:])

True

In [20]:
a = np.arange(10)

In [21]:
# NOTE(review): this is the same path that z1/z2 were opened on above, so the
# earlier (10000, 10000) example array is presumably replaced by `a` here —
# confirm that reusing the path is intended.
zarr.save('data/example.zarr', a)

In [22]:
zarr.load('data/example.zarr')

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

## Resizing and appending 

In [23]:
z = zarr.zeros(shape=(10000, 10000), chunks=(1000, 1000))
z[:] = 42

In [24]:
z

<zarr.core.Array (10000, 10000) float64>

In [25]:
z.chunks

(1000, 1000)

In [26]:
z.resize(20000, 10000)

In [27]:
z.shape

(20000, 10000)

In [28]:
z.chunks

(1000, 1000)

In [29]:
a = np.arange(10000000, dtype='i4').reshape(10000, 1000)

In [30]:
z = zarr.array(a, chunks=(1000, 100))

In [31]:
z.shape

(10000, 1000)

In [32]:
z.append(a)

(20000, 1000)

In [33]:
z

<zarr.core.Array (20000, 1000) int32>

In [34]:
# z has 20000 rows after the previous append while `a` has only 10000, so two
# copies of `a` are stacked vertically to match before appending along axis 1.
z.append(np.vstack([a, a]), axis=1)

(20000, 2000)

In [35]:
z.shape

(20000, 2000)

## Compressors 

In [36]:
from numcodecs import Blosc

In [37]:
compressor = Blosc(cname='zstd', clevel=3, shuffle=Blosc.BITSHUFFLE)

In [38]:
compressor

Blosc(cname='zstd', clevel=3, shuffle=BITSHUFFLE, blocksize=0)

In [43]:
data = np.arange(100000000, dtype='i4').reshape(10000, 10000)

In [44]:
z = zarr.array(data, chunks=(1000, 1000), compressor=compressor)

In [45]:
z.compressor

Blosc(cname='zstd', clevel=3, shuffle=BITSHUFFLE, blocksize=0)

In [46]:
z.info

0,1
Type,zarr.core.Array
Data type,int32
Shape,"(10000, 10000)"
Chunk shape,"(1000, 1000)"
Order,C
Read-only,False
Compressor,"Blosc(cname='zstd', clevel=3, shuffle=BITSHUFFLE, blocksize=0)"
Store type,builtins.dict
No. bytes,400000000 (381.5M)
No. bytes stored,3242241 (3.1M)


In [47]:
from numcodecs import blosc

In [48]:
blosc.list_compressors()

['blosclz', 'lz4', 'lz4hc', 'snappy', 'zlib', 'zstd']

In [49]:
from numcodecs import Zstd

In [50]:
# Reuse the `data` array created earlier in this section: it is the same
# arange(100000000) int32 array reshaped to (10000, 10000), so building it
# again would only allocate another ~400 MB for identical contents.
z = zarr.array(
    data,
    chunks=(1000, 1000),
    compressor=Zstd(level=1),
)

In [51]:
z

<zarr.core.Array (10000, 10000) int32>

In [52]:
z.compressor

Zstd(level=1)

In [53]:
import lzma

In [54]:
# Filter chain handed to the LZMA codec: a delta filter with distance 4,
# followed by LZMA2 at preset 1.
lzma_filters = [
    {'id': lzma.FILTER_DELTA, 'dist': 4},
    {'id': lzma.FILTER_LZMA2, 'preset': 1},
]

In [55]:
from numcodecs import LZMA

In [56]:
compressor = LZMA(filters=lzma_filters)

In [57]:
# Reuse the `data` array created earlier in this section: it is the same
# arange(100000000) int32 array reshaped to (10000, 10000), so building it
# again would only allocate another ~400 MB for identical contents.
z = zarr.array(
    data,
    chunks=(1000, 1000),
    compressor=compressor,
)

In [58]:
z.compressor

LZMA(format=1, check=-1, preset=None, filters=[{'dist': 4, 'id': 3}, {'id': 33, 'preset': 1}])

In [59]:
z.info

0,1
Type,zarr.core.Array
Data type,int32
Shape,"(10000, 10000)"
Chunk shape,"(1000, 1000)"
Order,C
Read-only,False
Compressor,"LZMA(format=1, check=-1, preset=None, filters=[{'dist': 4, 'id': 3}, {'id': 33, 'preset': 1}])"
Store type,builtins.dict
No. bytes,400000000 (381.5M)
No. bytes stored,254829 (248.9K)


## Groups

In [60]:
root = zarr.group()

In [61]:
root

<zarr.hierarchy.Group '/'>

In [62]:
foo = root.create_group('foo')
bar = foo.create_group('bar')

In [63]:
z1 = bar.zeros('baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='i4')

In [64]:
z1

<zarr.core.Array '/foo/bar/baz' (10000, 10000) int32>

In [66]:
root['foo']

<zarr.hierarchy.Group '/foo'>

In [67]:
root['foo/bar']

<zarr.hierarchy.Group '/foo/bar'>

In [68]:
root['foo/bar/baz']

<zarr.core.Array '/foo/bar/baz' (10000, 10000) int32>

In [70]:
root.tree()

## Array and group diagnostics

In [71]:
root = zarr.group()

In [72]:
foo = root.create_group('foo')

In [73]:
bar = foo.zeros('bar', shape=1000000, chunks=100000, dtype='i8')

In [74]:
bar[:] = 42

In [75]:
baz = foo.zeros('baz', shape=(1000, 1000), chunks=(100, 100), dtype='i4')

In [76]:
baz

<zarr.core.Array '/foo/baz' (1000, 1000) int32>

In [77]:
baz[:] = 42

In [78]:
root.info

0,1
Name,/
Type,zarr.hierarchy.Group
Read-only,False
Store type,zarr.storage.DictStore
No. members,1
No. arrays,0
No. groups,1
Groups,foo


In [79]:
foo.info

0,1
Name,/foo
Type,zarr.hierarchy.Group
Read-only,False
Store type,zarr.storage.DictStore
No. members,2
No. arrays,2
No. groups,0
Arrays,"bar, baz"


In [80]:
bar.info

0,1
Name,/foo/bar
Type,zarr.core.Array
Data type,int64
Shape,"(1000000,)"
Chunk shape,"(100000,)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,zarr.storage.DictStore
No. bytes,8000000 (7.6M)


In [81]:
root.tree()

In [82]:
print(root.tree())

/
 └── foo
     ├── bar (1000000,) int64
     └── baz (1000, 1000) int32


## User attributes

In [83]:
root = zarr.group()

In [84]:
root.attrs['foo'] = 'bar'

In [85]:
z = root.zeros('zzz', shape=(10000, 10000))

In [86]:
z.attrs['baz'] = 42

In [87]:
z.attrs['qux'] = [1, 4, 7, 12]

In [88]:
sorted(root.attrs)

['foo']

In [89]:
'foo' in root.attrs

True

In [90]:
z.attrs['qux']

[1, 4, 7, 12]

## Advanced indexing

### Indexing with coordinate arrays

In [91]:
z = zarr.array(np.arange(10))

In [92]:
z

<zarr.core.Array (10,) int64>

In [93]:
z[:]

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [94]:
z.get_coordinate_selection([1, 4])

array([1, 4])

In [95]:
z.set_coordinate_selection([1, 4], [-1, -2])

In [96]:
z[:]

array([ 0, -1,  2,  3, -2,  5,  6,  7,  8,  9])

In [97]:
z = zarr.array(np.arange(15).reshape(3, 5))

In [98]:
z[:]

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [99]:
z.get_coordinate_selection(([0, 2], [1, 3]))

array([ 1, 13])

In [100]:
b = np.arange(15).reshape(3, 5)

In [101]:
b

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [106]:
b[2, 3]

13

In [108]:
z.vindex[[0, 2], [1, 3]]

array([ 1, 13])

## Copying/migrating data

In [110]:
import h5py

In [111]:
source = h5py.File('data/example.h5', mode='w')

In [112]:
foo = source.create_group('foo')

In [114]:
baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=(50,))

In [115]:
spam = source.create_dataset('spam', data=np.arange(100, 200), chunks=(30,))

In [116]:
zarr.tree(source)

In [117]:
print(zarr.tree(source))

/
 ├── foo
 │   └── bar
 │       └── baz (100,) int64
 └── spam (100,) int64


In [118]:
dest = zarr.open_group('data/example.zarr', mode='w')

In [119]:
from sys import stdout

In [120]:
zarr.copy(source['foo'], dest, log=stdout)

copy /foo
copy /foo/bar
copy /foo/bar/baz (100,) int64
all done: 3 copied, 0 skipped, 800 bytes copied


(3, 0, 800)

In [121]:
print(dest.tree())
# The copy is finished and `source` is not used by any later cell shown here,
# so release the HDF5 file handle that was opened with mode='w' earlier in
# this section instead of leaving it open for the rest of the session.
source.close()

/
 └── foo
     └── bar
         └── baz (100,) int64


## Chunk memory layout

In [122]:
a = np.arange(100000000, dtype='i4').reshape(10000, 10000).T

In [123]:
a.shape

(10000, 10000)

In [124]:
c = zarr.array(a, chunks=(1000, 1000))

In [125]:
c

<zarr.core.Array (10000, 10000) int32>

In [126]:
c.info

0,1
Type,zarr.core.Array
Data type,int32
Shape,"(10000, 10000)"
Chunk shape,"(1000, 1000)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,builtins.dict
No. bytes,400000000 (381.5M)
No. bytes stored,6696010 (6.4M)


In [127]:
# Same input and chunk shape as `c`, but with order='F'; compare the
# "No. bytes stored" rows of the two info tables to see the effect of the
# chunk memory layout on the stored size.
f = zarr.array(a, chunks=(1000, 1000), order='F')

In [128]:
f.info

0,1
Type,zarr.core.Array
Data type,int32
Shape,"(10000, 10000)"
Chunk shape,"(1000, 1000)"
Order,F
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,builtins.dict
No. bytes,400000000 (381.5M)
No. bytes stored,4684636 (4.5M)


## Parallel computing and synchronization

In [129]:
# Create the array with a ThreadSynchronizer attached via `synchronizer=`.
z = zarr.zeros(
    (10000, 10000),
    chunks=(1000, 1000),
    dtype='i4',
    synchronizer=zarr.ThreadSynchronizer(),
)

In [130]:
z

<zarr.core.Array (10000, 10000) int32>

In [131]:
z.info

0,1
Type,zarr.core.Array
Data type,int32
Shape,"(10000, 10000)"
Chunk shape,"(1000, 1000)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Synchronizer type,zarr.sync.ThreadSynchronizer
Store type,builtins.dict
No. bytes,400000000 (381.5M)


In [137]:
%load_ext version_information

The version_information extension is already loaded. To reload it, use:
  %reload_ext version_information


In [138]:
%version_information

Software,Version
Python,3.6.6 64bit [GCC 4.2.1 Compatible Apple LLVM 6.1.0 (clang-602.0.53)]
IPython,7.0.1
OS,Darwin 17.7.0 x86_64 i386 64bit
Fri Oct 12 14:11:06 2018 MDT,Fri Oct 12 14:11:06 2018 MDT


In [139]:
%version_information zarr, numpy

Software,Version
Python,3.6.6 64bit [GCC 4.2.1 Compatible Apple LLVM 6.1.0 (clang-602.0.53)]
IPython,7.0.1
OS,Darwin 17.7.0 x86_64 i386 64bit
zarr,2.2.0
numpy,1.15.1
Fri Oct 12 14:11:13 2018 MDT,Fri Oct 12 14:11:13 2018 MDT
