In [1]:
import zsparse
import zarr
import numpy as np
import scipy.sparse as ss

# csr and csc classes
A matrix can be initialized from a dense NumPy array, a `(data, indices, indptr)` tuple, or a SciPy sparse matrix.

In [2]:
# Random 1000x1000 SciPy sparse matrix in CSR format with 10% of the entries stored.
# No random_state is passed, so the values differ on every run.
mat = ss.random(1000,1000, density=.1, format='csr')

In [3]:
# Build a zsparse csr matrix from the SciPy matrix; no store is given here,
# and the .info output further down reports Persistent = False for it.
csr_test = zsparse.csr(mat,shape=mat.shape)

In [4]:
# The repr summarises the format, shape, nnz and number of bytes stored.
csr_test

<Compressed Sparse Row Matrix, shape=(1000, 1000), nnz=100000, bytes_stored = 807.7K>

In [5]:
# .info renders a table of metadata (dtype, shape, density, compressor, ...).
csr_test.info

0,1
Type,zsparse.csr.csr
Format,Compressed Sparse Row Matrix
Data type,float64
Shape,"(1000, 1000)"
nnz,100000
Density,0.1
Order,C
Read-only,True
Persistent,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"


In [6]:
# Convert back to a scipy.sparse matrix in CSR format.
csr_test.to_scipy()

<1000x1000 sparse matrix of type '<class 'numpy.float64'>'
	with 100000 stored elements in Compressed Sparse Row format>

In [7]:
# Materialise the whole matrix as a dense numpy array (1000 x 1000 float64 here).
csr_test.todense()

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.63143508, ..., 0.        , 0.        ,
        0.        ],
       [0.27689167, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [8]:
# append() modifies csr_test in place: the rows of mat are added after the
# existing ones, so the shape grows from (1000, 1000) to (2000, 1000).
# Re-running this cell appends again, so it is not idempotent.
csr_test.append(mat)
csr_test

<Compressed Sparse Row Matrix, shape=(2000, 1000), nnz=200000, bytes_stored = 1.6M>

# Indexing

In [9]:
# Two integer indices select a single element and give back a scalar.
csr_test[0,2]

0

In [10]:
# Slicing rows returns a new csr matrix.
# NOTE(review): the recorded output reports 399 rows although 200:600 spans
# 400 rows (the 2-D slice in the next cell reports 400) - confirm whether
# row-only slicing drops a row.
csr_test[200:600,:]

<Compressed Sparse Row Matrix, shape=(399, 1000), nnz=39858, bytes_stored = 322.6K>

In [11]:
# Slices can be combined on both axes; the result is a 400 x 200 csr matrix.
csr_test[200:600,500:700]

<Compressed Sparse Row Matrix, shape=(400, 200), nnz=8040, bytes_stored = 64.8K>

In [12]:
# Rows can also be selected with a numpy integer array (here rows 0..79).
csr_test[np.arange(80),:]

<Compressed Sparse Row Matrix, shape=(80, 1000), nnz=8086, bytes_stored = 66.2K>

In [13]:
# A list of row indices combined with a single column index gives a 6 x 1 csr matrix.
csr_test[[0,5,8,12,55,500],5]

<Compressed Sparse Row Matrix, shape=(6, 1), nnz=0, bytes_stored = 1005>

Indexing with lists or arrays in the non-compressed dimension (the second dimension for csr) is not supported:

In [14]:
# Expected to fail: a list in the second dimension raises NotImplementedError for csr.
csr_test[500:600,[1,5,8,9,60]]

NotImplementedError: Indexing with a <class 'numpy.ndarray'> in the second dimension of class <class 'zsparse.csr.csr'> is not supported.

# Persistent matrices
Just like zarr arrays, zsparse matrices can be made persistent.


In [15]:
# Save our zsparse matrix under the path 'csr_test.zarr'.
zsparse.save(csr_test,'csr_test.zarr')

In [16]:
# We could also save the SciPy matrix directly, without building a zsparse
# matrix first. The recorded output is a zsparse CSR repr, so here save()
# appears to return the converted matrix.
zsparse.save(mat,'from_scipy.zarr')

<Compressed Sparse Row Matrix, shape=(1000, 1000), nnz=100000, bytes_stored = 807.7K>

In [17]:
# We can also make our matrices persistent from the beginning by passing a
# zarr store to the constructor.
store = zarr.DirectoryStore('csr_test2')
csr_test2 = zsparse.csr(mat,shape=mat.shape,store=store)

# Loading

In [18]:
# Reopen the matrix that was saved from the SciPy matrix earlier.
csr_test3 = zsparse.load('from_scipy.zarr')

This is essentially the same as `zarr.open()`: none of the matrix data is actually loaded into memory, only the metadata. All the same operations still work:

In [19]:
# Indexing works on a loaded matrix just as on an in-memory one.
# NOTE(review): the recorded output reports 499 rows although 500: on a
# 1000-row matrix spans 500 rows - confirm whether row-only slicing drops a row.
csr_test3[500:,:]

<Compressed Sparse Row Matrix, shape=(499, 1000), nnz=49643, bytes_stored = 401.6K>

# Saving and loading pydata/sparse arrays
Although zsparse does not (yet?) have a COO class of its own, you can still save and load pydata/sparse COO arrays:

In [20]:
import sparse

In [21]:
# Random 1000x1000 pydata/sparse array with 10% of the entries non-zero.
# No random_state is passed, so the values differ on every run.
s = sparse.random((1000, 1000), density=0.1)

In [22]:
# save() is given a zarr store object here rather than a path string.
store = zarr.DirectoryStore('save_COO.zarr')
zsparse.save(s,store)

In [23]:
# Load the array back from the same location using its path.
coo_array = zsparse.load('save_COO.zarr')

In [24]:
# The loaded object is a pydata/sparse COO array, not a zsparse class.
type(coo_array), coo_array

(sparse.coo.core.COO,
 <COO: shape=(1000, 1000), dtype=float64, nnz=100000, fill_value=0.0>)