# GEMV (cortex14)

In [1]:
import iarray as ia
import numpy as np
import scipy.io
import scipy.sparse

## Dense Arrays

### In-memory

#### ironArray

In [2]:
# ironArray defaults for the dense benchmarks: float32 data, LZ4 codec at
# compression level 9, btune disabled, 14 threads, fixed seed.
# Binding the return value to `_` keeps the cell from echoing it.
_ = ia.set_config(
    dtype=np.float32,
    codec=ia.Codecs.LZ4,
    clevel=9,
    btune=False,
    enforce_frame=False,
    seed=0,
    nthreads=14,
)

In [3]:
# Problem size and ironArray partitioning used by the dense benchmarks.
shape = (25000, 13859)
chunks = (4096, 2048)
blocks = (64, 2048)

# Dense random matrix. The positional 3 and 2 are presumably the mean and the
# standard deviation of the normal distribution (TODO confirm in iarray docs);
# fp_mantissa_bits=4 presumably trims the mantissa to help compression.
a_ia = ia.random.normal(
    shape,
    3,
    2,
    chunks=chunks,
    blocks=blocks,
    fp_mantissa_bits=4,
)

# Show the container summary (shape, chunks, blocks, compression ratio).
a_ia.info

0,1
type,IArray
shape,"(25000, 13859)"
chunks,"(4096, 2048)"
blocks,"(64, 2048)"
cratio,3.15


In [4]:
# Vector operand: its length, chunk size and block size are taken from the
# second (column) dimension of `a_ia` so the two operands line up.
b_ia = ia.linspace(
    (a_ia.shape[1],),
    2,
    10,
    chunks=(a_ia.chunks[1],),
    blocks=(a_ia.blocks[1],),
)

b_ia.info

0,1
type,IArray
shape,"(13859,)"
chunks,"(2048,)"
blocks,"(2048,)"
cratio,3.14


In [5]:
%%timeit

# Timed statement: in-memory ironArray matrix-vector product. The output is
# partitioned using the row-dimension chunk/block sizes of `a_ia`.
c_ia = ia.gemv(a_ia, b_ia, chunks=(a_ia.chunks[0],), blocks=(a_ia.blocks[0],))

50.3 ms ± 405 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [6]:
# Same product as the timed cell, run once more so that `c_ia` exists in the
# notebook namespace (names assigned under %%timeit are not kept).
c_ia = ia.gemv(
    a_ia,
    b_ia,
    chunks=(a_ia.chunks[0],),
    blocks=(a_ia.blocks[0],),
)

#### NumPy

In [7]:
# Matrix contents pulled out of the ironArray container for the NumPy baseline.
a_np = a_ia.data

In [8]:
# Vector contents pulled out of the ironArray container for the NumPy baseline.
b_np = b_ia.data

In [9]:
%%timeit

# Timed statement: NumPy matrix-vector product on the in-memory operands.
c_np = a_np.dot(b_np)

20.1 ms ± 118 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [10]:
# Recompute outside %%timeit so `c_np` is available for the comparison below.
c_np = a_np.dot(b_np)

In [11]:
# ironArray and NumPy must agree; the tolerance is looser for float32 data
# than for any other dtype.
np.testing.assert_allclose(
    actual=c_np,
    desired=c_ia.data,
    rtol=1e-5 if a_ia.dtype == np.float32 else 1e-12,
)

### On-disk

#### ironArray

In [12]:
# Write a copy of the dense matrix to "dense.iarray" (mode="w"), keeping the
# same chunk/block layout as the in-memory array.
a_ia.copy(
    chunks=chunks,
    blocks=blocks,
    urlpath="dense.iarray",
    mode="w",
)

# Re-open the stored array so the benchmark below runs on the on-disk container.
a_ia2 = ia.open("dense.iarray")

a_ia2.info

0,1
type,IArray
shape,"(25000, 13859)"
chunks,"(4096, 2048)"
blocks,"(64, 2048)"
cratio,3.15


In [13]:
%%timeit

# Timed statement: matrix-vector product with the matrix opened from
# "dense.iarray"; the vector `b_ia` is still the in-memory one.
c_ia2 = ia.gemv(a_ia2, b_ia, chunks=(a_ia.chunks[0],), blocks=(a_ia.blocks[0],))

54.8 ms ± 625 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [14]:
# Run the on-disk product once outside %%timeit so `c_ia2` is kept for the
# correctness check.
c_ia2 = ia.gemv(
    a_ia2,
    b_ia,
    chunks=(a_ia.chunks[0],),
    blocks=(a_ia.blocks[0],),
)

#### NumPy

In [15]:
# Persist the dense matrix for the on-disk NumPy baseline. np.save appends the
# ".npy" extension, so the file read back later is "dense.npy".
np.save("dense", a_np)

In [16]:
%%timeit

# Timed statements: read the matrix back from disk, then multiply. The load is
# deliberately inside the timed region so the measurement includes the I/O.
a_np2 = np.load("dense.npy")
c_np2 = a_np2.dot(b_np)

331 ms ± 12.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
# Reproduce the timed on-disk path outside %%timeit: names assigned inside a
# %%timeit cell are not kept, so the matrix has to be loaded again here.
# Multiplying the reloaded `a_np2` (rather than the in-memory `a_np`) makes the
# check below validate the data that was actually written to "dense.npy".
a_np2 = np.load("dense.npy")
c_np2 = a_np2.dot(b_np)

In [18]:
# On-disk results from NumPy and ironArray must agree; the tolerance is looser
# for float32 data than for any other dtype.
np.testing.assert_allclose(
    actual=c_np2,
    desired=c_ia2.data,
    rtol=1e-5 if a_ia.dtype == np.float32 else 1e-12,
)

### Bonus: High compression ratio

#### ironArray

In [19]:
# Rebind `a_ia` to a linspace-filled matrix of the same shape and partitioning
# (positional 3 and 45.5 are presumably the start and stop values). The summary
# below reports a far higher compression ratio than for the random matrix.
a_ia = ia.linspace(
    shape,
    3,
    45.5,
    chunks=chunks,
    blocks=blocks,
)

a_ia.info

0,1
type,IArray
shape,"(25000, 13859)"
chunks,"(4096, 2048)"
blocks,"(64, 2048)"
cratio,61.45


In [20]:
# Vector operand for the high-compression case, sized and partitioned from the
# second (column) dimension of `a_ia`.
b_ia = ia.linspace(
    (a_ia.shape[1],),
    2,
    10,
    chunks=(a_ia.chunks[1],),
    blocks=(a_ia.blocks[1],),
)

b_ia.info

0,1
type,IArray
shape,"(13859,)"
chunks,"(2048,)"
blocks,"(2048,)"
cratio,3.14


In [21]:
%%timeit

# Timed statement: ironArray matrix-vector product on the highly compressible
# matrix; output partitioning follows the row dimension of `a_ia`.
c_ia = ia.gemv(a_ia, b_ia, chunks=(a_ia.chunks[0],), blocks=(a_ia.blocks[0],))

31.7 ms ± 1.72 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [22]:
# Run the product once outside %%timeit so `c_ia` holds the result for the
# comparison with NumPy.
c_ia = ia.gemv(
    a_ia,
    b_ia,
    chunks=(a_ia.chunks[0],),
    blocks=(a_ia.blocks[0],),
)

#### NumPy

In [23]:
# Rebind `a_np` to the contents of the linspace-filled matrix for the NumPy run.
a_np = a_ia.data

In [24]:
# Rebind `b_np` to the contents of the current `b_ia` vector for the NumPy run.
b_np = b_ia.data

In [25]:
%%timeit

# Timed statement: NumPy matrix-vector product on the high-compression data.
c_np = a_np.dot(b_np)

20.1 ms ± 247 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [26]:
# Recompute outside %%timeit so `c_np` is available for the comparison below.
c_np = a_np.dot(b_np)

In [27]:
# Results on the high-compression data must agree; the tolerance is looser for
# float32 data than for any other dtype.
np.testing.assert_allclose(
    actual=c_np,
    desired=c_ia.data,
    rtol=1e-5 if a_ia.dtype == np.float32 else 1e-12,
)

## Sparse Arrays

In [28]:
# Switch the ironArray defaults to float64 for the sparse benchmarks; every
# other setting matches the dense configuration.
# Binding the return value to `_` keeps the cell from echoing it.
_ = ia.set_config(
    dtype=np.float64,
    codec=ia.Codecs.LZ4,
    clevel=9,
    btune=False,
    enforce_frame=False,
    seed=0,
    nthreads=14,
)

### In-memory

#### SciPy

In [29]:
# Location of the Matrix Market file holding the sparse test matrix.
urlpath = "./human_gene1/human_gene1.mtx"

a_sp = scipy.io.mmread(urlpath)

# NOTE: the printed value is stored entries / total entries, i.e. the fraction
# of non-zeros, even though the label reads "Sparsity".
nnz_fraction = a_sp.getnnz() / np.prod(a_sp.shape)
print(f"Sparsity: {nnz_fraction}")

Sparsity: 0.049683884579713435


In [30]:
# The problem size now comes from the loaded matrix; the chunk and block sizes
# are the same ones used for the dense benchmarks.
shape = a_sp.shape
chunks, blocks = (4096, 2048), (64, 2048)

In [31]:
# Dense vector operand: one entry per matrix column, evenly spaced over
# [0, 1], in the same dtype as the sparse matrix.
b_sp = np.linspace(
    start=0,
    stop=1,
    num=shape[1],
    dtype=a_sp.dtype,
)

In [32]:
%%timeit

# Timed statement: SciPy sparse matrix times dense vector, all in memory.
c_sp = a_sp.dot(b_sp)

40.7 ms ± 105 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [33]:
# Recompute outside %%timeit so `c_sp` is available as the reference result.
c_sp = a_sp.dot(b_sp)

#### ironArray

In [34]:
# Expand the sparse matrix to a dense NumPy array and hand it to ironArray,
# which stores it with the chunk/block layout chosen above.
a_ia = ia.numpy2iarray(
    a_sp.toarray(),
    chunks=chunks,
    blocks=blocks,
)

a_ia.info

0,1
type,IArray
shape,"(22283, 22283)"
chunks,"(4096, 2048)"
blocks,"(64, 2048)"
cratio,6.81


In [35]:
# ironArray version of the vector operand, partitioned with the column-side
# chunk/block sizes so it lines up with `a_ia`.
b_ia = ia.numpy2iarray(
    b_sp,
    chunks=(chunks[1],),
    blocks=(blocks[1],),
)

b_ia.info

0,1
type,IArray
shape,"(22283,)"
chunks,"(2048,)"
blocks,"(2048,)"
cratio,2.51


In [36]:
%%timeit

# Timed statement: ironArray matrix-vector product on the densified sparse
# matrix; the output uses the row-side chunk/block sizes.
c_ia = ia.gemv(a_ia, b_ia, chunks=(chunks[0],), blocks=(blocks[0],))

188 ms ± 3.05 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [37]:
# Run the product once outside %%timeit so `c_ia` is kept for the check
# against the SciPy result.
c_ia = ia.gemv(
    a_ia,
    b_ia,
    chunks=(chunks[0],),
    blocks=(blocks[0],),
)

In [38]:
# SciPy and ironArray must agree; the tolerance is looser for float32 data
# than for any other dtype.
np.testing.assert_allclose(
    actual=c_sp,
    desired=c_ia.data,
    rtol=1e-5 if a_ia.dtype == np.float32 else 1e-12,
)

### On-disk

#### SciPy

In [39]:
%%timeit

# Timed statements for the on-disk SciPy path: parse the Matrix Market file,
# then multiply the freshly loaded matrix by the vector. The product must use
# `a_sp2` (the matrix just read), not the in-memory `a_sp`, otherwise the loaded
# data is never exercised.
a_sp2 = scipy.io.mmread(urlpath)
c_sp2 = a_sp2.dot(b_sp)

13.9 s ± 82.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [40]:
# Reproduce the on-disk path outside %%timeit: names assigned inside a
# %%timeit cell are not kept, so the matrix is read from `urlpath` again and
# the product is taken with that reloaded matrix rather than in-memory `a_sp`.
a_sp2 = scipy.io.mmread(urlpath)
c_sp2 = a_sp2.dot(b_sp)

#### ironArray

In [41]:
# Write a copy of the densified matrix to "sparse.iarray" (mode="w"), keeping
# the same chunk/block layout as the in-memory array.
a_ia.copy(
    chunks=chunks,
    blocks=blocks,
    urlpath="sparse.iarray",
    mode="w",
)

# Re-open the stored array so the benchmark below runs on the on-disk container.
a_ia2 = ia.open("sparse.iarray")

a_ia2.info

0,1
type,IArray
shape,"(22283, 22283)"
chunks,"(4096, 2048)"
blocks,"(64, 2048)"
cratio,6.81


In [42]:
%%timeit

# Timed statement: matrix-vector product with the matrix opened from
# "sparse.iarray"; the vector `b_ia` is still the in-memory one.
c_ia2 = ia.gemv(a_ia2, b_ia, chunks=(a_ia.chunks[0],), blocks=(a_ia.blocks[0],))

205 ms ± 4.34 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [43]:
# Run the on-disk product once outside %%timeit so `c_ia2` is kept in the
# notebook namespace.
c_ia2 = ia.gemv(
    a_ia2,
    b_ia,
    chunks=(a_ia.chunks[0],),
    blocks=(a_ia.blocks[0],),
)