# Matrix multiplication

In [1]:
import iarray as ia
import numpy as np
import os

In [2]:
# Load the memprofiler IPython extension; the %%mprof_run and %mprof_plot
# magics used throughout this notebook are expected to come from it.
%load_ext memprofiler

In [3]:
%%mprof_run
nrows = 100_000 # number of rows in matrix am
ncols = 25000   # number of columns in first matrix
ncols2 = 1000   # number of columns in second matrix

shape = (nrows, ncols, ncols2)
amshape = (shape[0], shape[1])
bmshape = (shape[1], shape[2])
# Obtain optimal chunk and block shapes
mparams = ia.matmul_params(amshape, bmshape, itemsize=8)
amchunks, amblocks, bmchunks, bmblocks = mparams

filename = "arr-gemm.iarr"
#ia.remove_urlpath(filename)
if os.path.exists(filename):
    am = ia.open(filename)
else: 
    # btune does not represent an advantage when the number of chunks is small
    ia.set_config(btune=False)
    am = ia.random.normal(amshape, 3, 2, chunks=amchunks, blocks=amblocks, urlpath=filename, fp_mantissa_bits=20)
print(am.info)

w = np.ones(bmshape)
bm = ia.numpy2iarray(w, chunks=bmchunks, blocks=bmblocks)
print(bm.info)

type   : IArray
shape  : (100000, 25000)
chunks : (4116, 4116)
blocks : (147, 147)
cratio : 2.83

type   : IArray
shape  : (25000, 1000)
chunks : (4116, 1029)
blocks : (147, 147)
cratio : 560.34

memprofiler: used 197.71 MiB RAM (peak of 230.02 MiB) in 0.0847 s, total RAM usage 403.98 MiB


In [4]:
# Evict the pages of the ironArray file from the file-system cache
# (see the "Evicted Pages" line in the output).
!vmtouch -e $filename

           Files: 1
     Directories: 0
   Evicted Pages: 2047703 (7G)
         Elapsed: 0.35874 seconds


In [5]:
import zarr
import dask.array as da

zfilename = "arr-gemm.zarr"
#ia.remove_urlpath(zfilename)


# First operand: copy `am` into an on-disk Zarr store only if the store is
# missing, then always (re)open the array from that store.
if not os.path.exists(zfilename):
    # Matrix zam goes to disk
    zam = zarr.empty(store=zfilename, shape=amshape, chunks=amchunks)
    am.copyto(zam)
zam = zarr.open(zfilename)
print(zam.info)

# Second operand: Zarr array created without a store argument, filled from `bm`.
zbm = zarr.create(shape=bmshape, chunks=bmchunks)
bm.copyto(zbm)
print(zbm.info)

# Dask wrappers: one pair for the first profiled run and a separate pair
# (suffix _vmtouch) for the run performed after the cache eviction.
dask_a = da.from_zarr(zam)
dask_a_vmtouch = da.from_zarr(zam)
dask_b = da.from_zarr(zbm)
dask_b_vmtouch = da.from_zarr(zbm)

Type               : zarr.core.Array
Data type          : float64
Shape              : (100000, 25000)
Chunk shape        : (4116, 4116)
Order              : C
Read-only          : False
Compressor         : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store type         : zarr.storage.DirectoryStore
No. bytes          : 20000000000 (18.6G)
No. bytes stored   : 8768717910 (8.2G)
Storage ratio      : 2.3
Chunks initialized : 175/175

Type               : zarr.core.Array
Data type          : float64
Shape              : (25000, 1000)
Chunk shape        : (4116, 1029)
Order              : C
Read-only          : False
Compressor         : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store type         : builtins.dict
No. bytes          : 200000000 (190.7M)
No. bytes stored   : 967595 (944.9K)
Storage ratio      : 206.7
Chunks initialized : 7/7



In [6]:
%%mprof_run zarr::novmtouch
# Dask + Zarr product, profiled under the label "zarr::novmtouch": this run is
# not preceded by a vmtouch eviction of the Zarr store.
res = da.matmul(dask_a, dask_b)
cshape = (zam.shape[0], zbm.shape[1])
# No store is given, so the result lives in an in-memory store
# (the info output reports "Store type: builtins.dict").
c = zarr.create(shape=cshape)
# Writes the product into `c`; the info output shows all chunks initialized afterwards.
da.to_zarr(res, c)
print(c.info)

Type               : zarr.core.Array
Data type          : float64
Shape              : (100000, 1000)
Chunk shape        : (3125, 63)
Order              : C
Read-only          : False
Compressor         : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store type         : builtins.dict
No. bytes          : 800000000 (762.9M)
No. bytes stored   : 41768443 (39.8M)
Storage ratio      : 19.2
Chunks initialized : 512/512

memprofiler: used 4414.46 MiB RAM (peak of 9592.57 MiB) in 29.7401 s, total RAM usage 4832.46 MiB


In [7]:
# Evict both the ironArray file and the Zarr store from the file-system cache
# before the next profiled run.
!vmtouch -e $filename $zfilename

           Files: 177
     Directories: 1
   Evicted Pages: 4188593 (15G)
         Elapsed: 0.35201 seconds


In [8]:
%%mprof_run 2.zarr::
# Same Dask + Zarr product as the "zarr::novmtouch" run, but executed right
# after the vmtouch eviction and profiled under the label "2.zarr::".
res_vmtouch = da.matmul(dask_a_vmtouch, dask_b_vmtouch)
cshape_vmtouch = (zam.shape[0], zbm.shape[1])
# Result array created without a store argument, as in the previous run.
c_vmtouch = zarr.create(shape=cshape_vmtouch)
da.to_zarr(res_vmtouch, c_vmtouch)
print(c_vmtouch.info)

Type               : zarr.core.Array
Data type          : float64
Shape              : (100000, 1000)
Chunk shape        : (3125, 63)
Order              : C
Read-only          : False
Compressor         : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store type         : builtins.dict
No. bytes          : 800000000 (762.9M)
No. bytes stored   : 41768443 (39.8M)
Storage ratio      : 19.2
Chunks initialized : 512/512

memprofiler: used 443.54 MiB RAM (peak of 4756.48 MiB) in 29.1714 s, total RAM usage 5276.01 MiB


In [9]:
# Plot the profiles whose labels match the pattern .*zarr::.*
# (both "zarr::novmtouch" and "2.zarr::" match it).
%mprof_plot .*zarr::.* -t "Matrix multiplication computation"

In [10]:
# Drop every Dask/Zarr object (and the NumPy array of ones) created so far;
# none of them is used by the remaining cells.
del (
    res,
    res_vmtouch,
    dask_a,
    zam,
    dask_b,
    zbm,
    c_vmtouch,
    c,
    dask_a_vmtouch,
    dask_b_vmtouch,
    w,
)

In [11]:
# Pull both operands out of their IArray containers; the results are fed to
# np.matmul in the next cell (presumably as plain NumPy arrays — confirm).
npa, npb = am.data, bm.data

In [12]:
%%mprof_run 3.numpy::
# NumPy baseline, profiled under the label "3.numpy::": np.matmul on the
# arrays obtained from am.data and bm.data in the previous cell.
npcm = np.matmul(npa, npb)

memprofiler: used 882.29 MiB RAM (peak of 882.29 MiB) in 14.5864 s, total RAM usage 25235.28 MiB


In [13]:
# Plot the profile(s) whose label matches the pattern .*numpy::
%mprof_plot .*numpy:: -t "Matrix multiplication computation"

In [14]:
# Drop the NumPy operands and the NumPy result before the ironArray runs.
del npa, npb, npcm

In [15]:
%%mprof_run iarray::novmtouch
# ironArray product, profiled under the label "iarray::novmtouch": this run is
# not preceded by a vmtouch eviction of the ironArray file.
iacm_opt = ia.matmul(am, bm)
print(iacm_opt.info)

type   : IArray
shape  : (100000, 1000)
chunks : (4116, 1029)
blocks : (147, 147)
cratio : 37.14

memprofiler: used 484.77 MiB RAM (peak of 484.77 MiB) in 14.8668 s, total RAM usage 5692.97 MiB


In [16]:
# Drop the previous result, then evict the ironArray file from the
# file-system cache before the next profiled run.
del iacm_opt
!vmtouch -e $filename 

           Files: 1
     Directories: 0
   Evicted Pages: 2047703 (7G)
         Elapsed: 0.3632 seconds


In [17]:
%%mprof_run 1.iarray::
# Same ironArray product as the "iarray::novmtouch" run, but executed right
# after the vmtouch eviction and profiled under the label "1.iarray::".
iacm_opt_vmtouch = ia.matmul(am, bm)
print(iacm_opt_vmtouch.info)

type   : IArray
shape  : (100000, 1000)
chunks : (4116, 1029)
blocks : (147, 147)
cratio : 37.14

memprofiler: used 44.39 MiB RAM (peak of 44.39 MiB) in 14.3870 s, total RAM usage 5737.38 MiB


In [18]:
# Plot the profiles whose labels match the pattern .*iarray::.*
# (both "iarray::novmtouch" and "1.iarray::" match it).
%mprof_plot .*iarray::.* -t "Matrix multiplication computation"

In [19]:
# Plot the three numbered profiles together: ironArray (1), Dask + Zarr (2) and NumPy (3).
%mprof_plot 1.iarray:: 2.zarr:: 3.numpy:: -t "Matrix multiplication computation"

### TileDB

In [20]:
import tiledb

# The last ironArray result is not needed for the TileDB runs.
del iacm_opt_vmtouch
# Show the chunk shapes that are reused below as TileDB tile extents.
print(amchunks, bmchunks, sep="\n")

(4116, 4116)
(4116, 1029)


In [21]:
# Domain of the first operand.  The tile extent of each dimension mirrors the
# ironArray chunk shape, clamped to the dimension length.  `min` keeps the
# extent non-zero even when a chunk spans a whole dimension; the modulo used
# previously evaluates to 0 in that case, which is not a usable tile extent.
adom = tiledb.Domain(
    tiledb.Dim(name="rows", domain=(0, amshape[0] - 1), dtype=np.int32, tile=min(amchunks[0], amshape[0])),
    tiledb.Dim(name="cols", domain=(0, amshape[1] - 1), dtype=np.int32, tile=min(amchunks[1], amshape[1])),
)

# Byte shuffle followed by LZ4 at level 5, analogous to the Blosc settings
# (lz4, clevel=5, shuffle) reported in the info output of the Zarr arrays.
filters = tiledb.FilterList([tiledb.ByteShuffleFilter(), tiledb.LZ4Filter(5)])
# Dense array with a single float64 attribute "a".
schema = tiledb.ArraySchema(
    domain=adom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.float64, filters=filters)]
)

# Create and fill the on-disk array only if it does not exist yet.
array_name = "quickstart_dense"
#ia.remove_urlpath(array_name)

if not os.path.exists(array_name):
    tiledb.DenseArray.create(array_name, schema)
    with tiledb.DenseArray(array_name, mode="w") as A:
        A[:] = am.data

# Dask wrappers: one for the first profiled run, one (suffix _vmtouch) for the
# run performed after the cache eviction.
tilea = da.from_tiledb(array_name, attribute='a')
tilea_vmtouch = da.from_tiledb(array_name, attribute='a')

# Domain of the second operand, with the same clamped tile extents.
bdom = tiledb.Domain(
    tiledb.Dim(name="rows", domain=(0, bmshape[0] - 1), dtype=np.int32, tile=min(bmchunks[0], bmshape[0])),
    tiledb.Dim(name="cols", domain=(0, bmshape[1] - 1), dtype=np.int32, tile=min(bmchunks[1], bmshape[1])),
)

# Dense array with a single float64 attribute "b", using the same filters.
schemab = tiledb.ArraySchema(
    domain=bdom, sparse=False, attrs=[tiledb.Attr(name="b", dtype=np.float64, filters=filters)]
)

# Create and fill the on-disk array only if it does not exist yet.
array_nameb = "quickstart_dense_b"
#ia.remove_urlpath(array_nameb)
if not os.path.exists(array_nameb):
    tiledb.DenseArray.create(array_nameb, schemab)
    with tiledb.DenseArray(array_nameb, mode="w") as B:
        B[:] = bm.data

tileb = da.from_tiledb(array_nameb, attribute='b')
tileb_vmtouch = da.from_tiledb(array_nameb, attribute='b')

# Remove the result arrays left by a previous run (presumably so that the
# to_tiledb calls in the following cells can create them afresh — confirm).
ia.remove_urlpath("res_tile")
ia.remove_urlpath("res_tile_vmtouch")
# The IArray operands are not used by the remaining cells.
del am
del bm

In [22]:
%%mprof_run tiledb::novmtouch
# Dask + TileDB product, profiled under the label "tiledb::novmtouch": this run
# is not preceded by a vmtouch eviction of the TileDB array.
res_dask = da.matmul(tilea, tileb)
# Store the product in the TileDB array "res_tile".
res_dask.to_tiledb("res_tile")


Increasing number of chunks by factor of 25



memprofiler: used -3454.53 MiB RAM (peak of 30.01 MiB) in 70.4253 s, total RAM usage 2293.78 MiB


In [23]:
del res_dask
del tilea
del tileb

!vmtouch -e $array_name

           Files: 5
     Directories: 4
   Evicted Pages: 2212260 (8G)
         Elapsed: 0.35788 seconds


In [24]:
%%mprof_run 4.tiledb::
# Same Dask + TileDB product as the "tiledb::novmtouch" run, but executed right
# after the vmtouch eviction and profiled under the label "4.tiledb::".
res_dask_vmtouch = da.matmul(tilea_vmtouch, tileb_vmtouch)
# Store the product in the TileDB array "res_tile_vmtouch".
res_dask_vmtouch.to_tiledb("res_tile_vmtouch")


Increasing number of chunks by factor of 25



memprofiler: used 69.52 MiB RAM (peak of 2292.21 MiB) in 71.6906 s, total RAM usage 2346.88 MiB


In [25]:
# Plot the profiles whose labels match the pattern .*tiledb::.*
# (both "tiledb::novmtouch" and "4.tiledb::" match it).
%mprof_plot .*tiledb::.* -t "Matrix multiplication computation"

In [26]:
# Plot the four numbered profiles together: ironArray (1), Dask + Zarr (2),
# NumPy (3) and Dask + TileDB (4).
%mprof_plot 1.iarray:: 2.zarr:: 3.numpy::  4.tiledb:: -t "Matrix multiplication computation"