In [18]:
import numpy as np
import numba

def sliced_ellpack_spmv_single_thread(N, slice_ptr, colidx, val, x, slice_height):
    """Sliced ELLPACK format based SpMV (y = A x), pure-Python reference version.

    Rows are processed in consecutive groups ("slices") of ``slice_height``
    rows. The entries of slice ``k`` occupy ``val[slice_ptr[k]:slice_ptr[k + 1]]``
    (and the matching range of ``colidx``). Inside a slice, the entries of the
    row at local offset ``j`` sit at ``slice_ptr[k] + j`` and then every
    ``slice_height`` positions after that.

    Parameters
    ----------
    N : int
        Number of rows of A (length of the returned vector).
    slice_ptr : sequence of int
        Start offset of every slice in ``colidx``/``val``, plus one trailing
        end offset.
    colidx : sequence of int
        Column index of each stored entry.
    val : sequence of float
        Value of each stored entry (padding entries are expected to be 0).
    x : sequence of float
        Input vector, indexed by the values in ``colidx``.
    slice_height : int
        Number of rows per slice.

    Returns
    -------
    numpy.ndarray
        ``float64`` vector of length ``N`` holding ``A x``.
    """
    y = np.zeros(N, dtype=np.float64)

    for slice_idx, first_row in enumerate(range(0, N, slice_height)):
        # The last slice may be only partially filled when N is not a multiple
        # of slice_height; clamp so padded rows never index past the end of y.
        last_row = min(first_row + slice_height, N)

        # Slice bounds are the same for every row of the slice.
        slice_begin = slice_ptr[slice_idx]
        slice_end = slice_ptr[slice_idx + 1]

        for r in range(first_row, last_row):
            acc = 0.0
            # Stride by slice_height to walk along row r inside the slice.
            for i in range(slice_begin + (r - first_row), slice_end, slice_height):
                acc += x[colidx[i]] * val[i]
            y[r] = acc

    return y

@numba.njit(parallel=True)
def sliced_ellpack_spmv_multi_thread(N, slice_ptr, colidx, val, x, slice_height):
    """Sliced ELLPACK format based SpMV (y = A x), Numba-parallel version.

    Same storage layout as the single-thread version: slice ``k`` covers rows
    ``k * slice_height`` onwards and owns the entries
    ``slice_ptr[k]:slice_ptr[k + 1]`` of ``colidx``/``val``; within a slice a
    row's entries are ``slice_height`` positions apart.

    Slices are distributed over threads with ``numba.prange``. Each slice
    writes only to its own rows of ``y``, so iterations are independent.

    Parameters
    ----------
    N : int
        Number of rows of A (length of the returned vector).
    slice_ptr : 1-D integer array
        Start offset of every slice, plus one trailing end offset.
    colidx : 1-D integer array
        Column index of each stored entry.
    val : 1-D float array
        Value of each stored entry (padding entries are expected to be 0).
    x : 1-D float array
        Input vector, indexed by the values in ``colidx``.
    slice_height : int
        Number of rows per slice.

    Returns
    -------
    numpy.ndarray
        ``float64`` vector of length ``N`` holding ``A x``.
    """
    y = np.zeros(N, dtype=np.float64)

    # prange only parallelises unit-step loops, so iterate over the slice
    # index rather than over the first row of each slice.
    n_slices = (N + slice_height - 1) // slice_height

    for slice_idx in numba.prange(n_slices):
        first_row = slice_idx * slice_height
        # Clamp the last slice: njit code is not bounds-checked, so writing a
        # padded row past N would silently corrupt memory.
        last_row = min(first_row + slice_height, N)

        slice_begin = slice_ptr[slice_idx]
        slice_end = slice_ptr[slice_idx + 1]

        for r in range(first_row, last_row):
            # Accumulate locally and store once per row.
            acc = 0.0
            for i in range(slice_begin + (r - first_row), slice_end, slice_height):
                acc += x[colidx[i]] * val[i]
            y[r] = acc

    return y

In [19]:
# Toy 4x4 sparse matrix A used to exercise the SpMV kernels:
#
#   [[a, 0, b, 0],
#    [0, c, 0, d],
#    [0, e, 0, 0],
#    [0, 0, 0, f]]
#
# with a = 1.11, b = 3.33, c = 2.22, d = 4.44, e = 5.55, f = 6.66.
#
# Rows are grouped into slices of `slice_height` rows, and each slice keeps
# only its packed entries:
#   slice 1 (rows 0-1): [[a, b], [c, d]]
#   slice 2 (rows 2-3): [[e], [f]]
# Inside a slice the entries are laid out column by column, so slice 1 is
# stored as a, c, b, d and slice 2 as e, f.

slice_height = 2  # rows per slice
N = 4             # number of rows

# Offset of each slice in colidx/val; the final entry is the total length.
slice_ptr = np.array([0, 4, 6])

# Column index of every stored entry.
colidx = np.array([
    0, 1, 2, 3,  # slice 1: a, c, b, d
    1, 3,        # slice 2: e, f
])

# Nonzero values and padded zeros, in the same order as colidx.
val = np.array(
    [
        1.11, 2.22, 3.33, 4.44,  # slice 1: a, c, b, d
        5.55, 6.66,              # slice 2: e, f
    ],
    dtype=np.float64,
)

# Input vector for y = A x.
x = np.array([2.22, 3.33, 4.44, 5.55], dtype=np.float64)

In [30]:
# Time one call of the pure-Python reference kernel on the toy matrix.
# %time measures a single run, so on a 4-row input the figure is mostly
# call overhead rather than SpMV work.
%time y = sliced_ellpack_spmv_single_thread(N, slice_ptr, colidx, val, x, slice_height)

CPU times: user 43 µs, sys: 1e+03 ns, total: 44 µs
Wall time: 46 µs


In [43]:
# Time one call of the Numba-compiled kernel on the same input.
# NOTE: the first call of an njit function also pays for JIT compilation, so
# only a repeated run of this cell reflects the compiled kernel's cost.
%time y = sliced_ellpack_spmv_multi_thread(N, slice_ptr, colidx, val, x, slice_height)

CPU times: user 19 µs, sys: 1e+03 ns, total: 20 µs
Wall time: 22.9 µs


In [44]:
# Display the product vector returned by the most recent SpMV call.
y

array([17.2494, 32.0346, 18.4815, 36.963 ])