# Benchmarks on matrix multiplication

Matrix multiplication can be defined as: ${A*B}_{ik} = a_i*b_k$

where $A$ is a $i \times j$ matrix

$B$ is a $j \times k$ matrix

and $a_i$ is the $i^{th}$ row vector of $A$

and $b_i$ is the $k^{th}$ column vector of $B$

In [None]:
We can write a matrix multiplication function using this definition

In [2]:
import numpy as np

In [4]:
def matrix_mult(a,b):
    if a.shape[1] == b.shape[0]:
        I,J = a.shape
        J,K = b.shape
        c=np.zeros(I*K).reshape(I,K)
        for i in range(I):
            ai=a[i,:]
            for k in range(K):
                bk=b[:,k]
                c[i,k] = (ai*bk).sum() # numpy step
    else:
        print('a and b must have compatible shapes')
    return(c)

We want to benchmark this against `np.dot` and a numba version

In [7]:
from numba import njit, jit

In [8]:
@njit(parallel=True)
def matrix_mult_numba_jit(a,b):
    if a.shape[1] == b.shape[0]:
        I,J = a.shape
        J,K = b.shape
        c=np.zeros(I*K).reshape(I,K)
        for i in range(I):
            ai=a[i,:]
            for k in range(K):
                bk=b[:,k]
                c[i,k] = (ai*bk).sum() # numpy step
    else:
        print('a and b must have compatible shapes')
    return(c)

In [9]:
@jit
def matrix_mult_numba_njit(a,b):
    if a.shape[1] == b.shape[0]:
        I,J = a.shape
        J,K = b.shape
        c=np.zeros(I*K).reshape(I,K)
        for i in range(I):
            ai=a[i,:]
            for k in range(K):
                bk=b[:,k]
                c[i,k] = (ai*bk).sum() # numpy step
    else:
        print('a and b must have compatible shapes')
    return(c)

In [10]:
i, j, k = 100,101,102
a = np.arange(i*j).reshape(i,j)
b = np.arange(j*k).reshape(j,k)

In [11]:
%timeit np.dot(a,b)

585 µs ± 3.75 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [12]:
%timeit matrix_mult(a,b)

34.2 ms ± 322 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [13]:
%timeit matrix_mult_numba_jit(a,b)

291 ms ± 8.54 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
%timeit matrix_mult_numba_njit(a,b)

2.13 ms ± 27.4 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


### benchmark different sizes of matrices


In [None]:
%%time
d_list = []
size_list = [10,100,1000]
for i in size_list:
    for j in size_list:
        a = np.arange(i*j).reshape(i,j)
        for k in size_list:
            if i>=j and j>=k:
                b = np.arange(j*k).reshape(j,k)
                t_np = %timeit -oq np.dot(a,b)
                t_plain = %timeit -oq matrix_mult(a,b)
                t_numba_jit = %timeit -oq matrix_mult_numba_jit(a,b)
                t_numba_njit = %timeit -oq matrix_mult_numba_njit(a,b)
                d_list.append({'t_np':t_np.average,
                               't_plain':t_plain.average,
                               't_numba_jit':t_numba_jit.average,
                               't_numba_jnit':t_numba_njit.average,
                               'i':i,
                               'j':j,
                               'k':k})

In [18]:
import pandas as pd

In [19]:
benchmark_df = pd.DataFrame(d_list)

In [33]:
numerics = ['float16', 'float32', 'float64']

benchmark_df.select_dtypes(include=numerics).divide(benchmark_df.t_plain)

Unnamed: 0,t_np,t_numba_jit,t_numba_jnit,t_plain,0
0,,,,,


In [29]:
ratio_benchmark_df = benchmark_df.filter(benchmark_df._get_numeric_data().columns.tolist()) / benchmark_df.t_plain

pd.concat([benchmark_df,ratio_benchmark_df])
           

Unnamed: 0,i,j,k,t_np,t_numba,t_plain
0,50,50,50,0.000081,0.000512,0.011323
1,50,50,100,0.000163,0.001021,0.022917
2,50,50,500,0.000838,0.005009,0.109832
3,50,50,1000,0.001927,0.010538,0.216234
4,50,50,5000,0.009639,0.055416,1.116051
5,50,100,50,0.000155,0.000581,0.011520
6,50,100,100,0.000291,0.001190,0.024774
7,50,100,500,0.002100,0.006729,0.117721
8,50,100,1000,0.004337,0.013888,0.234997
9,50,100,5000,0.029176,0.078274,1.202826
