In [28]:
from numba import cuda
import cupy as cp
import numpy as np

In [125]:
# Problem size and inputs: two random square integer matrices plus an output buffer.
matrix_size = 20  # side length of every matrix in this notebook
SEED = 0  # fixed seed so the inputs (and therefore the products) are reproducible
rng = np.random.default_rng(SEED)
# Entries are integers drawn from the half-open range [0, 100).
M1_cpu = rng.integers(0, 100, size=(matrix_size, matrix_size))
M2_cpu = rng.integers(0, 100, size=(matrix_size, matrix_size))
# Output buffer filled by the hand-written kernels; np.zeros defaults to float64.
M3_cpu = np.zeros((matrix_size, matrix_size))

In [126]:
# Convert the three host (NumPy) matrices to CuPy arrays in a single pass.
M1_gpu, M2_gpu, M3_gpu = (
    cp.asarray(host_matrix) for host_matrix in (M1_cpu, M2_cpu, M3_cpu)
)

In [127]:
# Sanity check: inspect the type of the object returned by cp.asarray.
type(M1_gpu)

cupy._core.core.ndarray

In [137]:
# Launch geometry: 16x16 threads per block, and enough blocks along each of
# the first two axes of M3_gpu to cover every element (integer ceiling division).
threadsperblock = (16, 16)
blockspergrid_x, blockspergrid_y = (
    (extent + threads - 1) // threads
    for extent, threads in zip(M3_gpu.shape, threadsperblock)
)
blockspergrid = (blockspergrid_x, blockspergrid_y)
print(blockspergrid)

(2, 2)


In [129]:
@cuda.jit
def matrix_multiplication_gpu(M1, M2, M3):
  """CUDA kernel: write the matrix product of M1 and M2 into M3.

  Each thread takes its 2D grid position (row, col) and computes the single
  element M3[row, col] as the dot product of row `row` of M1 with column
  `col` of M2. Nothing is returned; M3 is filled in place.
  """
  row, col = cuda.grid(2)
  # Threads whose coordinates fall outside M3 exit without writing anything.
  if row >= M3.shape[0] or col >= M3.shape[1]:
    return
  acc = 0
  for k in range(M1.shape[1]):
    acc += M1[row, k] * M2[k, col]
  M3[row, col] = acc


def matrix_multiplication_cpu(M1, M2, M3):
  """Pure-Python triple-loop matrix product, written into M3 in place.

  Args:
    M1: 2D array of shape (n, m).
    M2: 2D array of shape (m, p).
    M3: 2D output array with at least n rows and p columns; the top-left
      n-by-p region is overwritten with the product of M1 and M2.

  Raises:
    ValueError: if the column count of M1 differs from the row count of M2.
  """
  n_rows, n_inner = M1.shape
  # The output column count comes from M2, not M1, so rectangular inputs work.
  n_cols = M2.shape[1]
  if M2.shape[0] != n_inner:
    raise ValueError(
        "shape mismatch: M1 is %s but M2 is %s" % (M1.shape, M2.shape))
  for i in range(n_rows):
    for j in range(n_cols):
      temp = 0
      # Accumulate over the shared inner dimension.
      for k in range(n_inner):
        temp += M1[i, k] * M2[k, j]
      M3[i, j] = temp


def matrix_multiplication(M1, M2):
  """Return the matrix product of M1 and M2, computed by np.matmul."""
  product = np.matmul(M1, M2)
  return product

In [133]:
%%time
# Baseline: time the pure-Python triple-loop product, which fills M3_cpu in place.
matrix_multiplication_cpu(M1_cpu, M2_cpu, M3_cpu)

CPU times: user 6.42 ms, sys: 0 ns, total: 6.42 ms
Wall time: 6.32 ms


In [134]:
%%time
matrix_multiplication_gpu[blockspergrid, threadsperblock](M1_gpu, M2_gpu, M3_gpu)

CPU times: user 2.21 ms, sys: 0 ns, total: 2.21 ms
Wall time: 1.91 ms


In [136]:
%%time
# Reference: time the same product via np.matmul; the returned array is the cell output.
matrix_multiplication(M1_cpu, M2_cpu)

CPU times: user 34 µs, sys: 0 ns, total: 34 µs
Wall time: 37.4 µs


array([[29029, 40828, 35158, 49212, 33903, 52294, 45245, 40036, 42127,
        31970, 33488, 49468, 38372, 54387, 36782, 38549, 32468, 31712,
        42033, 26258],
       [25700, 42918, 38600, 47951, 31801, 49547, 39716, 41213, 49788,
        30724, 38190, 47944, 40366, 52411, 38976, 46748, 40618, 40610,
        43441, 28602],
       [25122, 41382, 40859, 43098, 39485, 51524, 34570, 40402, 49090,
        30507, 44329, 37671, 29584, 50497, 40032, 44827, 38077, 33762,
        37432, 32816],
       [30951, 40028, 42170, 45320, 44379, 58481, 43524, 51874, 56556,
        33002, 43738, 47052, 30072, 63129, 49585, 47864, 39173, 41507,
        42615, 35062],
       [33013, 45196, 53849, 45594, 43567, 55347, 47393, 45281, 59067,
        31706, 44943, 51451, 37463, 54937, 51513, 51547, 37873, 49041,
        37118, 43735],
       [36336, 51132, 51460, 52020, 47164, 60926, 47753, 57341, 58940,
        45432, 51974, 50935, 37091, 66749, 46954, 61159, 37870, 45561,
        44559, 42623],
       [28