# Notebook for Elementary Operation Experimentation

In [1]:
import numpy as np
import pycuda.gpuarray as gpuarray

from utils.context import Context


%load_ext autoreload
%autoreload 2

## Matrix Multiplication

In [2]:
# Block-size setting handed to the project's Context helper
# (presumably the CUDA thread-block edge length -- confirm in utils.context).
BLOCK_SIZE = 32
context = Context(BLOCK_SIZE)

# Obtain the module built from the matrix-multiplication kernel source,
# then look up its 'MatMul' entry point so it can be launched from Python.
matrix_multiplication_kernel_path = './kernels/MatrixMultiplication.cu'
matmul_module = context.getSourceModule(matrix_multiplication_kernel_path)
matrix_multiplication = matmul_module.get_function('MatMul')


### Test Cases

Sequential

In [3]:
# CPU reference: two small random integer matrices (values 0..4) and
# their product computed by NumPy, used to check the GPU result below.
a = np.random.randint(5, size=(2, 4))
b = np.random.randint(5, size=(4, 2))
c_python = a @ b  # same as np.matmul(a, b) for ndarrays

# Show both operands followed by the reference product, one per line.
print(a, b, c_python, sep="\n")

[[4 4 1 4]
 [0 0 0 1]]
[[3 0]
 [2 4]
 [4 4]
 [1 2]]
[[28 28]
 [ 1  2]]


Parallel

In [4]:
# Convert the host operands to float32 and allocate a zeroed float32
# output of shape (rows of a, cols of b).
# NOTE(review): presumably the kernel takes float* buffers -- confirm in
# MatrixMultiplication.cu.
a = a.astype(np.float32)
b = b.astype(np.float32)
c = np.zeros((a.shape[0], b.shape[1]), dtype=np.float32)

# Copy operands and the output buffer to the device (a, then b, then c).
a_d, b_d, c_d = (gpuarray.to_gpu(host_array) for host_array in (a, b, c))

# Launch configuration comes from the project Context; the grid is sized
# from the larger dimension of the output matrix.
block = context.block_dims
grid = context.grid_dims(max(a.shape[0], b.shape[1]))

# Kernel arguments after the three buffers: rows/cols of a, rows/cols of b,
# rows/cols of c, each passed as a 32-bit integer.
shape_args = [np.int32(extent) for extent in (*a.shape, *b.shape, *c.shape)]
matrix_multiplication(a_d, b_d, c_d, *shape_args, block=block, grid=grid)

# Copy the result back to the host and display it.
c = c_d.get()

print(c)


[[28. 28.]
 [ 1.  2.]]


## Matrix Transpose

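**TODO:** The transpose kernel is not working yet: running the launch cell below currently ends in a CUDA "illegal memory access" error. Investigate and fix this later.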

In [50]:
# Obtain the module built from the transpose kernel source through the
# shared Context, then look up its 'MatTrans' entry point.
matrix_transpose_kernel_path = './kernels/MatrixTranspose.cu'
transpose_module = context.getSourceModule(matrix_transpose_kernel_path)
matrix_transpose = transpose_module.get_function('MatTrans')


### Test Cases

In [53]:
# CPU reference: a random 32x32 integer matrix (values 0..4) and its
# NumPy transpose, used to check the GPU kernel's output.
a = np.random.randint(5, size=(32, 32))
a_t = a.transpose()  # view of `a`, identical to a.T

# Print the matrix and its transpose separated by one blank line.
print(a, a_t, sep="\n\n")

[[1 1 4 ... 3 0 0]
 [4 0 0 ... 0 1 2]
 [1 1 2 ... 2 0 2]
 ...
 [4 2 1 ... 1 2 4]
 [0 4 3 ... 4 2 4]
 [0 3 4 ... 4 4 4]]

[[1 4 1 ... 4 0 0]
 [1 0 1 ... 2 4 3]
 [4 0 2 ... 1 3 4]
 ...
 [3 0 2 ... 1 4 4]
 [0 1 0 ... 2 2 4]
 [0 2 2 ... 4 4 4]]


In [55]:
# Run the GPU transpose kernel on `a` and compare the result with NumPy.
#
# NOTE(review): if gpuarray.to_gpu fails here with "cuMemAlloc failed: an
# illegal memory access was encountered", the fault most likely comes from
# an earlier kernel launch in this session (CUDA reports kernel faults on a
# later API call) -- restart the notebook kernel before retrying, and check
# the bounds handling in MatrixTranspose.cu.
a = a.astype(np.float32)
# Output buffer has the transposed shape: (cols of a, rows of a).
b = np.zeros((a.shape[1], a.shape[0]), dtype=np.float32)

a_d = gpuarray.to_gpu(a)
b_d = gpuarray.to_gpu(b)

block = context.block_dims
# Size the grid from the larger of a's two dimensions. b.shape[1] is equal
# to a.shape[0], so taking max(a.shape[0], b.shape[1]) would ignore the
# column count and under-size the grid for inputs wider than they are tall.
grid = context.grid_dims(max(a.shape))

# Kernel arguments: input buffer, output buffer, rows of a, cols of a.
matrix_transpose(a_d, b_d, np.int32(a.shape[0]), np.int32(a.shape[1]), block=block, grid=grid)

# Copy the transposed matrix back to the host.
b = b_d.get()

print(a.shape)
print(b.shape)

print(b)
print(a_t)
print("Correct: ", np.allclose(a.T, b))

LogicError: cuMemAlloc failed: an illegal memory access was encountered