# Notebook for Elementary Operation Experimentation

In [1]:
import numpy as np
import pycuda.gpuarray as gpuarray

from utils.context import Context


%load_ext autoreload
%autoreload 2

## Matrix Multiplication

In [2]:
BLOCK_SIZE = 32
context = Context(BLOCK_SIZE)

matrix_multiplication_kernel_path = './kernels/MatrixMultiplication.cu'
matrix_multiplication = context.getSourceModule(matrix_multiplication_kernel_path).get_function('MatMul')


### Test Cases

Sequential

In [19]:
a = np.random.randint(5, size=(28, 49))
b = np.random.randint(5, size=(49, 29))
c_python = np.matmul(a, b)

print(a)
print(b)
print(c_python)

[[2 1 3 ... 3 0 1]
 [0 2 3 ... 1 0 4]
 [3 4 4 ... 3 4 0]
 ...
 [4 4 2 ... 1 3 0]
 [1 0 1 ... 4 0 3]
 [0 4 3 ... 4 2 2]]
[[1 0 4 ... 1 2 2]
 [0 4 1 ... 2 4 3]
 [3 4 4 ... 3 4 4]
 ...
 [4 4 0 ... 1 3 3]
 [1 3 3 ... 4 4 4]
 [2 1 3 ... 1 3 0]]
[[182 189 162 187 198 207 232 180 233 217 203 212 137 194 209 136 180 174
  217 202 199 235 205 190 203 187 233 173 208]
 [224 160 171 189 219 185 241 213 243 203 227 222 161 202 216 183 174 180
  235 227 179 227 224 213 208 209 220 177 206]
 [226 210 171 206 213 204 270 226 243 232 232 211 159 209 219 169 192 182
  222 229 202 243 242 214 235 208 236 204 241]
 [230 194 214 221 229 209 270 218 285 241 253 214 183 184 237 181 177 197
  242 218 209 241 244 217 238 221 254 201 217]
 [190 145 177 183 169 154 224 165 226 181 183 173 149 142 178 139 127 147
  177 158 155 195 195 172 197 207 186 162 153]
 [147 146 142 169 137 133 182 180 185 155 174 151  95 147 150 127 155 149
  190 185 130 162 169 151 160 150 177 130 161]
 [237 200 201 224 199 196 260 227 

Parallel

In [21]:
a = a.astype(np.float32)
b = b.astype(np.float32)
c = np.zeros((a.shape[0], b.shape[1])).astype(np.float32)

a_d = gpuarray.to_gpu(a)
b_d = gpuarray.to_gpu(b)
c_d = gpuarray.to_gpu(c)

block = context.block_dims
grid = context.grid_dims(max([a.shape[0], b.shape[1]]))

matrix_multiplication(a_d, b_d, c_d, np.int32(a.shape[0]), 
                      np.int32(a.shape[1]), np.int32(b.shape[0]), 
                      np.int32(b.shape[1]), np.int32(c.shape[0]), np.int32(c.shape[1]),
                      block=block, grid=grid)

c = c_d.get()

print(c)
print("Correct: ", np.a

SyntaxError: EOL while scanning string literal (<ipython-input-21-0da7bf734851>, line 20)

## Matrix Transpose

Not working, look through this later.

In [8]:
matrix_transpose_kernel_path = './kernels/MatrixTranspose.cu'
matrix_transpose = context.getSourceModule(matrix_transpose_kernel_path).get_function('MatTranSquare')


### Test Cases

In [16]:
a = np.random.randint(5, size=(40, 50))
a_t = a.T
print(a)
print()
print(a_t)

[[3 4 1 ... 0 1 0]
 [1 2 1 ... 3 0 1]
 [3 2 1 ... 3 3 1]
 ...
 [2 0 1 ... 2 3 3]
 [0 2 3 ... 1 2 1]
 [3 0 4 ... 3 4 0]]

[[3 1 3 ... 2 0 3]
 [4 2 2 ... 0 2 0]
 [1 1 1 ... 1 3 4]
 ...
 [0 3 3 ... 2 1 3]
 [1 0 3 ... 3 2 4]
 [0 1 1 ... 3 1 0]]


In [18]:
a = a.astype(np.float32)
b = np.zeros((a.shape[1], a.shape[0])).astype(np.float32)

a_d = gpuarray.to_gpu(a)
b_d = gpuarray.to_gpu(b)

block = context.block_dims
grid = context.grid_dims(max([a.shape[0], b.shape[1]]))

# print(block)
# print(grid)

matrix_transpose(a_d, b_d, np.int32(a.shape[1]), np.int32(a.shape[0]), block=block, grid=grid)

b = b_d.get()

print(a.shape)
print(b.shape)

print(b)
print(a_t)
print("Correct: ", np.allclose(a.T, b))

(40, 50)
(50, 40)
[[3. 1. 3. ... 2. 0. 3.]
 [4. 2. 2. ... 0. 2. 0.]
 [1. 1. 1. ... 1. 3. 4.]
 ...
 [0. 3. 3. ... 2. 1. 3.]
 [1. 0. 3. ... 3. 2. 4.]
 [0. 1. 1. ... 3. 1. 0.]]
[[3 1 3 ... 2 0 3]
 [4 2 2 ... 0 2 0]
 [1 1 1 ... 1 3 4]
 ...
 [0 3 3 ... 2 1 3]
 [1 0 3 ... 3 2 4]
 [0 1 1 ... 3 1 0]]
Correct:  True
