# Introduction to GPU Programming with Python
## Solutions to notebook 3


### Main example: Matrix multiplication (on CPU without Numba)

![](../images/Matrix_multiplication_diagram_2.svg.png)

![](../images/matrix_formula.png)

In [None]:
import numpy as np

In [None]:
# Write the matrix multiplication code (two outer loops over i and j,
# and one inner loop over k that multiplies and sums the products)
def matmul(A,B,C):
    """Compute the matrix product of A and B and store it in C.

    Implements ``C[i][j] = sum over k of A[i][k] * B[k][j]`` with three
    explicit loops.

    Args:
        A: Left operand, indexed as ``A[i][k]``.
        B: Right operand, indexed as ``B[k][j]``; ``len(B)`` must match the
            number of entries in each row of A.
        C: Output, indexed as ``C[i][j]``, with ``len(A)`` rows and
            ``len(B[0])`` columns. Its previous contents are overwritten.

    Returns:
        None. The result is written into ``C`` in place.
    """
    # iterating over the rows of A
    for i in range(len(A)):

        # iterating over the columns of B
        for j in range(len(B[0])):

            # Start each element from zero rather than adding onto whatever C
            # already holds, so calling the function repeatedly on the same C
            # (as %timeit does) still leaves the plain product in C.
            acc = 0

            # iterating over the rows of B
            for k in range(len(B)):
                acc += A[i][k] * B[k][j]

            C[i][j] = acc

In [None]:
# Part 1: build the 128x128 NumPy operands A and B from random samples,
# and a 128x128 output matrix C filled with zeros.
A = np.random.random_sample((128, 128))
B = np.random.random_sample((128, 128))
C = np.zeros((128, 128))

In [None]:
# Time the pure-Python matmul; %timeit calls it repeatedly on the same A, B and C.
%timeit matmul(A,B,C)

### Main example: Matrix multiplication (on CPU with Numba, optimization only)

In [None]:
from numba import jit

In [None]:
# Add Numba decorator here to optimize the code
@jit(nopython=True)
def matmul(A,B,C):
    """Compute the matrix product of A and B and store it in C (Numba-compiled).

    Implements ``C[i][j] = sum over k of A[i][k] * B[k][j]`` with three
    explicit loops, compiled by Numba in nopython mode.

    Args:
        A: Left operand, indexed as ``A[i][k]``.
        B: Right operand, indexed as ``B[k][j]``; ``len(B)`` must match the
            number of entries in each row of A.
        C: Output, indexed as ``C[i][j]``, with ``len(A)`` rows and
            ``len(B[0])`` columns. Its previous contents are overwritten.

    Returns:
        None. The result is written into ``C`` in place.
    """
    # iterating over the rows of A
    for i in range(len(A)):

        # iterating over the columns of B
        for j in range(len(B[0])):

            # Start each element from zero rather than adding onto whatever C
            # already holds, so calling the function repeatedly on the same C
            # (as %timeit does) still leaves the plain product in C. The
            # scalar accumulator also avoids updating C on every k iteration.
            acc = 0

            # iterating over the rows of B
            for k in range(len(B)):
                acc += A[i][k] * B[k][j]

            C[i][j] = acc

In [None]:
# Execute matmul (the @jit-compiled version defined above) and measure its execution time
%timeit matmul(A,B,C)

### Main example: Matrix multiplication (on CPU with Numba, optimization + parallelization)

In [None]:
from numba import jit, prange

In [None]:
# Add Numba decorator with parallel option and replace range with prange to parallelize the code
@jit(nopython=True, parallel=True)
def matmul(A,B,C):
    """Compute the matrix product of A and B and store it in C (Numba, parallel).

    Implements ``C[i][j] = sum over k of A[i][k] * B[k][j]``. The loop over
    the rows of A is a ``prange`` loop, so with ``parallel=True`` Numba may
    run different rows in parallel; each row of C is written by exactly one
    iteration of that loop.

    Args:
        A: Left operand, indexed as ``A[i][k]``.
        B: Right operand, indexed as ``B[k][j]``; ``len(B)`` must match the
            number of entries in each row of A.
        C: Output, indexed as ``C[i][j]``, with ``len(A)`` rows and
            ``len(B[0])`` columns. Its previous contents are overwritten.

    Returns:
        None. The result is written into ``C`` in place.
    """
    # iterating over the rows of A (parallel loop)
    for i in prange(len(A)):

        # iterating over the columns of B.
        # Plain range: Numba only parallelises the outermost prange and runs
        # nested prange loops serially, so a second prange here adds nothing.
        for j in range(len(B[0])):

            # Start each element from zero rather than adding onto whatever C
            # already holds, so calling the function repeatedly on the same C
            # (as %timeit does) still leaves the plain product in C.
            acc = 0

            # iterating over the rows of B
            for k in range(len(B)):
                acc += A[i][k] * B[k][j]

            C[i][j] = acc

In [None]:
# Measure the execution time of the parallel (prange) matmul defined above
%timeit matmul(A,B,C)

### Exercise 1: Incrementing array elements

In [None]:
# Import all required libs
import numpy
from numba import jit,prange

In [None]:
# Write a CPU parallel code (with the use of @jit decorator)
@jit(nopython=True,parallel=True)
def incrementation(array):
    """Add one to every element of ``array`` in place.

    The loop is a ``prange`` loop compiled with ``parallel=True``, so Numba
    may run its iterations in parallel. Elements are addressed with a single
    index running up to ``array.size``.

    Args:
        array: Array to modify in place (presumably 1-D, given the single
            index; confirm against callers).

    Returns:
        None. ``array`` is updated in place.
    """
    n_items = array.size
    for idx in prange(n_items):
        array[idx] = array[idx] + 1

In [None]:
# Create an array of 12800 ones and measure the execution time of incrementation
data=numpy.ones(12800)
%timeit incrementation(data)
