In [1]:
import numpy as np
from numba import cuda
import timeit
import math



## Simple kernel launch with @cuda.jit decorator

In [2]:
@cuda.jit
def square_elements_I(array):
    """square each element in the array, for a single thread"""
    for idx in range(array.size):
        array[idx] = array[idx] ** 2

In [3]:
array = np.linspace(1, 50, num=500)

#launch kernel on a single grid, with a single block, and default single thread
square_elements_I[1, 1](array)

In [4]:
@cuda.jit
def square_elements_II(array):
    """square each element in the array, with awarness of thread index"""
    thread_position = cuda.grid(1) #get thread idx
    array[thread_position] = array[thread_position] ** 2

In [5]:
array = np.linspace(1, 5, num=50*50)
#launch kernel on multiple blocks and multiple threads
square_elements_II[50, 50](array)
print(array)

[ 1.          1.00320384  1.00641281 ... 24.96799744 24.98399616
 25.        ]


### Assignment 1
input a color image, output a grayscale image using CUDA

In [10]:
import cv2
image = cv2.imread('color.jpg')
image.shape

(2489, 2448, 3)

In [12]:
def to_grayscale(color_img):
    grayscale = np.zeros((color_img.shape[0], color_img.shape[1]), dtype=np.float32)

    # We use blocks of 32x32 pixels:
    blockdim = (32, 32)
    # We compute grid dimensions big enough to cover the whole image:
    griddim = (color_img.shape[0] // blockdim[0] + 1, color_img.shape[1] // blockdim[1] + 1)
    print('Grid dimensions:', griddim)

    rgb_to_intensity[griddim, blockdim](color_img, grayscale)
    
    return grayscale

@cuda.jit
def rgb_to_intensity(color_img_array, output_array):
    #convert height x witdth x 3 input to height x width x 1 output
    threadx, thready = cuda.grid(2)
    blue, green, red = color_img_array[thready][threadx]
    #convert rgb to single intensity value for grayscale, according to NTSC rec's
    intensity = 0.299 * red + 0.587 * green + 0.114 * blue
    output_array[thready][threadx] = intensity

grayscale = to_grayscale(image)

grayscale = np.array(grayscale, dtype=np.uint8)
cv2.imshow('image', grayscale)
cv2.waitKey(0)
cv2.destroyAllWindows()

Grid dimensions: (78, 77)


Grid dimensions: (33, 32)
