### 1. Import libraries

In [16]:
from numba import cuda

import numpy as np

import matplotlib.image as mpimg
import matplotlib.pyplot as plt

from PIL import Image
import cv2
import math
import operator

import glob2

import timeit

print(cuda.gpus)

<Managed Device 0>


In [2]:
from HOG_CUDA import HOG_CUDA

In [3]:
from HOG_CPU  import HOG_CPU

In [4]:
from skimage import feature

### 2. Configure settings

In [5]:
cell_size = (8, 8)
block_size = (2, 2)
nbins = 9
sbins = 40
threadsperblock = (32,32)

### 3. Read images

##### Nếu dùng google colab

In [6]:
# from google.colab import drive
# drive.mount('/content/drive')
# link_cat = 'drive/MyDrive/ltssud/data/Cat/**'
# link_dog = 'drive/MyDrive/ltssud/data/Dog/**'

##### Nếu dùng local drive

In [7]:
link_cat = r'C:\Users\dolek\Documents\GitHub\Applied-Parallel-Programming\Main\Data\Cat\**'
link_dog = r'C:\Users\dolek\Documents\GitHub\Applied-Parallel-Programming\Main\Data\Dog\**'

In [8]:
def get_image(link_cat, link_dog,number):
    images  = []
    labels = []
    list_cat = glob2.glob(link_cat)
    list_dog = glob2.glob(link_dog)
    print('Cat images size : ',len(list_cat))
    for i in range(number):
        if('jpg' in list_cat[i]):
            img = Image.open(list_cat[i]).convert('RGB')
            img = img.resize((400,400), Image.LANCZOS)
            temp_img = np.asarray(img)
            if len(temp_img.shape)  == 3:
                images.append(temp_img)
                labels.append(1)
    print("Cat done!")
    print('Dog images size : ',len(list_dog))
    for i in range(number):
        if('jpg' in list_dog[i]):
            img = Image.open(list_dog[i]).convert('RGB')
            img = img.resize((400,400), Image.LANCZOS)
            temp_img = np.asarray(img)
            if len(temp_img.shape)  == 3:
                images.append(temp_img)
                labels.append(-1)
    print("Dog done!")
    return images,labels

### 4. Time testing

In [9]:
def time_library(images, block_size, cell_size, nbins):
    #result = []
    for image in images:
        H = feature.hog(image, orientations=nbins, pixels_per_cell=block_size, \
                cells_per_block=cell_size, transform_sqrt=True, block_norm="L2",channel_axis=-1)
        #result.append(H)
    #return result

In [10]:
def time_HOG_CPU(images, block_size, cell_size, nbins, sbins):
    #result = []
    kernel = HOG_CPU(block_size, cell_size, nbins, sbins)
    for image in images:
        kernel.compute_HOG(image)
    #return result

In [11]:
def time_HOG_CUDA(images, block_size, cell_size, nbins, sbins,threadsperblock):
    #result = []
    kernel = HOG_CUDA(block_size, cell_size, nbins, sbins, threadsperblock)
    for image in images:
        kernel.compute_HOG(image)
    #return result

In [12]:
def time_testing(images, block_size, cell_size, nbins, sbins, threadsperblock):
    print(len(images))
    print("Library")
    %timeit time_library(images, block_size, cell_size, nbins)
    print("CPU :")
    %timeit time_HOG_CPU(images, block_size, cell_size, nbins, sbins)
    print("GPU :")
    %timeit time_HOG_CUDA(images, block_size, cell_size, nbins, sbins, threadsperblock)

In [13]:
image,label =  get_image(link_cat, link_dog, 5000)

Cat images size :  12500
Cat done!
Dog images size :  12500
Dog done!


In [14]:
time_testing(image[0:1], block_size, cell_size, nbins, sbins, threadsperblock)

1
Library
409 ms ± 18.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
CPU :
1.1 s ± 6.67 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
GPU :
32.1 ms ± 334 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
time_testing(image[0:100], block_size, cell_size, nbins, sbins, threadsperblock)

100
Library
40.6 s ± 1.21 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
CPU :



KeyboardInterrupt



In [None]:
time_testing(image[0:200], block_size, cell_size, nbins, sbins, threadsperblock)

In [None]:
time_testing(image[0:500], block_size, cell_size, nbins, sbins, threadsperblock)

In [None]:
time_testing(image[0:1000], block_size, cell_size, nbins, sbins, threadsperblock)

In [None]:
time_testing(image[0:2000], block_size, cell_size, nbins, sbins, threadsperblock)

In [None]:
time_testing(image[0:5000], block_size, cell_size, nbins, sbins, threadsperblock)