In [1]:
! pip install pycuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
from pycuda import driver, gpuarray

Collecting pycuda
  Downloading pycuda-2022.2.2.tar.gz (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytools>=2011.2 (from pycuda)
  Downloading pytools-2023.1.1-py2.py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.6/70.6 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
Collecting mako (from pycuda)
  Downloading Mako-1.2.4-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: pycuda
  Building wheel for pycuda (pyproject.toml) ... [?25l[?25hdone
  Created wheel for pycuda: filename=pycuda-2022.2.2-cp310-cp310-linux_x86_64.whl size=661265 sha256=d477

In [41]:
from PIL import Image
import numpy as np
import math
import time
import random

In [42]:
# ядро и вспомогательные элементы на c++
mod = SourceModule("""

 texture<int, 2, cudaReadModeElementType> image;

__device__ void bubble_sort(int* list, int listLength){
	while(listLength--){
		bool swapped = false;
		for(int i = 0; i < listLength; i++){
			if(list[i] > list[i + 1]){
        int temp = list[i];
        list[i] = list[i + 1];
        list[i + 1] = temp;
				swapped = true;
			}
		}
		if(swapped == false)
			break;
	}
}


__global__ void kernel(int* out, int n, int m, int kernel_size){
    extern __shared__  int median_value[];

    int i = blockIdx.x * blockDim.x + threadIdx.x;
    int j = blockIdx.y * blockDim.y + threadIdx.y;
    int k = (kernel_size - 1) / 2;

    if (i >= n || j >= m) {
        return;
    }

    int min_i = max(0, i - k);
    int min_j = max(0, j - k);
    int max_i = min(n-1, i + k);
    int max_j = min(m-1, j + k);

    int c_n = (max_i - min_i) + 1;
    int c_m = (max_j - min_j) + 1;

    for (int c_i = 0; c_i < c_n; ++c_i) {
        for (int c_j = 0; c_j < c_m; ++c_j) {
            median_value[c_i * c_m + c_j] = tex2D(image, min_j + c_j, min_i + c_i);
        }
    }
    bubble_sort(median_value, c_n * c_m);
    out[i * m + j] = median_value[c_n * c_m / 2];

}
  """)

# функция для динамического подсчета размерностей грида и блоков
def count_cuda_dims(n, m):
    xthreadsPerBlock = n if n < 8 else 8
    ythreadsPerBlock = m if m < 8 else 8
    blocksPerGrid = (math.ceil(n / xthreadsPerBlock), math.ceil(m / ythreadsPerBlock), 1)
    threadsPerBlock = (xthreadsPerBlock, ythreadsPerBlock, 1)
    return blocksPerGrid, threadsPerBlock


# функция для применения фильтра на GPU
def filter_gpu(inp, n, m, kernel_size):
  start_time = time.time()
  k = (kernel_size - 1) / 2

  tex_image = mod.get_texref("image")
  inp = inp.astype(np.int32)
  driver.matrix_to_texref(inp, tex_image, order="C")

  out = np.zeros(n*m)
  out = out.astype(np.int32)
  out_gpu = driver.mem_alloc(out.nbytes)
  driver.memcpy_htod(out_gpu, out)

  blocksPerGrid, threadsPerBlock=count_cuda_dims(n, m)
  sharedArrayOffsetScale = kernel_size * kernel_size
  bufSize = sharedArrayOffsetScale * threadsPerBlock[0] * threadsPerBlock[1]*4
  kernel = mod.get_function("kernel")
  kernel(out_gpu, np.int32(n), np.int32(m), np.int32(kernel_size), block=threadsPerBlock, grid=blocksPerGrid, shared=bufSize)

  driver.Context.synchronize()
  driver.memcpy_dtoh(out, out_gpu)
  return out, time.time()-start_time

/usr/local/cuda/bin/../targets/x86_64-linux/include/texture_fetch_functions.h(198): here was declared deprecated


  mod = SourceModule("""
  globals().clear()


In [43]:
# функция для применения фильтра на CPU
def filter_cpu(inp, n, m, kernel_size):
    start_time = time.time()
    output = np.zeros(n*m)
    kernel = (kernel_size - 1) / 2

    for i in range(n):
        for j in range(m):
            min_i = max(0, i - kernel)
            min_j = max(0, j - kernel)
            max_i = min(n-1, i + kernel)
            max_j = min(m-1, j + kernel)
            c_n = int((max_i - min_i) + 1)
            c_m = int((max_j - min_j) + 1)
            k = 0
            index0 = i * m + j
            value=[]
            for c_i in range(c_n):
                for c_j in range(c_m):
                    index = int((min_i + c_i) * m + (min_j + c_j))
                    f = inp[index]
                    value.append(f)
            v = sorted(value)

            output[i * m + j] = v[int(c_n*c_m/2)]
    return output, time.time()-start_time

In [44]:
# функция добавления шума на изображения
def add_noise(img):
    row , col = img.shape
    number_of_pixels = random.randint(300, 10000)
    for i in range(number_of_pixels):
        y_coord=random.randint(0, row - 1)
        x_coord=random.randint(0, col - 1)
        img[y_coord,x_coord] = 255
    number_of_pixels = random.randint(300 , 10000)
    for i in range(number_of_pixels):
        y_coord=random.randint(0, row - 1)
        x_coord=random.randint(0, col - 1)
        img[y_coord,x_coord] = 0
    return img

In [45]:
#добавление шума
import cv2
img = cv2.imread('Mona_Lisa_GS2.bmp', cv2.IMREAD_GRAYSCALE)
cv2.imwrite('sap.bmp', add_noise(img))

True

In [46]:
WIDTH = 400
HIDTH = 400
img=Image.open('sap.bmp').convert('L')
new_img = img.resize((WIDTH,HIDTH))
new_img.save('new_image.bmp')

In [47]:
img=Image.open('new_image.bmp').convert('L')
new_img = img.resize((WIDTH,HIDTH))
img_array = np.asarray(new_img).reshape(WIDTH*HIDTH)
res_cpu, time_cpu = filter_cpu(img_array, WIDTH, HIDTH, 9)
res_cpu = res_cpu.reshape((WIDTH, HIDTH))
PIL_image = Image.fromarray(np.uint8(res_cpu))
print(time_cpu)
PIL_image.save('CPU.bmp')

7.75164270401001


In [48]:
img=Image.open('new_image.bmp').convert('L')
new_img = img.resize((WIDTH,HIDTH))
img_array = np.array(new_img)
res_gpu, time_gpu = filter_gpu(img_array, WIDTH, HIDTH, 9)
print(time_gpu)
res_gpu = res_gpu.reshape((WIDTH, HIDTH))
PIL_image.save('GPU.bmp')

0.0060214996337890625


In [49]:
A = time_cpu/time_gpu
A

1287.3276053215077