In [2]:
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
from PIL import Image
import time

# Load and save image using Pillow
def load_image(filepath):
    image = Image.open(filepath).convert("L")  # Convert to grayscale
    return np.array(image, dtype=np.uint8)

def save_image(img_array, filepath):
    img_array = np.clip(img_array, 0, 255)  # Ensure pixel values are within range
    image = Image.fromarray(img_array.astype(np.uint8))  # Convert back to uint8 for saving
    image.save(filepath)

# CUDA kernel code for the median filter (C++ code as a string)
kernel_code = """
texture<unsigned char, 2, cudaReadModeElementType> tex;

__global__ void median_filter(unsigned char* output, int width, int height) {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= 1 && x < width - 1 && y >= 1 && y < height - 1) {
        unsigned char window[9];
        int idx = 0;

        // Read the 3x3 window from texture memory
        for (int i = -1; i <= 1; ++i) {
            for (int j = -1; j <= 1; ++j) {
                window[idx++] = tex2D(tex, x + j, y + i);
            }
        }

        // Sort the window to find the median
        for (int i = 0; i < 9; i++) {
            for (int j = i + 1; j < 9; j++) {
                if (window[i] > window[j]) {
                    unsigned char temp = window[i];
                    window[i] = window[j];
                    window[j] = temp;
                }
            }
        }

        // Set the median value in the output
        output[y * width + x] = window[4]; // The median is the 5th element
    }
}
"""

# Apply median filter using CUDA
def apply_median_filter(img_array):
    height, width = img_array.shape

    # Allocate memory for input and output on GPU
    d_img = cuda.mem_alloc(img_array.nbytes)
    d_output = cuda.mem_alloc(img_array.nbytes)

    # Copy input image data to the device (GPU)
    cuda.memcpy_htod(d_img, img_array)

    # Compile the CUDA kernel
    mod = SourceModule(kernel_code)

    # Bind the texture to the input image data
    tex = mod.get_texref('tex')

    # Create a cudaArray to bind to the texture
    img_array_device = cuda.mem_alloc(img_array.nbytes)
    cuda.memcpy_htod(img_array_device, img_array)

    tex.set_array(img_array_device)

    # Set up the kernel function and launch configuration
    median_filter = mod.get_function('median_filter')
    block_size = (16, 16, 1)
    grid_size = ((width + block_size[0] - 1) // block_size[0],
                 (height + block_size[1] - 1) // block_size[1])

    # Launch the kernel
    median_filter(d_output, np.int32(width), np.int32(height), block=block_size, grid=grid_size)

    # Copy the result back from GPU to CPU
    output_array = np.empty_like(img_array)
    cuda.memcpy_dtoh(output_array, d_output)

    # Return the processed image
    return output_array

if __name__ == "__main__":
    # Path to input and output images
    input_path = "input.bmp"
    output_path = "output.bmp"

    # Load the input image
    img_array = load_image(input_path)

    # Measure processing time
    start_time = time.time()

    # Apply median filter
    output_img_array = apply_median_filter(img_array)

    # Save the output image
    save_image(output_img_array, output_path)

    print(f"Output image saved as {output_path}")
    print(f"Processing time: {time.time() - start_time:.3f} seconds")


CompileError: nvcc compilation of C:\Users\eugene\AppData\Local\Temp\tmp3yp0o5jl\kernel.cu failed
[command: nvcc --cubin -arch sm_86 -m64 -Ic:\Users\eugene\AppData\Local\Programs\Python\Python310\lib\site-packages\pycuda\cuda kernel.cu]
[stdout:
kernel.cu(3): error: texture is not a template
  texture<unsigned char, 2, cudaReadModeElementType> tex;
  ^

kernel.cu(16): error: no instance of overloaded function "tex2D" matches the argument list
            argument types are: (<error-type>, int, int)
                  window[idx++] = tex2D(tex, x + j, y + i);
                                  ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include\texture_indirect_functions.h(163): note #3322-D: number of parameters of function template "tex2D<T>(cudaTextureObject_t, float, float, __nv_bool *)" does not match the call
  static __declspec(__device__)  T tex2D(cudaTextureObject_t texObject, float x, float y, bool* isResident)
                                   ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include\texture_indirect_functions.h(154): note #3322-D: number of parameters of function template "tex2D(T *, cudaTextureObject_t, float, float, __nv_bool *)" does not match the call
  static __declspec(__device__) typename __nv_itex_trait<T>::type tex2D(T *ptr, cudaTextureObject_t obj, float x, float y, 
                                                                  ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include\texture_indirect_functions.h(145): note #3327-D: candidate function template "tex2D<T>(cudaTextureObject_t, float, float)" failed deduction
  static __declspec(__device__)  T tex2D(cudaTextureObject_t texObject, float x, float y)
                                   ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include\texture_indirect_functions.h(139): note #3322-D: number of parameters of function template "tex2D(T *, cudaTextureObject_t, float, float)" does not match the call
  static __declspec(__device__) typename __nv_itex_trait<T>::type tex2D(T *ptr, cudaTextureObject_t obj, float x, float y)
                                                                  ^

2 errors detected in the compilation of "kernel.cu".
kernel.cu
]

In [8]:
import cupy as cp
import numpy as np
from PIL import Image
import time

# Загрузка изображения и преобразование в массив NumPy
def load_image(filepath):
    image = Image.open(filepath).convert("L")  # Преобразуем изображение в градации серого
    img_array = np.array(image, dtype=np.uint8)
    return img_array

def save_image(img_array, filepath):
    # Убедитесь, что тип данных изображения - uint8 (целочисленный формат)
    img_array = np.clip(img_array, 0, 255)  # Обрезаем значения пикселей в пределах от 0 до 255
    image = Image.fromarray(img_array.astype(np.uint8))  # Преобразуем в uint8
    image.save(filepath)

# C++ код для медианного фильтра
kernel_code = """
extern "C" __global__
void median_filter(const unsigned char *input, unsigned char *output, int width, int height)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    
    if (x >= 1 && x < width - 1 && y >= 1 && y < height - 1) {
        unsigned char window[9];
        int idx = 0;

        for (int i = -1; i <= 1; ++i) {
            for (int j = -1; j <= 1; ++j) {
                window[idx++] = input[(y + i) * width + (x + j)];
            }
        }

        // Сортируем окно
        for (int i = 0; i < 9; i++) {
            for (int j = i + 1; j < 9; j++) {
                if (window[i] > window[j]) {
                    unsigned char temp = window[i];
                    window[i] = window[j];
                    window[j] = temp;
                }
            }
        }

        // Устанавливаем медиану
        output[y * width + x] = window[4];
    }
}
"""

# Используем RawKernel для компиляции и запуска C++ кода
def apply_median_filter(img_array):
    height, width = img_array.shape

    # Преобразуем изображение в массив CuPy (GPU-совместимый массив)
    d_img = cp.asarray(img_array)
    d_output = cp.empty_like(d_img)

    # Настройка CUDA-ядра
    kernel = cp.RawKernel(kernel_code, "median_filter")

    # Указываем блоки и сетку для CUDA
    threads_per_block = (16, 16)
    blocks_per_grid = (width // 16 + 1, height // 16 + 1)

    # Запуск ядра
    kernel(blocks_per_grid, threads_per_block, (d_img, d_output, width, height))

    # Переводим результат обратно в NumPy для сохранения
    return cp.asnumpy(d_output)

if __name__ == "__main__":
    # Путь к исходному изображению с шумом
    input_path = "input.bmp"
    output_path = "output.bmp"

    # Загрузка и обработка изображения
    img_array = load_image(input_path)

    # Измеряем время начала обработки
    start = time.time()

    # Применение медианного фильтра с использованием C++ ядра
    output_img_array = apply_median_filter(img_array)

    # Сохранение результата
    save_image(output_img_array, output_path)

    print(f"Output image saved as {output_path}")
    print(f"Time taken: {time.time() - start:.3f} seconds")


Output image saved as output.bmp
Time taken: 0.002 seconds
