# Detecci칩n de bordeado simple mediante GPUs y CUDA

# codigo secuancial





In [32]:
import cv2
import numpy as np
import time


def apply_laplacian_cpu(image: np.ndarray) -> np.ndarray:
    if image is None:
        raise ValueError("Imagen inv치lida")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    height, width = gray.shape

    output = np.zeros_like(gray, dtype=np.int32)

    kernel = np.array([[0, 1, 0],
                       [1, -4, 1],
                       [0, 1, 0]], dtype=np.int32)

    for y in range(1, height - 1):
        for x in range(1, width - 1):
            region = gray[y-1:y+2, x-1:x+2]
            value = np.sum(region * kernel)
            output[y, x] = value

    output = np.abs(output)
    output = np.clip(output, 0, 255).astype(np.uint8)

    return output


if __name__ == "__main__":
    image = cv2.imread("img6.png")
    start = time.time()
    result = apply_laplacian_cpu(image)
    end = time.time()

    print(f"Tiempo CPU: {end - start:.4f} segundos")
    cv2.imwrite("resultado_cpu.png", result)

Tiempo CPU: 9.7877 segundos


# codigo paralelo

In [28]:
import cv2
import numpy as np
import time
import math
from typing import Tuple
from numba import cuda


# =========================================
# KERNEL CUDA
# Cada thread procesa 1 pixel
# =========================================
@cuda.jit
def threshold_kernel(input_image, output_image, threshold):
    row, col = cuda.grid(2)

    height = input_image.shape[0]
    width = input_image.shape[1]

    if row < height and col < width:
        r = input_image[row, col, 0]
        g = input_image[row, col, 1]
        b = input_image[row, col, 2]

        pixel_sum = r + g + b
        output_image[row, col] = 255 if pixel_sum > threshold else 0


# =========================================
# FUNCI칍N GPU
# =========================================
def threshold_gpu(image: np.ndarray, threshold: int = 150) -> Tuple[np.ndarray, float]:
    if image is None:
        raise ValueError("Imagen inv치lida")

    if image.ndim != 3 or image.shape[2] != 3:
        raise ValueError("La imagen debe ser RGB (NxMx3)")

    height, width, _ = image.shape

    # Transferencia a GPU
    d_input = cuda.to_device(image)
    d_output = cuda.device_array((height, width), dtype=np.uint8)

    threads_per_block = (16, 16)

    blocks_x = math.ceil(height / threads_per_block[0])
    blocks_y = math.ceil(width / threads_per_block[1])
    blocks_per_grid = (blocks_x, blocks_y)

    start = time.perf_counter()

    threshold_kernel[blocks_per_grid, threads_per_block](d_input, d_output, threshold)
    cuda.synchronize()

    end = time.perf_counter()

    result = d_output.copy_to_host()

    return result, end - start


# =========================================
# EJECUCI칍N
# =========================================

# Cargar imagen
image = cv2.imread("img6.png")

if image is None:
    raise FileNotFoundError("No se encontr칩 imagen.jpg en el directorio")

image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# 游댠 MODIFICACI칍N SOLICITADA
image = cv2.resize(image, (3000, 3000))

print(f"Tama침o final: {image.shape}")

# Ejecutar GPU
result_gpu, time_gpu = threshold_gpu(image)

print(f"Tiempo GPU: {time_gpu:.6f} segundos")

# Guardar resultado
cv2.imwrite("resultado_gpuNew.png", result_gpu)

print("Imagen procesada guardada como resultado_gpu.png")

Tama침o final: (3000, 3000, 3)
Tiempo GPU: 0.090266 segundos
Imagen procesada guardada como resultado_gpu.png


In [38]:
speedup = time_cpu / time_gpu
print(f"Speedup: {speedup:.2f}x")

Speedup: 158.94x
