<a href="https://colab.research.google.com/github/ghirailghiro/GPU_Computing_Project/blob/3-first-step-gradient-computation/Gradient_First_Step.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Cuda plugin

!pip install nvcc4jupyter
%load_ext nvcc4jupyter
!nvidia-smi

# plugin for cpp sintax highlighting

!wget -O cpp_plugin.py https://gist.github.com/akshaykhadse/7acc91dd41f52944c6150754e5530c4b/raw/cpp_plugin.py
%load_ext cpp_plugin

Collecting nvcc4jupyter
  Downloading nvcc4jupyter-1.1.0-py3-none-any.whl (8.0 kB)
Installing collected packages: nvcc4jupyter
Successfully installed nvcc4jupyter-1.1.0
Source files will be saved in "/tmp/tmpvha0pged".
Sat Feb 17 20:50:26 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   47C    P8              10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                            

In [2]:
!sudo apt update
!sudo apt install -y build-essential
!sudo apt install -y libopencv-dev

[33m0% [Working][0m            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
[33m0% [Connecting to archive.ubuntu.com] [Waiting for headers] [1 InRelease 3,626 [0m[33m0% [Connecting to archive.ubuntu.com] [Waiting for headers] [Connecting to ppa.[0m                                                                               Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [110 kB]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [1,463 kB]
Get:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [119 kB]
Hit:7 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease
Get:8 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1,070 kB]
Hit:9 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy I

In [1]:
%%writefile gradient_computation.cu
#include <opencv2/opencv.hpp>
#include <iostream>
__constant__ float d_constArray[] = {-1, 0, 1}; // Example size of 256


/* CUDA kernel function declaration
__global__ void printPixelValues(unsigned char* image, int width, int height) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    int idy = threadIdx.y + blockIdx.y * blockDim.y;
    int index = idy * width + idx;

    // Ensure we don't go out of bounds
    if(idx < width && idy < height) {
        printf("Pixel (%d, %d): %d\n", idx, idy, image[index]);
    }
}*/

__global__ void computeGradients(unsigned char* image, float *d_magnitude, float *d_orientation, int width, int height) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    int idy = blockIdx.y * blockDim.y + threadIdx.y;
    int indexCurrent = idy * width + idx;

    if (idx >= width || idy >= height) return; // Boundary check
    int G_x = 0;
    if (indexCurrent > 0) {
        int precedingIndex = idy * width + (idx -1);
        unsigned char precedingValue = image[precedingIndex];

        // Example: Store the difference between the current pixel and its preceding pixel
        G_x += precedingValue*d_constArray[0];
    } else {
        // For the leftmost pixels, there's no preceding pixel in the same row
        // Handle accordingly, e.g., by simply copying the current pixel's value
        G_x += 0;
    }
    if (indexCurrent < width) {
        int postIndex = idy * width + (idx + 1);
        unsigned char postValue = image[postIndex];

        // Example: Store the difference between the current pixel and its preceding pixel
        G_x += postValue * d_constArray[2];
    } else {
        // For the leftmost pixels, there's no preceding pixel in the same row
        // Handle accordingly, e.g., by simply copying the current pixel's value
        G_x += 0;
    }

    int G_y = 0;
    if (indexCurrent > 0) {
        int precedingIndex = (idy-1) * width + idx;
        unsigned char precedingValue = image[precedingIndex];

        // Example: Store the difference between the current pixel and its preceding pixel
        G_y += precedingValue*d_constArray[0];
    } else {
        // For the leftmost pixels, there's no preceding pixel in the same row
        // Handle accordingly, e.g., by simply copying the current pixel's value
        G_y += 0;
    }
    if (indexCurrent < width) {
        int postIndex = (idy+1) * width + idx;
        unsigned char postValue = image[postIndex];

        // Example: Store the difference between the current pixel and its preceding pixel
        G_y += postValue * d_constArray[2];
    } else {
        // For the leftmost pixels, there's no preceding pixel in the same row
        // Handle accordingly, e.g., by simply copying the current pixel's value
        G_y += 0;
    }
    printf("G_x e G_y (%d, %d) at value %d %d\n", G_x, G_y,precedingValue,postValue);
    d_magnitude[indexCurrent] = sqrtf(powf(G_x,2) + powf(G_y,2))
    d_orientation[indexCurrent] = atan2f(G_x, G_y);

}



int main() {
    // Example: Load an image using OpenCV
    cv::Mat image = cv::imread("image.jpg", cv::IMREAD_GRAYSCALE);
    if(image.empty()) {
        std::cerr << "Failed to load image." << std::endl;
        return -1;
    }

    unsigned char* d_image;
    size_t imageSize = image.total() * image.elemSize();
    cudaMalloc(&d_image, imageSize);
    cudaMemcpy(d_image, image.data, imageSize, cudaMemcpyHostToDevice);
    size_t sizeInBytes = image.total() * sizeof(float);
    float* d_magnitude;
    cudaError_t status = cudaMalloc((void **)&d_magnitude, sizeInBytes);
    if (status != cudaSuccess) {
        // Handle error (e.g., printing an error message and exiting)
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
    status = cudaMemset(d_magnitude, 0, sizeInBytes);
    if (status != cudaSuccess) {
        // Handle error
        fprintf(stderr, "cudaMemset failed: %s\n", cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
    sizeInBytes = image.total() * sizeof(float);
    float* d_orientation;
    status = cudaMalloc((void **)&d_orientation, sizeInBytes);
    if (status != cudaSuccess) {
        // Handle error (e.g., printing an error message and exiting)
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
    status = cudaMemset(d_orientation, 0, sizeInBytes);
    if (status != cudaSuccess) {
        // Handle error
        fprintf(stderr, "cudaMemset failed: %s\n", cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }

    // Assuming image dimensions are reasonable for a grid of threads
    dim3 blockSize(16, 16);
    dim3 gridSize((image.cols + blockSize.x - 1) / blockSize.x,
                  (image.rows + blockSize.y - 1) / blockSize.y);

    // Launch the kernel
    //printPixelValues<<<gridSize, blockSize>>>(d_image, image.cols, image.rows);
    computeGradients<<<gridSize, blockSize>>>(d_image, d_magnitude, d_orientation, image.cols, image.rows);

    cudaDeviceSynchronize();

    cudaFree(d_image);
    cudaFree(d_magnitude);
    cudaFree(d_orientation);
    return 0;
}


Writing gradient_computation.cu


In [36]:
!nvcc gradient_computation.cu -o gradient_computation `pkg-config --cflags --libs opencv4` -run


[1;30;43mOutput streaming troncato alle ultime 5000 righe.[0m
G_x e G_y (-77, -76) at index : 30981
G_x e G_y (-77, -76) at index : 30982
G_x e G_y (-77, -76) at index : 30983
G_x e G_y (-77, -76) at index : 30984
G_x e G_y (-77, -76) at index : 30985
G_x e G_y (-77, -76) at index : 30986
G_x e G_y (-77, -76) at index : 30987
G_x e G_y (-74, -76) at index : 31882
G_x e G_y (-76, -76) at index : 31883
G_x e G_y (-76, -76) at index : 31884
G_x e G_y (-76, -76) at index : 31885
G_x e G_y (-76, -76) at index : 31886
G_x e G_y (-76, -76) at index : 31887
G_x e G_y (-76, -76) at index : 31888
G_x e G_y (-76, -76) at index : 31889
G_x e G_y (-76, -77) at index : 31890
G_x e G_y (-77, -77) at index : 31891
G_x e G_y (-77, -77) at index : 31892
G_x e G_y (-77, -77) at index : 31893
G_x e G_y (-77, -77) at index : 31894
G_x e G_y (-77, -77) at index : 31895
G_x e G_y (-77, -77) at index : 31896
G_x e G_y (-77, -77) at index : 31897
G_x e G_y (-75, -76) at index : 32792
G_x e G_y (-76, -76) at 

In [19]:
!./gradient_computation