<a href="https://colab.research.google.com/github/ghirailghiro/GPU_Computing_Project/blob/10-add-sequential-algorithm/Gradient_First_Step.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Cuda plugin

!pip install nvcc4jupyter
%load_ext nvcc4jupyter
!nvidia-smi

# plugin for cpp sintax highlighting

!wget -O cpp_plugin.py https://gist.github.com/akshaykhadse/7acc91dd41f52944c6150754e5530c4b/raw/cpp_plugin.py
%load_ext cpp_plugin

In [None]:
!sudo apt update
!sudo apt install -y build-essential
!sudo apt install -y libopencv-dev

In [3]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')


Mounted at /content/drive


In [4]:
!ls '/content/drive/My Drive/GPU Computing/human detection dataset'

0  1


In [None]:
!git clone --recursive https://github.com/YoungYoung619/pedestrian-detection-in-hazy-weather.git

In [20]:
%%writefile gradient_computation.cu
#include <opencv2/opencv.hpp>
#include <fstream>
#include <sstream>
#include <iostream>
#include <vector>
#include <filesystem>
#include <chrono>

namespace fs = std::filesystem;

void saveDescriptorAsCSVHeader(const std::vector<int>& descriptor, const std::string& filename, const std::string& label) {
    std::ofstream file(filename, std::ios::app);
    if (!file.is_open()) {
        std::cerr << "Error: Unable to open file " << filename << " for writing." << std::endl;
        return;
    }

    // Write the descriptor to the CSV file
    for (size_t i = 0; i < descriptor.size(); ++i) {
        file << "x" <<descriptor[i];
        if (i < descriptor.size() - 1) {
            file << ",";
        }
    }
    file << "," << label <<","<< "Exec Time" << "\n";
    file.close();
}


void saveDescriptorAsCSV(const std::vector<float>& descriptor, const std::string& filename, int label,  double executionTime) {
    std::ofstream file(filename, std::ios::app);
    if (!file.is_open()) {
        std::cerr << "Error: Unable to open file " << filename << " for writing." << std::endl;
        return;
    }

    // Write the descriptor to the CSV file
    for (size_t i = 0; i < descriptor.size(); ++i) {
        file << descriptor[i];
        if (i < descriptor.size() - 1) {
            file << ",";
        }
    }
    file << "," << label << "," << executionTime << "\n";
    file.close();
}

void computeGradients_seq(const cv::Mat& image, std::vector<float>& magnitude, std::vector<float>& orientation, std::vector<float>& histograms, int cellSize, int numBins) {
    magnitude.clear();
    orientation.clear();
    histograms.clear();
    std::cout << "Entering computeGradients" << std::endl;

    // Assuming image dimensions are reasonable for a grid of threads
    int width = image.cols;
    int height = image.rows;
    int numCellsX = width / cellSize;
    int numCellsY = height / cellSize;
    histograms.resize(numCellsX * numCellsY * numBins, 0); // Initialize histogram vector

    // Compute gradients, magnitude, and orientation
    int countBin = 0;
    int countHist = 0;
    int countHistPos = 0;
    for (int idy = 0; idy < height; ++idy) {
        for (int idx = 0; idx < width; ++idx) {
            float G_x = 0, G_y = 0;
            if (idx > 0 && idx < width - 1) {
                G_x = static_cast<float>(image.at<uchar>(idy, idx + 1)) - static_cast<float>(image.at<uchar>(idy, idx - 1));
            }
            if (idy > 0 && idy < height - 1) {
                G_y = static_cast<float>(image.at<uchar>(idy + 1, idx)) - static_cast<float>(image.at<uchar>(idy - 1, idx));
            }

            float mag = std::sqrt(G_x * G_x + G_y * G_y);
            float orient = std::atan2(G_y, G_x);

            magnitude.push_back(mag);
            orientation.push_back(orient);

            // Compute histogram bin for the current gradient
            int cellX = idx / cellSize;
            int cellY = idy / cellSize;
            int histIndex = cellY * numCellsX + cellX;
                countHistPos++; // Ensure index is within bounds
                float binWidth = M_PI / numBins;
                int bin = std::floor((orient + M_PI) / binWidth);
                if (bin == numBins) bin = 0; // Wrap around
                histograms[histIndex * numBins + bin] += mag;
        }
    }
    std::cerr << "------------Summary of errors-----------" << std::endl;
    std::cerr << "Bin out of range: " << countBin << std::endl;
    std::cerr << "Histogram out of range: " << countHist << std::endl;
    std::cerr << "Histogram Pos: " << countHistPos << std::endl;
    std::cout << "Ending computeGradients" << std::endl;
}

__global__ void computeGradients(unsigned char* image, float *d_magnitude, float *d_orientation, float *d_histograms, int width, int height, int cellSize) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    int idy = blockIdx.y * blockDim.y + threadIdx.y;
    int indexCurrent = idy * width + idx;

    if (idx >= width || idy >= height) return; // Boundary check

    float G_x = 0;
    if (idx > 0 && idx < width - 1) {
        G_x = (float)image[idy * width + (idx + 1)] - (float)image[idy * width + (idx - 1)];
    }

    float G_y = 0;
    if (idy > 0 && idy < height - 1) {
        G_y = (float)image[(idy + 1) * width + idx] - (float)image[(idy - 1) * width + idx];
    }

    d_magnitude[indexCurrent] = sqrtf(G_x * G_x + G_y * G_y);
    d_orientation[indexCurrent] = atan2f(G_y, G_x);

    // Compute histogram bin for the current gradient
    int cellX = idx / cellSize;
    int cellY = idy / cellSize;
    int histIndex = cellY * (width / cellSize) + cellX;
    int numBins = 9; // Assuming 9 orientation bins
    float binWidth = M_PI / numBins;
    int bin = floor((d_orientation[indexCurrent] + M_PI) / binWidth);
    if (bin == numBins) bin = 0; // Wrap around
    // Debug output

    atomicAdd(&d_histograms[histIndex * numBins + bin], d_magnitude[indexCurrent]);
}

std::vector<float> computeDescriptorsCUDA(const cv::Mat& image, double& executionTime) {
    unsigned char* d_image;
    size_t imageSize = image.total() * image.elemSize();
    cudaMalloc(&d_image, imageSize);
    cudaMemcpy(d_image, image.data, imageSize, cudaMemcpyHostToDevice);
    size_t sizeInBytes = image.total() * sizeof(float);
    float* d_magnitude;
    cudaError_t status = cudaMalloc((void **)&d_magnitude, sizeInBytes);
    if (status != cudaSuccess) {
        // Handle error (e.g., printing an error message and exiting)
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
    status = cudaMemset(d_magnitude, 0, sizeInBytes);
    if (status != cudaSuccess) {
        // Handle error
        fprintf(stderr, "cudaMemset failed: %s\n", cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
    sizeInBytes = image.total() * sizeof(float);
    float* d_orientation;
    status = cudaMalloc((void **)&d_orientation, sizeInBytes);
    if (status != cudaSuccess) {
        // Handle error (e.g., printing an error message and exiting)
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
    status = cudaMemset(d_orientation, 0, sizeInBytes);
    if (status != cudaSuccess) {
        // Handle error
        fprintf(stderr, "cudaMemset failed: %s\n", cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }

    // Assuming image dimensions are reasonable for a grid of threads
    dim3 blockSize(16, 16);
    dim3 gridSize((image.cols + blockSize.x - 1) / blockSize.x,
                  (image.rows + blockSize.y - 1) / blockSize.y);

    // Allocate memory for histograms
    int cellSize = 64;
    int numCellsX = image.cols / cellSize;
    int numCellsY = image.rows / cellSize;
    size_t histSize = numCellsX * numCellsY * 9 * sizeof(float);
    float* d_histograms;
    status = cudaMalloc((void **)&d_histograms, histSize);
    if (status != cudaSuccess) {
        // Handle error
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
    status = cudaMemset(d_histograms, 0, histSize);
    if (status != cudaSuccess) {
        // Handle error
        fprintf(stderr, "cudaMemset failed: %s\n", cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }

    auto start = std::chrono::high_resolution_clock::now();
    // Launch the kernel
    computeGradients<<<gridSize, blockSize>>>(d_image, d_magnitude, d_orientation, d_histograms, image.cols, image.rows, cellSize);
    cudaDeviceSynchronize();
    auto end = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed = end - start;
    executionTime = elapsed.count();

    // Transfer histogram data from device to host
    float* h_histograms = new float[numCellsX * numCellsY * 9];
    cudaMemcpy(h_histograms, d_histograms, histSize, cudaMemcpyDeviceToHost);
    // Normalization
    for (int i = 0; i < numCellsX * numCellsY; ++i) {
        float sum = 0.0f;
        for (int j = 0; j < 9; ++j) {
            sum += h_histograms[i * 9 + j] * h_histograms[i * 9 + j];
        }
        sum = sqrtf(sum);
        for (int j = 0; j < 9; ++j) {
            h_histograms[i * 9 + j] /= (sum + 1e-6); // Small constant added to avoid division by zero
        }
    }

    // Block Formation and Descriptor Computation
    std::vector<float> descriptor;
    for (int i = 0; i < numCellsY - 1; ++i) {
        for (int j = 0; j < numCellsX - 1; ++j) {
            // Concatenate histograms of four cells into a block
            for (int y = i; y < i + 2; ++y) {
                for (int x = j; x < j + 2; ++x) {
                    for (int k = 0; k < 9; ++k) {
                        descriptor.push_back(h_histograms[(y * numCellsX + x) * 9 + k]);
                    }
                }
            }
        }
    }

    cudaFree(d_image);
    cudaFree(d_magnitude);
    cudaFree(d_orientation);
    delete[] h_histograms;
    cudaFree(d_histograms);

    return descriptor;
}

std::vector<float> computeDescriptorsSeq(const cv::Mat& image, double& executionTime) {
     // Allocate memory for histograms
    int cellSize = 64;
    int numCellsX = image.cols / cellSize;
    int numCellsY = image.rows / cellSize;

    std::vector<float> magnitude, orientation;
    std::vector<float> histograms(numCellsX * numCellsY * 9, 0.0f);
    auto start = std::chrono::high_resolution_clock::now();
    computeGradients_seq(image, magnitude, orientation, histograms, cellSize, 9);
    auto end = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed = end - start;
    executionTime = elapsed.count();

    // Normalization
    for (int i = 0; i < numCellsX * numCellsY; ++i) {
        float sum = 0.0f;
        for (int j = 0; j < 9; ++j) {
            sum += histograms[i * 9 + j] * histograms[i * 9 + j];
        }
        sum = sqrtf(sum);
        for (int j = 0; j < 9; ++j) {
            histograms[i * 9 + j] /= (sum + 1e-6); // Small constant added to avoid division by zero
        }
    }

    // Block Formation and Descriptor Computation
    std::vector<float> descriptor;
    for (int i = 0; i < numCellsY - 1; ++i) {
        for (int j = 0; j < numCellsX - 1; ++j) {
            // Concatenate histograms of four cells into a block
            for (int y = i; y < i + 2; ++y) {
                for (int x = j; x < j + 2; ++x) {
                    for (int k = 0; k < 9; ++k) {
                        descriptor.push_back(histograms[(y * numCellsX + x) * 9 + k]);
                    }
                }
            }
        }
    }

    return descriptor;
}

std::vector<float> computeDescriptors(const std::string& image_path, double& executionTime, bool cudaAccelerated = true) {
        // Example: Load an image using OpenCV
    cv::Mat imageBeforeResize = cv::imread(image_path, cv::IMREAD_GRAYSCALE);
    cv::Mat image;
    cv::resize(imageBeforeResize, image, cv::Size(224, 224)); // Resize to standard size
    if(image.empty()) {
        std::cerr << "Failed to load image." << std::endl;
        return std::vector<float>();
    }
    std::vector<float> descriptor;
    if(cudaAccelerated) {
        descriptor = computeDescriptorsCUDA(image, executionTime);
    } else {
        descriptor = computeDescriptorsSeq(image, executionTime);
    }

    // Compare histograms
    /*for (int i = 0; i < descriptor.size(); ++i) {
        if (std::abs(descriptor[i] - descriptor1[i]) > 1e-5) {
            std::cout << "Difference in histogram at index " << i << std::endl;
            std::cout << "CUDA histogram : " << descriptor[i] << std::endl;
            std::cout << "NOT CUDA histogram : " << descriptor1[i] << std::endl;
        }
    }*/

    return descriptor;
}

int main(int argc, char** argv) {
     if (argc != 6) {
        std::cerr << "Usage: " << argv[0] << " <cellSize> <blockSize> <numBins> <outputFile> <dimofimage>" << std::endl;
        return 1;
    }

    int cellSize = std::stoi(argv[1]);
    int blockSize = std::stoi(argv[2]);
    int numBins = std::stoi(argv[3]);
    std::string outputFile = argv[4];
    int dimofimage = std::stoi(argv[5]);//224
    int numCellsX = dimofimage / cellSize;
    int numCellsY = dimofimage / cellSize;
    int descriptorSizeDimension = (numCellsY - blockSize + 1) * (numCellsX - blockSize + 1) * blockSize * blockSize * numBins;

    std::string folder_path = "/content/drive/My Drive/GPU Computing/human detection dataset/1"; // Change this to your folder path
    std::vector<int> header;
    for (int i=1; i <= descriptorSizeDimension; ++i){
      header.push_back(i);
    }
    saveDescriptorAsCSVHeader(header, "descriptor_seq.csv", "label");
    saveDescriptorAsCSVHeader(header, "descriptor_cuda.csv", "label");
    header.clear();
    //People present class
    for (const auto& entry : fs::directory_iterator(folder_path)) {
        std::string file_path = entry.path().string();
        std::cout << "Processing image: " << file_path << std::endl;

        double executionTimeCuda = 0.0;
        double executionTimeSeq = 0.0;
        std::vector<float> descriptor = computeDescriptors(file_path, executionTimeCuda);
        std::vector<float> descriptor_seq = computeDescriptors(file_path, executionTimeSeq, false);
        if (descriptor.empty() || descriptor_seq.empty()) {
            std::cout << "Vector is empty" << std::endl;
        } else {
            int label = 1;
            saveDescriptorAsCSV(descriptor, "descriptor_cuda.csv", label, executionTimeCuda);
            saveDescriptorAsCSV(descriptor_seq, "descriptor_seq.csv", label, executionTimeSeq);
            descriptor.clear(); // Clear the vector
            descriptor_seq.clear();
        }
    }

      //Not people present class
      folder_path = "/content/drive/My Drive/GPU Computing/human detection dataset/0";
      for (const auto& entry : fs::directory_iterator(folder_path)) {
        std::string file_path = entry.path().string();
        std::cout << "Processing image: " << file_path << std::endl;

        double executionTimeCuda = 0.0;
        double executionTimeSeq = 0.0;
        std::vector<float> descriptor = computeDescriptors(file_path, executionTimeCuda);
        std::vector<float> descriptor_seq = computeDescriptors(file_path, executionTimeSeq, false);
        if (descriptor.empty() || descriptor_seq.empty()) {
            std::cout << "Vector is empty" << std::endl;
        } else {
            int label = 1;
            saveDescriptorAsCSV(descriptor, "descriptor_cuda.csv", label, executionTimeCuda);
            saveDescriptorAsCSV(descriptor_seq, "descriptor_seq.csv", label, executionTimeSeq);
            descriptor.clear(); // Clear the vector
            descriptor_seq.clear();
        }
    }


    return 0;
}


Overwriting gradient_computation.cu


In [7]:
# Experiment parameters
CELL_SIZES=(64)
BLOCK_SIZES=(2)
NUM_BINS=(9)
OUTPUT_FILE_BASENAME="descriptor"
DIM_IMG=224

In [21]:
!nvcc gradient_computation.cu -o gradient_computation `pkg-config --cflags --libs opencv4`

  class AffineWarper : public PlaneWarper
        ^


  class AffineWarper : public PlaneWarper
        ^

  class FeatherBlender : public Blender
        ^

  class MultiBandBlender : public Blender
        ^

  class AffineWarper : public PlaneWarper
        ^


  class AffineWarper : public PlaneWarper
        ^

  class FeatherBlender : public Blender
        ^

  class MultiBandBlender : public Blender
        ^



In [26]:
!./gradient_computation 64 2 9 descriptor.csv 224

[1;30;43mOutput streaming troncato alle ultime 5000 righe.[0m
Entering computeGradients
------------Summary of errors-----------
Bin out of range: 0
Histogram out of range: 0
Histogram Pos: 50176
Ending computeGradients
Processing image: /content/drive/My Drive/GPU Computing/human detection dataset/1/271.png
Entering computeGradients
------------Summary of errors-----------
Bin out of range: 0
Histogram out of range: 0
Histogram Pos: 50176
Ending computeGradients
Processing image: /content/drive/My Drive/GPU Computing/human detection dataset/1/213.png
Entering computeGradients
------------Summary of errors-----------
Bin out of range: 0
Histogram out of range: 0
Histogram Pos: 50176
Ending computeGradients
Processing image: /content/drive/My Drive/GPU Computing/human detection dataset/1/223.png
Entering computeGradients
------------Summary of errors-----------
Bin out of range: 0
Histogram out of range: 0
Histogram Pos: 50176
Ending computeGradients
Processing image: /content/drive/M

In [27]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

# Assuming df is your DataFrame with features and labels
# Splitting data into features (X) and labels (y)

df = pd.read_csv("descriptor_seq.csv")
df1 = pd.read_csv("descriptor_cuda.csv")
new_value = 0

df.iloc[-1, df.columns.get_loc('label')] = new_value

X = df.drop(columns=['label'])  # Replace 'target_column' with the name of your target column
y = df['label']


In [28]:
df

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,...,x137,x138,x139,x140,x141,x142,x143,x144,label,Exec Time
0,0.707517,0.095859,0.090213,0.234234,0.163953,0.073105,0.075206,0.284448,0.555370,0.362358,...,0.358775,0.494454,0.258048,0.194932,0.174673,0.186705,0.243515,0.281776,1,0.005935
1,0.822687,0.280493,0.176226,0.297993,0.238534,0.083310,0.075005,0.112680,0.206139,0.643267,...,0.298940,0.304530,0.278457,0.226737,0.212460,0.187485,0.210920,0.272148,1,0.006722
2,0.522358,0.266394,0.422266,0.334273,0.378603,0.326701,0.207820,0.222299,0.153132,0.117661,...,0.202334,0.213733,0.218960,0.385172,0.468673,0.420357,0.208378,0.157847,1,0.005398
3,0.177814,0.058250,0.057333,0.241276,0.942931,0.088277,0.033646,0.035817,0.064515,0.079319,...,0.178250,0.235146,0.307402,0.475547,0.530086,0.408563,0.328167,0.166671,1,0.005490
4,0.655013,0.074662,0.070753,0.116034,0.367965,0.230707,0.191667,0.262129,0.502834,0.213235,...,0.341648,0.330521,0.282466,0.636378,0.250465,0.150787,0.206227,0.280177,1,0.005697
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
916,0.619156,0.112077,0.124536,0.143167,0.315157,0.187211,0.187977,0.354883,0.521948,0.219930,...,0.231810,0.200745,0.218844,0.801915,0.276247,0.213413,0.110165,0.091729,1,0.007302
917,0.034784,0.008650,0.026957,0.127687,0.983218,0.120785,0.018789,0.004897,0.000790,0.016739,...,0.134315,0.165870,0.685969,0.307762,0.240117,0.188144,0.093444,0.319522,1,0.007379
918,0.610380,0.230206,0.147448,0.156651,0.351962,0.496825,0.264001,0.210136,0.208797,0.379372,...,0.141768,0.168427,0.219934,0.461557,0.616182,0.459441,0.263488,0.124403,1,0.008334
919,0.447218,0.311607,0.307340,0.341800,0.374051,0.241301,0.171222,0.398952,0.324027,0.206929,...,0.142378,0.483637,0.484039,0.592594,0.155466,0.177496,0.077784,0.053165,1,0.012335


In [29]:
df1

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,...,x137,x138,x139,x140,x141,x142,x143,x144,label,Exec Time
0,0.707517,0.095859,0.090214,0.234234,0.163953,0.073105,0.075206,0.284447,0.555370,0.362358,...,0.358775,0.494454,0.258048,0.194932,0.174673,0.186705,0.243515,0.281776,1,0.000268
1,0.822687,0.280493,0.176226,0.297993,0.238534,0.083310,0.075005,0.112680,0.206139,0.643267,...,0.298940,0.304530,0.278457,0.226737,0.212460,0.187485,0.210920,0.272148,1,0.000047
2,0.522358,0.266394,0.422266,0.334273,0.378603,0.326701,0.207820,0.222299,0.153132,0.117661,...,0.202334,0.213733,0.218961,0.385171,0.468673,0.420357,0.208378,0.157847,1,0.000053
3,0.177814,0.058250,0.057333,0.241276,0.942931,0.088277,0.033646,0.035817,0.064515,0.079319,...,0.178250,0.235146,0.307402,0.475547,0.530087,0.408563,0.328167,0.166671,1,0.000052
4,0.655013,0.074662,0.070753,0.116034,0.367965,0.230707,0.191667,0.262129,0.502834,0.213235,...,0.341648,0.330521,0.282466,0.636378,0.250465,0.150787,0.206227,0.280177,1,0.000052
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
916,0.619156,0.112077,0.124535,0.143167,0.315157,0.187211,0.187977,0.354883,0.521948,0.219930,...,0.231810,0.200745,0.218844,0.801915,0.276247,0.213413,0.110165,0.091729,1,0.000058
917,0.034784,0.008650,0.026957,0.127687,0.983218,0.120785,0.018789,0.004897,0.000790,0.016739,...,0.134315,0.165870,0.685969,0.307762,0.240117,0.188144,0.093444,0.319522,1,0.000056
918,0.610379,0.230206,0.147448,0.156651,0.351962,0.496825,0.264001,0.210136,0.208797,0.379373,...,0.141768,0.168427,0.219934,0.461557,0.616182,0.459441,0.263488,0.124403,1,0.000057
919,0.447218,0.311607,0.307339,0.341800,0.374051,0.241301,0.171222,0.398952,0.324027,0.206929,...,0.142378,0.483637,0.484039,0.592594,0.155466,0.177496,0.077784,0.053165,1,0.000062


In [None]:
y

0    1
1    1
2    1
3    0
Name: label, dtype: int64

In [None]:
# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Training a Linear Support Vector Machine
svm = LinearSVC()
svm.fit(X_train, y_train)

# Making predictions
y_pred = svm.predict(X_test)

# Evaluating accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.6857142857142857
