In [1]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0


In [2]:

pip install git+https://github.com/afnan47/cuda.git

Collecting git+https://github.com/afnan47/cuda.git
  Cloning https://github.com/afnan47/cuda.git to /tmp/pip-req-build-3rjl4pxi
  Running command git clone --filter=blob:none --quiet https://github.com/afnan47/cuda.git /tmp/pip-req-build-3rjl4pxi
  Resolved https://github.com/afnan47/cuda.git to commit aac710a35f52bb78ab34d2e52517237941399eff
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: NVCCPlugin
  Building wheel for NVCCPlugin (setup.py) ... done
  Created wheel for NVCCPlugin: filename=NVCCPlugin-0.0.2-py3-none-any.whl size=4289 sha256=d73d0fb869e099251747388c33943e5e581d818237bc33c6fe66b1a8ed2e3eae
  Stored in directory: /tmp/pip-ephem-wheel-cache-47fowu0x/wheels/aa/f3/44/e10c1d226ec561d971fcd4b0463f6bff08602afa928a3e7bc7
Successfully built NVCCPlugin
Installing collected packages: NVCCPlugin
Successfully installed NVCCPlugin-0.0.2


In [3]:
%load_ext nvcc_plugin


created output directory at /content/src
Out bin /content/result.out


In [19]:
%%writefile merge.cu
#include <iostream>
#include <chrono>

// CUDA kernel: sorts d_arr[0..size) in place, ascending, using odd-even
// transposition sort (the data-parallel form of bubble sort).
//
// Parameters:
//   d_arr - device pointer to `size` ints; sorted in place.
//   size  - number of elements; size <= 1 leaves the array untouched.
//
// Launch layout: 1D grid of 1D blocks, any dimensions. Only block 0 does the
// work: consecutive phases must be separated by a barrier, and __syncthreads()
// only synchronizes threads of one block, so additional blocks exit
// immediately. No shared memory is used.
__global__ void bubble_sort(int* d_arr, int size) {
    // A whole block leaves together here, so every thread that stays reaches
    // each __syncthreads() below the same number of times.
    if (blockIdx.x != 0) {
        return;
    }

    const int stride = 2 * static_cast<int>(blockDim.x);

    // `size` alternating phases are sufficient to sort any input.
    for (int phase = 0; phase < size; phase++) {
        // Even phases compare (0,1),(2,3),...; odd phases compare (1,2),(3,4),...
        // Pairs within a phase are disjoint, so no two threads touch the same
        // element between barriers.
        for (int left = 2 * static_cast<int>(threadIdx.x) + (phase & 1);
             left + 1 < size;
             left += stride) {
            int a = d_arr[left];
            int b = d_arr[left + 1];
            if (a > b) { // Swap if out of order
                d_arr[left] = b;
                d_arr[left + 1] = a;
            }
        }
        // Make this phase's swaps visible to the block before the next phase.
        __syncthreads();
    }
}

// Sequential reference implementation: classic in-place bubble sort (ascending).
//   arr  - host array holding `size` ints.
//   size - element count; values below 2 leave the array untouched.
void bubble_sort_cpu(int* arr, int size) {
    // After each pass the largest remaining value has settled at index
    // `unsorted`, so the scanned prefix shrinks by one every time.
    for (int unsorted = size - 1; unsorted > 0; --unsorted) {
        for (int* p = arr; p < arr + unsorted; ++p) {
            if (p[0] > p[1]) {
                // Neighbours are out of order: exchange them.
                const int held = p[0];
                p[0] = p[1];
                p[1] = held;
            }
        }
    }
}

// Prints a diagnostic to stderr when a CUDA runtime call fails.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cuda_ok(cudaError_t status, const char* what) {
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << "\n";
    return false;
}

// Reads an integer array from stdin, sorts it once on the CPU and once on the
// GPU, reports both timings, and prints the GPU result.
// Returns 0 on success; 1 on invalid input, on any CUDA failure, or when the
// GPU result differs from the CPU reference.
int main() {
    int size = 0;
    std::cout << "Enter the size of the array: ";
    if (!(std::cin >> size) || size <= 0) {
        std::cerr << "Array size must be a positive integer\n";
        return 1;
    }

    int* h_arr = new int[size]; // user input; sorted in place by the CPU pass
    int* h_gpu = new int[size]; // receives the GPU-sorted copy
    int* d_arr = nullptr;
    bool ok = true;

    // Input array elements from the user
    std::cout << "Enter " << size << " elements of the array: ";
    for (int i = 0; ok && i < size; i++) {
        if (!(std::cin >> h_arr[i])) {
            std::cerr << "Failed to read element " << i << "\n";
            ok = false;
        }
    }

    const size_t bytes = static_cast<size_t>(size) * sizeof(int);

    // Upload the input BEFORE the CPU sort runs: bubble_sort_cpu sorts h_arr
    // in place, and the GPU must be given the original, unsorted data.
    ok = ok && cuda_ok(cudaMalloc(&d_arr, bytes), "cudaMalloc");
    ok = ok && cuda_ok(cudaMemcpy(d_arr, h_arr, bytes, cudaMemcpyHostToDevice),
                       "cudaMemcpy (host to device)");

    if (ok) {
        // Bubble Sort on CPU
        auto start_cpu = std::chrono::high_resolution_clock::now();
        bubble_sort_cpu(h_arr, size);
        auto end_cpu = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> duration_cpu = end_cpu - start_cpu;
        std::cout << "Sequential Bubble Sort took " << duration_cpu.count() << " seconds\n";

        // Bubble Sort on GPU
        auto start_gpu = std::chrono::high_resolution_clock::now();
        int blockSize = 256; // Threads per block
        int gridSize = (size + blockSize - 1) / blockSize; // Blocks
        bubble_sort<<<gridSize, blockSize>>>(d_arr, size);
        // Launch-configuration errors surface via cudaGetLastError(); faults
        // during execution surface at the synchronizing call.
        ok = cuda_ok(cudaGetLastError(), "bubble_sort launch") &&
             cuda_ok(cudaDeviceSynchronize(), "bubble_sort execution");
        auto end_gpu = std::chrono::high_resolution_clock::now();
        if (ok) {
            std::chrono::duration<double> duration_gpu = end_gpu - start_gpu;
            std::cout << "Parallel Bubble Sort took " << duration_gpu.count() << " seconds\n";
        }
    }

    // Copying sorted data back to host
    ok = ok && cuda_ok(cudaMemcpy(h_gpu, d_arr, bytes, cudaMemcpyDeviceToHost),
                       "cudaMemcpy (device to host)");

    if (ok) {
        // Display sorted array (as produced by the GPU)
        std::cout << "Sorted Array: ";
        for (int i = 0; i < size; i++) {
            std::cout << h_gpu[i] << " ";
        }
        std::cout << std::endl;

        // Cross-check the GPU output against the CPU-sorted reference.
        for (int i = 0; i < size; i++) {
            if (h_gpu[i] != h_arr[i]) {
                std::cerr << "Warning: GPU result differs from the CPU reference at index "
                          << i << "\n";
                ok = false;
                break;
            }
        }
    }

    // Free device memory (cudaFree accepts a null pointer)
    cudaFree(d_arr);
    // Free host memory
    delete[] h_gpu;
    delete[] h_arr;

    return ok ? 0 : 1;
}


Overwriting merge.cu


In [20]:
!nvcc merge.cu -o merge

In [21]:
!./merge

Enter the size of the array: 7
Enter 7 elements of the array: 64 34 25 12 22 11 90
Sequential Bubble Sort took 4.81e-07 seconds
Parallel Bubble Sort took 0.000252905 seconds
Sorted Array: 11 12 22 25 34 64 90 
