In [4]:
%%writefile ok.cu
#include <iostream>
#include <cstdlib>
#include <ctime>
#include <cuda_runtime.h>

#define ARRAY_SIZE 10000
#define BLOCK_SIZE 256

// CUDA kernel for parallel Bubble Sort
__global__ void bubbleSort(int *arr, int size) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    if (idx < size) {
        for (int i = 0; i < size - 1; ++i) {
            for (int j = 0; j < size - i - 1; ++j) {
                if (arr[j] > arr[j + 1]) {
                    int temp = arr[j];
                    arr[j] = arr[j + 1];
                    arr[j + 1] = temp;
                }
            }
        }
    }
}

// CUDA kernel for parallel Merge Sort (split and merge phases)
__global__ void mergeSort(int *arr, int *temp, int left, int right) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    if (idx < right - left) {
        int mid = left + (right - left) / 2;
        int i = left, j = mid + 1, k = left;

        // Merge two sorted subarrays into temp array
        while (i <= mid && j <= right) {
            if (arr[i] <= arr[j])
                temp[k++] = arr[i++];
            else
                temp[k++] = arr[j++];
        }
        while (i <= mid)
            temp[k++] = arr[i++];
        while (j <= right)
            temp[k++] = arr[j++];

        // Copy temp array back to original array
        for (int i = left; i <= right; ++i)
            arr[i] = temp[i];
    }
}

// Function to initialize array with random values
void initializeArray(int *arr, int size) {
    srand(time(NULL));
    for (int i = 0; i < size; ++i)
        arr[i] = rand() % 1000; // Generate random values between 0 and 999
}

int main() {
    // Initialize array with random values
    int *arr = new int[ARRAY_SIZE];
    initializeArray(arr, ARRAY_SIZE);

    // Sequential Bubble Sort
    clock_t start = clock();
    // Call sequential Bubble Sort function here
    clock_t end = clock();
    double seqTime = double(end - start) / CLOCKS_PER_SEC;

    // Allocate memory on device
    int *d_arr, *d_temp;
    cudaMalloc((void**)&d_arr, ARRAY_SIZE * sizeof(int));
    cudaMalloc((void**)&d_temp, ARRAY_SIZE * sizeof(int));

    // Copy data from host to device
    cudaMemcpy(d_arr, arr, ARRAY_SIZE * sizeof(int), cudaMemcpyHostToDevice);

    // Parallel Bubble Sort
    start = clock();
    bubbleSort<<<(ARRAY_SIZE + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE>>>(d_arr, ARRAY_SIZE);
    cudaDeviceSynchronize();
    end = clock();
    double parallelBubbleTime = double(end - start) / CLOCKS_PER_SEC;

    // Parallel Merge Sort
    start = clock();
    mergeSort<<<(ARRAY_SIZE + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE>>>(d_arr, d_temp, 0, ARRAY_SIZE - 1);
    cudaDeviceSynchronize();
    end = clock();
    double parallelMergeTime = double(end - start) / CLOCKS_PER_SEC;

    // Copy sorted array from device to host
    cudaMemcpy(arr, d_arr, ARRAY_SIZE * sizeof(int), cudaMemcpyDeviceToHost);

    // Free device memory
    cudaFree(d_arr);
    cudaFree(d_temp);

    // Output results
    std::cout << "Sequential Bubble Sort Time: " << seqTime << " seconds" << std::endl;
    std::cout << "Parallel Bubble Sort Time: " << parallelBubbleTime << " seconds" << std::endl;
    std::cout << "Parallel Merge Sort Time: " << parallelMergeTime << " seconds" << std::endl;

    delete[] arr;

    return 0;
}


Overwriting ok.cu


In [5]:
%%script bash
nvcc ok.cu -o assign1

In [None]:
!./assign1

Sequential Bubble Sort Time: 1e-06 seconds
Parallel Bubble Sort Time: 1.4e-05 seconds
Parallel Merge Sort Time: 1e-06 seconds


In [None]:
%%writefile sorting.cu