In [9]:
%%writefile assignment6.cu
#include <stdio.h>
#include <math.h>
#include <cuda_runtime.h>
#include <chrono>


__global__ void computeSqrt(const float *A, float *C, int N) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < N) {
        C[idx] = sqrtf(A[idx]);
    }
}


void runSqrtKernel(int N) {
    float *h_A = new float[N];
    float *h_C = new float[N];


    for (int i = 0; i < N; ++i)
        h_A[i] = static_cast<float>(i + 1);

    float *d_A, *d_C;
    cudaMalloc(&d_A, N * sizeof(float));
    cudaMalloc(&d_C, N * sizeof(float));

    cudaMemcpy(d_A, h_A, N * sizeof(float), cudaMemcpyHostToDevice);

    int threadsPerBlock = 256;
    int blocks = (N + threadsPerBlock - 1) / threadsPerBlock;


    auto start = std::chrono::high_resolution_clock::now();

    computeSqrt<<<blocks, threadsPerBlock>>>(d_A, d_C, N);
    cudaDeviceSynchronize();

    auto end = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double, std::milli> elapsed = end - start;

    printf("N = %d, Time = %f ms\n", N, elapsed.count());

    cudaMemcpy(h_C, d_C, N * sizeof(float), cudaMemcpyDeviceToHost);


    delete[] h_A;
    delete[] h_C;
    cudaFree(d_A);
    cudaFree(d_C);
}

int main() {
    const int numTests = 4;
    int sizes[numTests] = {50000, 500000, 5000000, 50000000};



    for (int i = 0; i < 4; ++i) {

        runSqrtKernel(sizes[i]);
    }

    return 0;
}


Overwriting assignment6.cu


In [10]:
!nvcc -O2 -o assignment6 assignment6.cu
!./assignment6

N = 50000, Time = 7.629058 ms
N = 500000, Time = 0.095055 ms
N = 5000000, Time = 0.070621 ms
N = 50000000, Time = 0.061834 ms


In [4]:
!pip install pandas xlsxwriter openpyxl


Collecting xlsxwriter
  Downloading XlsxWriter-3.2.3-py3-none-any.whl.metadata (2.7 kB)
Downloading XlsxWriter-3.2.3-py3-none-any.whl (169 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/169.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m169.4/169.4 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.3


In [11]:
import pandas as pd


data = {
    'Array Size': [50000, 500000, 5000000, 50000000],
    'Time (ms)': [7.6290,0.0950,0.0706,0.0618]
}


df = pd.DataFrame(data)


with pd.ExcelWriter('results.xlsx', engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='Timings', index=False)

    workbook  = writer.book
    worksheet = writer.sheets['Timings']


    chart = workbook.add_chart({'type': 'line'})


    chart.add_series({
        'name':       'Execution Time',
        'categories': ['Timings', 1, 0, len(df), 0],
        'values':     ['Timings', 1, 1, len(df), 1],
        'marker':     {'type': 'circle', 'size': 5}
    })


    chart.set_title({'name': 'CUDA Square Root Kernel Performance'})
    chart.set_x_axis({'name': 'Array Size'})
    chart.set_y_axis({'name': 'Execution Time (ms)'})
    chart.set_style(10)


    worksheet.insert_chart('D2', chart)
