In [None]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0


In [None]:
%%writefile vector.cu
#include <stdio.h>
#include <stdlib.h>

#define N 50

// Element-wise integer addition kernel: c[k] = a[k] + b[k] for every k < N.
//
// Each thread derives one global index from its x-dimension block and thread
// coordinates and handles at most one element. Threads whose index falls at
// or beyond N exit without touching memory, so the launch may contain more
// threads than there are elements.
//
// a, b : input vectors, read at indices below N
// c    : output vector, written at indices below N
__global__
void vectorAdd(int* a, int* b, int* c)
{
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;

    // Surplus threads in the last block have nothing to do.
    if (idx >= N)
        return;

    const int lhs = a[idx];
    const int rhs = b[idx];
    c[idx] = lhs + rhs;
}

// Evaluates a CUDA runtime call and terminates the program with a
// file/line diagnostic on stderr if the call did not return cudaSuccess.
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        cudaError_t err_ = (call);                                          \
        if (err_ != cudaSuccess) {                                          \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,          \
                    __LINE__, cudaGetErrorString(err_));                    \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

/*
 * Adds two N-element integer vectors on the GPU and prints every sum.
 *
 * Steps: allocate and fill the host inputs, allocate device buffers, copy
 * the inputs to the device, launch vectorAdd, copy the result back, print
 * "a + b = c" for each element, then release all memory.
 *
 * Returns 0 on success. Returns EXIT_FAILURE if a host allocation fails;
 * any failing CUDA call terminates the process through CUDA_CHECK.
 */
int main()
{
    const size_t bytes = N * sizeof(int);

    int *a, *b, *c;        // Host vectors
    int *d_a, *d_b, *d_c;  // Device vectors

    // Allocate memory for host vectors
    a = (int*)malloc(bytes);
    b = (int*)malloc(bytes);
    c = (int*)malloc(bytes);
    if (a == NULL || b == NULL || c == NULL)
    {
        fprintf(stderr, "Host allocation of %zu bytes failed\n", bytes);
        // free(NULL) is a no-op, so releasing all three is safe here.
        free(a);
        free(b);
        free(c);
        return EXIT_FAILURE;
    }

    // Initialize host vectors
    for (int i = 0; i < N; ++i)
    {
        a[i] = i;
        b[i] = i;
    }

    // Allocate memory for device vectors
    CUDA_CHECK(cudaMalloc(&d_a, bytes));
    CUDA_CHECK(cudaMalloc(&d_b, bytes));
    CUDA_CHECK(cudaMalloc(&d_c, bytes));

    // Copy host vectors to device
    CUDA_CHECK(cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_b, b, bytes, cudaMemcpyHostToDevice));

    // Launch kernel on device; round the grid size up so all N elements
    // are covered even when N is not a multiple of the block size.
    int blockSize = 256;
    int gridSize = (N + blockSize - 1) / blockSize;
    vectorAdd<<<gridSize, blockSize>>>(d_a, d_b, d_c);
    // A launch does not return a status; invalid launch configurations
    // are only visible through cudaGetLastError().
    CUDA_CHECK(cudaGetLastError());

    // Copy result from device to host. This blocking copy also waits for
    // the kernel to finish and reports any error raised while it ran.
    CUDA_CHECK(cudaMemcpy(c, d_c, bytes, cudaMemcpyDeviceToHost));

    // Print the result
    for (int i = 0; i < N; ++i)
    {
        printf("%d + %d = %d\n", a[i], b[i], c[i]);
    }

    // Free device memory
    CUDA_CHECK(cudaFree(d_a));
    CUDA_CHECK(cudaFree(d_b));
    CUDA_CHECK(cudaFree(d_c));

    // Free host memory
    free(a);
    free(b);
    free(c);

    return 0;
}

Overwriting vector.cu


In [None]:
!nvcc vector.cu -o vect

In [None]:
!./vect


0 + 0 = 0
1 + 1 = 2
2 + 2 = 4
3 + 3 = 6
4 + 4 = 8
5 + 5 = 10
6 + 6 = 12
7 + 7 = 14
8 + 8 = 16
9 + 9 = 18
10 + 10 = 20
11 + 11 = 22
12 + 12 = 24
13 + 13 = 26
14 + 14 = 28
15 + 15 = 30
16 + 16 = 32
17 + 17 = 34
18 + 18 = 36
19 + 19 = 38
20 + 20 = 40
21 + 21 = 42
22 + 22 = 44
23 + 23 = 46
24 + 24 = 48
25 + 25 = 50
26 + 26 = 52
27 + 27 = 54
28 + 28 = 56
29 + 29 = 58
30 + 30 = 60
31 + 31 = 62
32 + 32 = 64
33 + 33 = 66
34 + 34 = 68
35 + 35 = 70
36 + 36 = 72
37 + 37 = 74
38 + 38 = 76
39 + 39 = 78
40 + 40 = 80
41 + 41 = 82
42 + 42 = 84
43 + 43 = 86
44 + 44 = 88
45 + 45 = 90
46 + 46 = 92
47 + 47 = 94
48 + 48 = 96
49 + 49 = 98


In [None]:
!nvprof ./vect

==11401== NVPROF is profiling process 11401, command: ./vect
0 + 0 = 0
1 + 1 = 2
2 + 2 = 4
3 + 3 = 6
4 + 4 = 8
5 + 5 = 10
6 + 6 = 12
7 + 7 = 14
8 + 8 = 16
9 + 9 = 18
10 + 10 = 20
11 + 11 = 22
12 + 12 = 24
13 + 13 = 26
14 + 14 = 28
15 + 15 = 30
16 + 16 = 32
17 + 17 = 34
18 + 18 = 36
19 + 19 = 38
20 + 20 = 40
21 + 21 = 42
22 + 22 = 44
23 + 23 = 46
24 + 24 = 48
25 + 25 = 50
26 + 26 = 52
27 + 27 = 54
28 + 28 = 56
29 + 29 = 58
30 + 30 = 60
31 + 31 = 62
32 + 32 = 64
33 + 33 = 66
34 + 34 = 68
35 + 35 = 70
36 + 36 = 72
37 + 37 = 74
38 + 38 = 76
39 + 39 = 78
40 + 40 = 80
41 + 41 = 82
42 + 42 = 84
43 + 43 = 86
44 + 44 = 88
45 + 45 = 90
46 + 46 = 92
47 + 47 = 94
48 + 48 = 96
49 + 49 = 98
==11401== Profiling application: ./vect
==11401== Profiling result:
            Type  Time(%)      Time     Calls       Avg       Min       Max  Name
 GPU activities:   46.32%  4.6400us         1  4.6400us  4.6400us  4.6400us  vectorAdd(int*, int*, int*)
                   32.58%  3.2640us         2  1.6320us  1.