<a href="https://colab.research.google.com/github/godines51/AWS/blob/main/Atividade_cuda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [119]:
!apt search cuda

Sorting... Done
Full Text Search... Done
[32mbart-cuda[0m/jammy 0.7.00-5 amd64
  tools for computational magnetic resonance imaging

[32mboinc-client-nvidia-cuda[0m/jammy 7.18.1+dfsg-4 amd64
  metapackage for CUDA-savvy BOINC client and manager

[32mcublasmp-cuda-11[0m/unknown 0.2.0.385-1 amd64
  NVIDIA cuBLASMp for CUDA 11

[32mcublasmp-cuda-12[0m/unknown 0.2.0.385-1 amd64
  NVIDIA cuBLASMp for CUDA 12

[32mcuda[0m/unknown 12.4.1-1 amd64
  CUDA meta-package

[32mcuda-11-7[0m/unknown 11.7.1-1 amd64
  CUDA 11.7 meta-package

[32mcuda-11-8[0m/unknown 11.8.0-1 amd64
  CUDA 11.8 meta-package

[32mcuda-12-0[0m/unknown 12.0.1-1 amd64
  CUDA 12.0 meta-package

[32mcuda-12-1[0m/unknown 12.1.1-1 amd64
  CUDA 12.1 meta-package

[32mcuda-12-2[0m/unknown 12.2.2-1 amd64
  CUDA 12.2 meta-package

[32mcuda-12-3[0m/unknown 12.3.2-1 amd64
  CUDA 12.3 meta-package

[32mcuda-12-4[0m/unknown 12.4.1-1 amd64
  CUDA 12.4 meta-package

[32mcuda-cccl-11-7[0m/unknown 11.7.91-1 amd64
  

In [120]:
!nvidia-smi

Sun May 19 00:39:27 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [121]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0


In [122]:
%%writefile multiply_blocks_threads.cu
#include
#include

// Kernel function to multiply the elements of two arrays
_global_
void multiply(int n, float *x, float *y, float *z)
{
    int index = threadIdx.x + blockIdx.x * blockDim.x;
    int stride = blockDim.x * gridDim.x;
    for (int i = index; i < n; i += stride)
        z[i] = x[i] * y[i];
}

int main(void)
{
    int N = 1<<20;
    float *x, *y, *z;

    // Allocate Unified Memory – accessible from CPU or GPU
    cudaMallocManaged(&x, N*sizeof(float));
    cudaMallocManaged(&y, N*sizeof(float));
    cudaMallocManaged(&z, N*sizeof(float));

    // initialize x and y arrays on the host
    for (int i = 0; i < N; i++) {
        x[i] = 1.0f;
        y[i] = 2.0f;
    }

    // Run kernel on 1M elements on the GPU
    int threadsPerBlock = 256; // Define the number of threads per block
    int numBlocks = (N + threadsPerBlock - 1) / threadsPerBlock; // Calculate the number of blocks needed
    multiply<<>>(N, x, y, z);

    // Wait for GPU to finish before accessing on host
    cudaDeviceSynchronize();

    // Check for errors (all values should be 2.0f)
    float maxError = 0.0f;
    for (int i = 0; i < N; i++)
        maxError = fmax(maxError, fabs(z[i]-2.0f));
    std::cout << "Max error: " << maxError << std::endl;

    // Free memory
    cudaFree(x);
    cudaFree(y);
    cudaFree(z);

    return 0;
}

Overwriting multiply_blocks_threads.cu


In [123]:
!nvcc -o add_cuda add.cu

In [124]:
!nvprof ./add_cuda

==24779== NVPROF is profiling process 24779, command: ./add_cuda
Max error: 0
==24779== Profiling application: ./add_cuda
==24779== Profiling result:
            Type  Time(%)      Time     Calls       Avg       Min       Max  Name
 GPU activities:  100.00%  109.55ms         1  109.55ms  109.55ms  109.55ms  add(int, float*, float*)
      API calls:   66.48%  219.22ms         2  109.61ms  43.344us  219.18ms  cudaMallocManaged
                   33.22%  109.56ms         1  109.56ms  109.56ms  109.56ms  cudaDeviceSynchronize
                    0.16%  524.04us         2  262.02us  251.97us  272.07us  cudaFree
                    0.08%  277.62us         1  277.62us  277.62us  277.62us  cudaLaunchKernel
                    0.05%  156.15us       114  1.3690us     151ns  62.114us  cuDeviceGetAttribute
                    0.00%  15.666us         1  15.666us  15.666us  15.666us  cuDeviceGetName
                    0.00%  6.1930us         1  6.1930us  6.1930us  6.1930us  cuDeviceGetPCIBusId
    

In [125]:
!time ./add_cuda

Max error: 0

real	0m0.350s
user	0m0.137s
sys	0m0.207s
