<a href="https://colab.research.google.com/github/cocyten27/Curso_CUDA/blob/main/ECU1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!uv pip install -q --system numba-cuda==0.4.0
import numpy as np
from numba import cuda
import time
import os
from numba import config
# NOTE(review): presumably tells numba-cuda to link kernels through pynvjitlink;
# confirm against the numba-cuda configuration docs for the pinned release.
config.CUDA_ENABLE_PYNVJITLINK = 1

In [None]:
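# CUDA steps: define the kernel, initialize the data on the CPU, transfer it to
# the GPU, launch the kernel, and copy the result back to the CPU.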

# 1. CUDA kernel
@cuda.jit
def first_kernel(a, result):
  """Copy ``a`` into ``result``, one element per thread.

  Args:
    a: Input array to read from.
    result: Output array; ``result[i]`` receives ``a[i]`` for every ``i``
      below ``a.size``.
  """
  tid = cuda.grid(1)  # this thread's position in the 1-D launch grid
  # Threads whose index lies past the end of `a` have nothing to copy.
  if tid >= a.size:
    return
  result[tid] = a[tid]

# Host
def main(n: int = 10_000_000, threads_per_block: int = 128) -> None:
  """Time an element-wise array copy on the CPU and on the GPU.

  Builds a float32 array ``0, 1, ..., n - 1``, copies it once with NumPy and
  once with ``first_kernel``, then prints the CPU time and the GPU
  host-to-device, kernel, and device-to-host times in milliseconds.

  Args:
    n: Number of elements to copy; must be at least 1.
    threads_per_block: Number of threads in each CUDA block; must be at least 1.

  Raises:
    ValueError: If ``n`` or ``threads_per_block`` is smaller than 1.
    RuntimeError: If the array copied back from the GPU differs from the
      CPU result.
  """
  # Validate before allocating anything: a non-positive size would yield an
  # empty or invalid launch configuration further down.
  if n < 1:
    raise ValueError(f"n must be >= 1, got {n}")
  if threads_per_block < 1:
    raise ValueError(f"threads_per_block must be >= 1, got {threads_per_block}")

  # 2. Initialize data on CPU
  a_cpu = np.arange(n, dtype=np.float32)

  # -------------------------------------
  # CPU computation
  # -------------------------------------
  # Use a real copy as the baseline: plain assignment would only bind a second
  # name to the same array, so there would be nothing to time.
  start = time.perf_counter()
  result_cpu = a_cpu.copy()
  cpu_time = time.perf_counter() - start
  print(f"CPU time: {cpu_time * 1e3:.2f} ms")

  # ----------------------------------------
  # GPU computation
  # ----------------------------------------
  # 3. Transfer from CPU to GPU
  start = time.perf_counter()
  a_gpu = cuda.to_device(a_cpu)
  result_gpu = cuda.device_array_like(a_cpu)  # allocate device memory for the output
  transfer_in_time = time.perf_counter() - start

  # 4. Kernel launch
  # Ceiling division so the grid covers all n elements even when n is not a
  # multiple of the block size.
  blocks_per_grid = (n + threads_per_block - 1) // threads_per_block
  launch = first_kernel[blocks_per_grid, threads_per_block]

  # Warm-up launch: the first call compiles the kernel, which would otherwise
  # dominate the measured kernel time.
  launch(a_gpu, result_gpu)
  cuda.synchronize()

  start = time.perf_counter()
  launch(a_gpu, result_gpu)
  cuda.synchronize()  # kernel launches are asynchronous; wait before stopping the clock
  kernel_time = time.perf_counter() - start

  # 5. Copy back
  start = time.perf_counter()
  result_from_gpu = result_gpu.copy_to_host()
  cuda.synchronize()
  transfer_out_time = time.perf_counter() - start

  # Compare now, but raise only after the report and cleanup below.
  results_match = np.array_equal(result_from_gpu, result_cpu)

  # Report
  print(f"GPU transfer to device: {transfer_in_time * 1e3:.2f} ms")
  print(f"GPU kernel execution:   {kernel_time * 1e3:.2f} ms")
  print(f"GPU transfer to host:   {transfer_out_time * 1e3:.2f} ms")
  print(f"Total GPU time:         {(transfer_in_time + kernel_time + transfer_out_time) * 1e3:.2f} ms")

  # Cleanup
  del a_gpu, result_gpu
  cuda.close()

  if not results_match:
    raise RuntimeError("GPU result does not match the CPU result")

# Run main() only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
  main()


CPU time: 0.00 ms
GPU transfer to device: 179.43 ms
GPU kernel execution:   1063.13 ms
GPU transfer to host:   15.02 ms
Total GPU time:         1257.58 ms
