In [1]:
! ls /usr/local/

bin    cuda	cuda-11.8  games	       include	lib64	   man	 share
colab  cuda-11	etc	   _gcs_config_ops.so  lib	licensing  sbin  src


In [2]:
! nvcc --version # nvcc compiler version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0


In [3]:
%%writefile VectorOperations.cu
#include<stdio.h>
#include<stdlib.h>
#include<cuda_runtime.h>

// Kernel: builds two neighbour-based vectors from V, one element per thread.
//
//   S1[i]     = V[i] + V[i + 1]                 for i = 0 .. N - 2  (N - 1 values)
//   S2[i - 1] = (V[i + 1] + V[i - 1]) / 2       for i = 1 .. N - 2  (N - 2 values)
//
// Parameters:
//   V  - input vector; indices 0 .. N - 1 are read
//   S1 - output vector; indices 0 .. N - 2 are written
//   S2 - output vector; indices 0 .. N - 3 are written
//   N  - number of elements in V
//
// Launch layout: 1D grid of 1D blocks. Each thread handles the single index
// tid = threadIdx.x + blockIdx.x * blockDim.x, so the launch must provide at
// least N - 1 threads in total; surplus threads fall through both guards and
// write nothing. No shared memory is used.
__global__ void SVector(float* V, float* S1, float* S2, int N){
    int tid = threadIdx.x + blockIdx.x * blockDim.x;

    // Pairwise sum with the right-hand neighbour; the last element of V has none.
    if(tid < N - 1){
      S1[tid] = V[tid] + V[tid + 1];
    }

    // Mean of the two neighbours of an interior element. Index 0 has no left
    // neighbour, so results are shifted down by one to start at S2[0].
    if(tid > 0 && tid < N - 1){
      // 2.0f keeps the division in single precision (a bare 2.0 is a double).
      S2[tid - 1] = (V[tid + 1] + V[tid - 1]) / 2.0f;
    }

}

// Stops the program with a diagnostic if a CUDA runtime call did not succeed.
//   status - value returned by the CUDA runtime call
//   what   - short description of the step, used in the error message
static void checkCuda(cudaError_t status, const char* what){
  if(status != cudaSuccess){
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    exit(EXIT_FAILURE);
  }
}

// Main code executed by the host.
// Fills a 10-element vector V, runs the SVector kernel on the GPU to compute
// S1 (VN - 1 values) and S2 (VN - 2 values), copies both results back and
// prints V, S1 and S2. Returns 0 on success; any failing CUDA call terminates
// the program with EXIT_FAILURE via checkCuda.
int main(void){
  const int VN = 10; // vector V length
  float Vh[VN] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f}; // initialize vector V

  const int S1N = VN - 1; // vector S1 length
  float S1h[S1N];

  const int S2N = VN - 2; // vector S2 length
  float S2h[S2N];

  // Device-side buffers for V, S1 and S2.
  float *Vd, *S1d, *S2d;
  checkCuda(cudaMalloc((void**) &Vd, VN * sizeof(float)), "cudaMalloc(Vd)");
  checkCuda(cudaMalloc((void**) &S1d, S1N * sizeof(float)), "cudaMalloc(S1d)");
  checkCuda(cudaMalloc((void**) &S2d, S2N * sizeof(float)), "cudaMalloc(S2d)");

  // Copy the input vector from host to device.
  checkCuda(cudaMemcpy(Vd, Vh, VN * sizeof(float), cudaMemcpyHostToDevice), "copy V to device");

  int BlockSize = 256; // threads per block
  int NoBlocks = (S1N + BlockSize - 1) / BlockSize; // ceil(S1N / BlockSize): one thread per S1 element

  SVector<<<NoBlocks, BlockSize>>>(Vd, S1d, S2d, VN); // kernel function calling
  // A launch returns no status itself: query the launch error, then wait for
  // the kernel so that execution errors are reported here and not later.
  checkCuda(cudaGetLastError(), "SVector launch");
  checkCuda(cudaDeviceSynchronize(), "SVector execution");

  // Copy both result vectors from device back to host.
  checkCuda(cudaMemcpy(S1h, S1d, S1N * sizeof(float), cudaMemcpyDeviceToHost), "copy S1 to host");
  checkCuda(cudaMemcpy(S2h, S2d, S2N * sizeof(float), cudaMemcpyDeviceToHost), "copy S2 to host");

  printf("\n Vector V.\n");
  for(int i = 0; i < VN; i++){
    printf("%.2lf ", Vh[i]);
  }

  printf("\n Vector S_{1}.\n");
  for(int i = 0; i < S1N; i++){
    printf("%.2lf ", S1h[i]);
  }

  printf("\n Vector S_{2}.\n");
  for(int i = 0; i < S2N; i++){
    printf("%.2lf ", S2h[i]);
  }

  // Free GPU memory.
  checkCuda(cudaFree(Vd), "cudaFree(Vd)");
  checkCuda(cudaFree(S1d), "cudaFree(S1d)");
  checkCuda(cudaFree(S2d), "cudaFree(S2d)");

  return(0);
}

Writing VectorOperations.cu


In [4]:
! nvcc VectorOperations.cu -o test

In [5]:
! ./test


 Vector V.
1.00 2.00 3.00 4.00 5.00 6.00 7.00 8.00 9.00 10.00 
 Vector S_{1}.
3.00 5.00 7.00 9.00 11.00 13.00 15.00 17.00 19.00 
 Vector S_{2}.
2.00 3.00 4.00 5.00 6.00 7.00 8.00 9.00 