### Obtain the GPU

***ATTENTION***: before continuing, open `Edit > Notebook settings` in the menu at the top of the page and make sure that *Hardware accelerator* is set to GPU.

In [None]:
# Print the CUDA compiler version, then the GPU attached to this runtime.
!nvcc --version
!nvidia-smi

***ATTENTION***: you must be sure that the GPU you are going to use is a *Tesla K80*

> **if not**: in the menu at the top of the page click on `Runtime > Factory reset runtime` and re-run the cell above (you may need to repeat this several times)

### Mount the drive

***ATTENTION***: extract all the files of "Project_HPC_CUDA.zip" into a folder called `ContestCUDA` in your Google Drive, then run all the cells below

In [None]:
# Mount Google Drive at /content/drive so the project folder can be reached from the notebook.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Make the project folder the working directory; later cells use paths relative to it.
%cd /content/drive/MyDrive/ContestCUDA/

### Start the GPU

In [None]:
!pip install git+git://github.com/andreinechaev/nvcc4jupyter.git
%load_ext nvcc_plugin

In [None]:
!apt-get --purge remove cuda nvidia* libnvidia-*
!dpkg -l | grep cuda- | awk '{print $2}' | xargs -n1 dpkg --purge
!apt-get remove cuda-*
!apt autoremove
!apt-get update

In [None]:
!wget https://developer.nvidia.com/compute/cuda/9.2/Prod/local_installers/cuda-repo-ubuntu1710-9-2-local_9.2.88-1_amd64 -O cuda-repo-ubuntu1710-9-2-local_9.2.88-1_amd64.deb
!dpkg -i cuda-repo-ubuntu1710-9-2-local_9.2.88-1_amd64.deb
!apt-key add /var/cuda-repo-9-2-local/7fa2af80.pub
!apt-get update
!apt-get install cuda-9.2

In [None]:
# Print the nvcc version so it can be compared with the expected release.
!nvcc --version

***ATTENTION***: you must be sure that the version is *V9.2.88*

> **if not**: something went wrong and everything has to be restarted

### Get all the info (you can skip this section)

##### CPU info

In [None]:
# Dump the kernel's per-processor description.
!cat /proc/cpuinfo

##### MEM info

In [None]:
# Dump the kernel's memory statistics.
!cat /proc/meminfo

##### DSK info

In [None]:
# Show disk usage of the mounted file systems in human-readable units.
!df -h

##### GPU info

- Essential info

In [None]:
%%cu

#include <stdio.h>
#include <stdlib.h>

/*
 * Print the main hardware properties of every CUDA device reported by the runtime:
 * name, compute capability, clocks, per-SM resources, memory sizes and the
 * block/grid launch limits.
 *
 * Errors from the CUDA runtime are reported on stdout. A failed device count ends
 * the query; a failed property lookup skips only the affected device, so an
 * uninitialised cudaDeviceProp is never printed.
 */
void deviceQuery()
{
  cudaDeviceProp prop;
  int nDevices = 0, i;
  cudaError_t ierr;

  ierr = cudaGetDeviceCount(&nDevices);
  if (ierr != cudaSuccess)
  {
    printf("cudaGetDeviceCount error: %s\n", cudaGetErrorString(ierr));
    return;
  }

  for( i = 0; i < nDevices; ++i )
  {
     ierr = cudaGetDeviceProperties(&prop, i);
     if (ierr != cudaSuccess)
     {
       printf("cudaGetDeviceProperties error on device %d: %s\n", i, cudaGetErrorString(ierr));
       continue;
     }

     printf("Device number: %d\n", i);
     printf("  Device name: %s\n", prop.name);
     printf("  Compute capability: %d.%d\n\n", prop.major, prop.minor);

     /* size_t fields (shared memory, global memory) are printed with %zu. */
     printf("  Clock Rate: %d kHz\n", prop.clockRate);
     printf("  Total SMs: %d \n", prop.multiProcessorCount);
     printf("  Shared Memory Per SM: %zu bytes\n", prop.sharedMemPerMultiprocessor);
     printf("  Registers Per SM: %d 32-bit\n", prop.regsPerMultiprocessor);
     printf("  Max threads per SM: %d\n", prop.maxThreadsPerMultiProcessor);
     printf("  L2 Cache Size: %d bytes\n", prop.l2CacheSize);
     printf("  Total Global Memory: %zu bytes\n", prop.totalGlobalMem);
     printf("  Memory Clock Rate: %d kHz\n\n", prop.memoryClockRate);

     printf("  Max threads per block: %d\n", prop.maxThreadsPerBlock);
     printf("  Max threads in X-dimension of block: %d\n", prop.maxThreadsDim[0]);
     printf("  Max threads in Y-dimension of block: %d\n", prop.maxThreadsDim[1]);
     printf("  Max threads in Z-dimension of block: %d\n\n", prop.maxThreadsDim[2]);

     printf("  Max blocks in X-dimension of grid: %d\n", prop.maxGridSize[0]);
     printf("  Max blocks in Y-dimension of grid: %d\n", prop.maxGridSize[1]);
     printf("  Max blocks in Z-dimension of grid: %d\n\n", prop.maxGridSize[2]);

     printf("  Shared Memory Per Block: %zu bytes\n", prop.sharedMemPerBlock);
     printf("  Registers Per Block: %d 32-bit\n", prop.regsPerBlock);
     printf("  Warp size: %d\n\n", prop.warpSize);
  }
}

/* Entry point: print the properties of every visible CUDA device and exit. */
int main()
{
    deviceQuery();
    return 0;
}

- Complete info

In [None]:
%cd /usr/local
%cd cuda-9.2/
%cd samples
%cd 1_Utilities
%cd deviceQuery
!make
!./deviceQuery
%cd /content/drive/MyDrive/ContestCUDA/

- Further info

In [None]:
n = [1000000, 10000000, 100000000]
max = [100, 1000]
block = [128]
!nvcc -o ./executable/execg ./src/global.cu
!nvcc -o ./executable/execs ./src/shared.cu
!nvcc -o ./executable/exect ./src/texture.cu
for length in n:
  for maximum in max:
    for blockSize in block:
      !nvprof --print-gpu-trace ./executable/execg $length $maximum $blockSize
      print("\n\n")
      !nvprof --print-gpu-trace ./executable/execs $length $maximum $blockSize
      print("\n\n")
      !nvprof --print-gpu-trace ./executable/exect $length $maximum $blockSize
      print("\n\n")

##### BANDWIDTH info

In [None]:
%cd /usr/local/cuda/samples
%cd 1_Utilities/bandwidthTest/
!make
!./bandwidthTest --mode-range --start=1000 --end=1000000 --increment=99000
%cd /content/drive/MyDrive/ContestCUDA/

##### Number of executed integer operations

In [None]:
n = [1000000, 10000000, 100000000]
max = [100, 1000]
block = [128]
!nvcc -o ./executable/execg ./src/global.cu
!nvcc -o ./executable/execs ./src/shared.cu
!nvcc -o ./executable/exect ./src/texture.cu
for length in n:
  for maximum in max:
    for blockSize in block:
      !nvprof --metrics inst_integer ./executable/execg $length $maximum $blockSize
      print("\n\n")
      !nvprof --metrics inst_integer ./executable/execs $length $maximum $blockSize
      print("\n\n")
      !nvprof --metrics inst_integer ./executable/exect $length $maximum $blockSize
      print("\n\n")

### Start the measures

***ATTENTION***: this takes a lot of time. For this reason our measurements are already included; you can find them in the `measure` folder.

In [None]:
import datetime
import pytz
TIMEFORMAT='%Y-%m-%d.%H:%M:%S'
TIMESTAMP=datetime.datetime.now().astimezone(pytz.timezone("Europe/Rome")).strftime(TIMEFORMAT)
n = [1000000, 10000000, 100000000]
max = [100, 1000]
block = [32, 64, 128, 256, 512, 1024]
!nvcc -o ./executable/execg ./src/global.cu
!nvcc -o ./executable/execs ./src/shared.cu
!nvcc -o ./executable/exect ./src/texture.cu
for length in n:
  for maximum in max:
    for blockSize in block:
      OUT_FILE_g="./measure/"+TIMESTAMP+"/SIZE-"+str(length)+"-RANGE-"+str(maximum)+"/global_measure_"+str(length)+"_"+str(maximum)+".csv"
      OUT_FILE_s="./measure/"+TIMESTAMP+"/SIZE-"+str(length)+"-RANGE-"+str(maximum)+"/shared_measure_"+str(length)+"_"+str(maximum)+".csv"
      OUT_FILE_t="./measure/"+TIMESTAMP+"/SIZE-"+str(length)+"-RANGE-"+str(maximum)+"/texture_measure_"+str(length)+"_"+str(maximum)+".csv"
      %mkdir -p $(dirname $OUT_FILE_g)
      %mkdir -p $(dirname $OUT_FILE_s)
      %mkdir -p $(dirname $OUT_FILE_t)
      !echo "blockSize;gridSize;elapsedInit;elapsedSort" > $OUT_FILE_g
      !echo "blockSize;gridSize;elapsedInit;elapsedSort" > $OUT_FILE_s
      !echo "blockSize;gridSize;elapsedInit;elapsedSort" > $OUT_FILE_t
      for _ in range(50):
        !./executable/execg $length $maximum $blockSize >> $OUT_FILE_g
        !./executable/execs $length $maximum $blockSize >> $OUT_FILE_s
        !./executable/exect $length $maximum $blockSize >> $OUT_FILE_t