# GROUP 1
### Members:
- Abdul Raafi M. Bandrang
- Dennis Paulo S. Delgado

## C Histogram Program

In [None]:
%%writefile C_Histogram.c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/* Number of histogram buckets; each value is assigned to bucket (value % BINS). */
#define BINS 10
/* Number of int elements in the input vector (268435456 == 2^28). */
#define VEC_SIZE 268435456

/*
 * CPU reference histogram.
 *
 * Fills a vector with the values 0..VEC_SIZE-1, counts how many values fall
 * into each of the BINS buckets (bucket = value % BINS), then prints every
 * bucket count followed by the processor time spent in the counting loop.
 *
 * Returns 0 on success, 1 if the input vector cannot be allocated.
 */
int main()
{
  int *vector=(int *)malloc(sizeof(int) * VEC_SIZE);
  /* VEC_SIZE ints is a large request (1 GiB with 4-byte int); without this
     check a failed allocation would be dereferenced by the fill loop. */
  if (vector == NULL)
  {
    fprintf(stderr, "failed to allocate %zu bytes for the input vector\n",
            sizeof(int) * VEC_SIZE);
    return 1;
  }

  for (int i = 0; i <VEC_SIZE; i++)
  {
    vector[i] = i;
  }

  int histogram[BINS] = {0};

  /* Only the counting loop is timed; allocation and fill are excluded. */
  clock_t start = clock();
  for (int i = 0; i < VEC_SIZE;i++)
  {
    int bin = vector[i] % BINS;
    histogram[bin]++;
  }
  clock_t end = clock();

  printf("Historgram Counter:\n");
  for(int i=0; i < BINS; i++)
  {
    printf("Bin %d: %d\n", i,histogram[i] );
  }

  /* Named `elapsed` rather than `time` so it does not shadow time() from <time.h>. */
  double elapsed =((double)(end-start)) / CLOCKS_PER_SEC;
  printf("execution time:%f seconds",elapsed);

  free(vector);
  return 0;
}

Overwriting C_Histogram.c


In [None]:
%%shell
gcc C_Histogram.c -o C_Histogram



In [None]:
%%shell
./C_Histogram

Historgram Counter:
Bin 0: 26843546
Bin 1: 26843546
Bin 2: 26843546
Bin 3: 26843546
Bin 4: 26843546
Bin 5: 26843546
Bin 6: 26843545
Bin 7: 26843545
Bin 8: 26843545
Bin 9: 26843545
execution time:1.119167 seconds



## CUDA Histogram Program

In [None]:
%%writefile C_CudaHistogram.cu
#include <stdio.h>
#include <cuda.h>
#include <time.h>
/* Number of int elements in the input vector (268435456 == 2^28). */
#define VEC_SIZE 268435456
/* Number of histogram buckets; each value is assigned to bucket (value % BINS). */
#define BINS 10
/* Threads per block used for the kernel launch and for sizing the grid. */
#define threadsPerBlock 256

/*
 * Accumulates a BINS-bucket histogram of vector[0..n-1] into histogram[].
 *
 * Launch layout: 1D grid of 1D blocks. Any grid/block size is valid because
 * elements are visited with a grid-stride loop; a grid that covers n with one
 * thread per element (as the host code launches it) also works.
 * Shared memory: BINS ints, statically allocated.
 * Preconditions: histogram[] holds at least BINS ints and has been zeroed by
 * the caller (the kernel only adds to it); n <= 0 adds nothing.
 *
 * vector     input values
 * histogram  output bucket counts, updated with atomicAdd
 * n          number of elements in vector
 */
__global__ void histogramFunc(int *vector,int *histogram, int n)
{
  /* Per-block private counts: threads contend on shared memory instead of all
     hammering the same BINS global addresses. */
  __shared__ int blockCounts[BINS];

  for (int b = threadIdx.x; b < BINS; b += blockDim.x)
  {
    blockCounts[b] = 0;
  }
  __syncthreads();

  /* 64-bit index so blockIdx.x * blockDim.x and the stride cannot overflow. */
  const long long stride = (long long)gridDim.x * blockDim.x;
  for (long long idx = (long long)blockIdx.x * blockDim.x + threadIdx.x;
       idx < n;
       idx += stride)
  {
    int bin = vector[idx] % BINS;
    /* % keeps the sign of the dividend; fold negative remainders into
       [0, BINS) so a negative input cannot index outside the histogram. */
    if (bin < 0)
    {
      bin += BINS;
    }
    atomicAdd(&blockCounts[bin],1);
  }
  __syncthreads();

  /* One global atomic per non-empty bucket per block. */
  for (int b = threadIdx.x; b < BINS; b += blockDim.x)
  {
    if (blockCounts[b] != 0)
    {
      atomicAdd(&histogram[b],blockCounts[b]);
    }
  }
}

/*
 * Runs a CUDA runtime call from inside main(). On failure it prints the file,
 * line and CUDA error string to stderr and makes main() return 1 (the process
 * exit releases any outstanding allocations).
 */
#define CUDA_CHECK(call)                                                   \
  do {                                                                     \
    cudaError_t status_ = (call);                                          \
    if (status_ != cudaSuccess) {                                          \
      fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__, __LINE__,     \
              cudaGetErrorString(status_));                                \
      return 1;                                                            \
    }                                                                      \
  } while (0)

/*
 * GPU histogram driver.
 *
 * Allocates the input vector and the BINS-entry histogram in managed memory,
 * fills the vector with 0..VEC_SIZE-1 on the host, zeroes the histogram,
 * launches histogramFunc and prints every bucket count followed by the
 * kernel's elapsed time as measured with CUDA events.
 *
 * Returns 0 on success, 1 if any required CUDA call fails.
 */
int main()
{
  int *vector,*histogram;
  int device = 0;
  const size_t vectorBytes = (size_t)VEC_SIZE * sizeof(int);
  /* Only BINS counters are ever touched, so allocate exactly that many. */
  const size_t histogramBytes = (size_t)BINS * sizeof(int);

  CUDA_CHECK(cudaSetDevice(device));
  CUDA_CHECK(cudaMallocManaged(&vector,vectorBytes));
  CUDA_CHECK(cudaMallocManaged(&histogram,histogramBytes));

  /* Memory advice is only a hint: report a rejected hint but keep going. */
  const cudaError_t advice[] = {
    cudaMemAdvise(vector,vectorBytes,cudaMemAdviseSetPreferredLocation,device),
    cudaMemAdvise(histogram,histogramBytes,cudaMemAdviseSetPreferredLocation,device),
    cudaMemAdvise(histogram,histogramBytes,cudaMemAdviseSetAccessedBy,device)
  };
  for (int i = 0; i < 3; i++)
  {
    if (advice[i] != cudaSuccess)
    {
      fprintf(stderr, "warning: cudaMemAdvise failed: %s\n",
              cudaGetErrorString(advice[i]));
    }
  }
  /* Clear any error left by a rejected hint so the launch check below only
     reports problems caused by the launch itself. */
  cudaGetLastError();

  for(int i=0; i<VEC_SIZE;i++)
  {
    vector[i]=i;
  }

  /* The kernel only adds to the histogram, so it must start from zero. */
  CUDA_CHECK(cudaMemset(histogram,0,histogramBytes));

  int blocks = (VEC_SIZE + threadsPerBlock-1) / threadsPerBlock;

  /* A kernel launch returns to the host immediately, so host clocks read
     right after the launch would only measure launch overhead. Events are
     recorded in stream order and bracket the actual kernel execution. */
  cudaEvent_t startEvent, stopEvent;
  CUDA_CHECK(cudaEventCreate(&startEvent));
  CUDA_CHECK(cudaEventCreate(&stopEvent));

  CUDA_CHECK(cudaEventRecord(startEvent,0));
  histogramFunc<<<blocks,threadsPerBlock>>>(vector,histogram,VEC_SIZE);
  CUDA_CHECK(cudaGetLastError());          /* launch-configuration errors */
  CUDA_CHECK(cudaEventRecord(stopEvent,0));
  CUDA_CHECK(cudaEventSynchronize(stopEvent));
  /* Surfaces in-kernel faults and guarantees the device is idle before the
     host reads the managed histogram. */
  CUDA_CHECK(cudaDeviceSynchronize());

  float elapsedMs = 0.0f;
  CUDA_CHECK(cudaEventElapsedTime(&elapsedMs,startEvent,stopEvent));

  for(int i = 0 ; i < BINS;i++)
  {
    printf("Bin %d: %d\n",i,histogram[i]);
  }
  double seconds = (double)elapsedMs / 1000.0;
  printf("execution time:%f seconds",seconds);

  CUDA_CHECK(cudaEventDestroy(startEvent));
  CUDA_CHECK(cudaEventDestroy(stopEvent));
  CUDA_CHECK(cudaFree(vector));
  CUDA_CHECK(cudaFree(histogram));
  return 0;

}

Overwriting C_CudaHistogram.cu


In [None]:
!nvcc -arch=sm_75 C_CudaHistogram.cu -o C_CudaHistogram
!./C_CudaHistogram

Bin 0: 26843546
Bin 1: 26843546
Bin 2: 26843546
Bin 3: 26843546
Bin 4: 26843546
Bin 5: 26843546
Bin 6: 26843545
Bin 7: 26843545
Bin 8: 26843545
Bin 9: 26843545
execution time:0.000241 seconds