<a href="https://colab.research.google.com/github/VetaAgafonova/HPC_labs/blob/PiMonteCarlo/Lab3_Pi_Monte_Carlo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!/usr/local/cuda/bin/nvcc --version
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git
%load_ext nvcc_plugin
!cuda-install-samples-11.2.sh ~ && cd /root/NVIDIA_CUDA-11.2_Samples/

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0
Collecting git+https://github.com/andreinechaev/nvcc4jupyter.git
  Cloning https://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-ur4ir9_h
  Running command git clone --filter=blob:none --quiet https://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-ur4ir9_h
  Resolved https://github.com/andreinechaev/nvcc4jupyter.git to commit 0a71d56e5dce3ff1f0dd2c47c29367629262f527
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: NVCCPlugin
  Building wheel for NVCCPlugin (setup.py) ... done
  Created wheel for NVCCPlugin: filename=NVCCPlugin-0.0.2-py3-none-any.whl size=4295 sha256=1a69335f1d8808a31c8c22fd2e5c2a370969b465c2652d8d93bf35927756170d
  Stored in directory: /tmp/pip-ephem-wheel-cache-z2wbfyqd/wheels/

In [68]:
%%cuda --name curand.cu
#include <stdio.h>
#include <iostream>
#include <cstdlib>
#include <curand.h>
#include <cublas_v2.h>
#include <random>
#define BLOCK_DIM_X 1000
using namespace std;

// Reports a failed cuRAND call on stderr and terminates the program.
//   status - value returned by the cuRAND call
//   what   - short description of the step, used in the diagnostic
static void check_curand(curandStatus_t status, const char *what) {
    if (status != CURAND_STATUS_SUCCESS) {
        fprintf(stderr, "cuRAND error %d during %s\n", (int) status, what);
        exit(EXIT_FAILURE);
    }
}

// Fills a device array with uniformly distributed pseudo-random floats using cuRAND.
//   A - device pointer with room for at least N floats
//   N - number of values to generate; the call is a no-op when N <= 0
// Any cuRAND failure is reported on stderr and terminates the program, because
// continuing would leave A holding uninitialised data.
void GPU_fill_rand(float *A, int N) {
    if (N <= 0) return;

    // clock() alone is not a safe seed: two calls made shortly after each other
    // can observe the same value, which would give both arrays the same sequence.
    // Mixing in a per-call counter keeps the seeds of successive calls apart.
    static unsigned long long call_count = 0;
    const unsigned long long seed =
        (unsigned long long) clock() + 0x9E3779B97F4A7C15ULL * (++call_count);

    // Create a pseudo-random number generator
    curandGenerator_t prng;
    check_curand(curandCreateGenerator(&prng, CURAND_RNG_PSEUDO_DEFAULT), "generator creation");
    check_curand(curandSetPseudoRandomGeneratorSeed(prng, seed), "generator seeding");
    // Fill the array with random numbers on the device
    check_curand(curandGenerateUniform(prng, A, N), "uniform generation");
    // Release the generator; it was previously leaked on every call
    check_curand(curandDestroyGenerator(prng), "generator destruction");
}


// Estimates pi on the host with the Monte Carlo method.
// Each pair (x[i], y[i]) is treated as a point; the estimate is four times the
// fraction of points whose squared distance from the origin is strictly below 1.
//   x, y - host arrays with at least n coordinates each
//   n    - number of points
// Returns the estimate, or 0 when n <= 0 (no samples; avoids a 0/0 division).
float pi_monte_carlo_cpu(float* x, float* y, int n) {
    if (n <= 0) return 0.0f;

    // Integer counter: a float counter stops increasing once it reaches 2^24,
    // which silently caps the estimate for large sample counts.
    long long inside = 0;
    for (int i = 0; i < n; i++) {
        const float r2 = x[i]*x[i] + y[i]*y[i];
        if (r2 < 1.0f) inside++;
    }
    return 4.0f * static_cast<float>(inside) / static_cast<float>(n);
}

// Monte Carlo estimate of pi on the device.
// Each thread counts how many of its points (x[i], y[i]) have a squared distance
// from the origin strictly below 1; the block's counts are summed in shared
// memory and thread 0 adds the block's share, 4 * hits / n, to *res.
//   x, y - device arrays with at least n coordinates each
//   n    - number of points; the kernel does nothing when n <= 0
//   res  - device accumulator; must be set to 0 before the launch
// Launch layout: 1-D grid of 1-D blocks. blockDim.x must not exceed BLOCK_DIM_X,
// the size of the static shared array. The grid may have any number of blocks
// because every thread strides over the input.
__global__ void pi_monte_carlo_gpu(float* x, float* y, int n, float* res)
{
    __shared__ unsigned int hits[BLOCK_DIM_X];

    // Same decision for every thread, so returning before the barriers is safe.
    if (n <= 0) return;

    // Per-thread hit count. 64-bit indices keep idx + stride from overflowing.
    unsigned int local = 0;
    const long long stride = (long long) gridDim.x * blockDim.x;
    for (long long idx = threadIdx.x + (long long) blockIdx.x * blockDim.x;
         idx < n; idx += stride) {
        const float r2 = x[idx]*x[idx] + y[idx]*y[idx];
        if (r2 < 1.0f) local++;
    }

    // Every thread writes its slot, including threads that had no point to
    // process; leaving those slots unwritten made the block sum read
    // uninitialised shared memory.
    hits[threadIdx.x] = local;
    __syncthreads();

    // Pairwise reduction in shared memory that also handles block sizes that
    // are not a power of two: the upper part of the active range is folded onto
    // the lower part, and with an odd count the middle slot is carried over.
    for (unsigned int active = blockDim.x; active > 1; ) {
        const unsigned int half = (active + 1) / 2;
        if (threadIdx.x < active / 2)
            hits[threadIdx.x] += hits[threadIdx.x + half];
        __syncthreads();
        active = half;
    }

    // One atomic per block
    if (threadIdx.x == 0) {
        atomicAdd(res, 4.0f * static_cast<float>(hits[0]) / static_cast<float>(n));
    }
}

// Reports a failed CUDA runtime call on stderr and terminates the program.
//   status - value returned by the CUDA runtime call
//   what   - short description of the step, used in the diagnostic
static void check_cuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "\nCUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Generates N random points on the device, estimates pi from them on the CPU
// and on the GPU, and prints both estimates together with their run times in
// seconds. Returns 0 on success; any allocation or CUDA failure is reported on
// stderr and ends the program with a failure status.
int main() {
    int N = 1000; // number of points
    cout << "N = " << N;

    // Host memory for the point coordinates
    float *X = (float *)malloc(N * sizeof(float)); // X coordinates
    float *Y = (float *)malloc(N * sizeof(float)); // Y coordinates
    if (X == NULL || Y == NULL) {
        fprintf(stderr, "\nHost allocation for %d points failed\n", N);
        free(X);
        free(Y);
        return EXIT_FAILURE;
    }

    // Device memory for the point coordinates
    float *d_X, *d_Y;
    check_cuda(cudaMalloc(&d_X, N * sizeof(float)), "cudaMalloc(d_X)");
    check_cuda(cudaMalloc(&d_Y, N * sizeof(float)), "cudaMalloc(d_Y)");

    // Fill both arrays with random numbers on the GPU
    GPU_fill_rand(d_X, N);
    GPU_fill_rand(d_Y, N);

    // Copy the generated points to the host for the CPU reference
    check_cuda(cudaMemcpy(X, d_X, N * sizeof(float), cudaMemcpyDeviceToHost), "copy of X to host");
    check_cuda(cudaMemcpy(Y, d_Y, N * sizeof(float), cudaMemcpyDeviceToHost), "copy of Y to host");

    // Estimate pi on the CPU and time it
    clock_t start = clock();
    float pi_cpu = pi_monte_carlo_cpu(X, Y, N);
    clock_t end = clock();
    double time_cpu = static_cast <double>(end - start) / static_cast <double>(CLOCKS_PER_SEC);
    cout << "\nPI CPU = " << pi_cpu << "\tCPU time = " << time_cpu;

    // ------------------------------------------------------------------ GPU part
    // The kernel accumulates into *d_pi, so it has to start at zero.
    float pi_gpu = 0.0f;
    float* d_pi;
    check_cuda(cudaMalloc(&d_pi, sizeof(float)), "cudaMalloc(d_pi)");
    check_cuda(cudaMemcpy(d_pi, &pi_gpu, sizeof(float), cudaMemcpyHostToDevice), "reset of d_pi");

    // Integer ceil-division: enough blocks to give every point a thread,
    // without the rounding of a float-based ceil for large N.
    dim3 block_dim(BLOCK_DIM_X);
    dim3 grid_dim((N + BLOCK_DIM_X - 1) / BLOCK_DIM_X);

    cudaEvent_t begin, stop;
    check_cuda(cudaEventCreate(&begin), "cudaEventCreate(begin)");
    check_cuda(cudaEventCreate(&stop), "cudaEventCreate(stop)");

    check_cuda(cudaEventRecord(begin, 0), "cudaEventRecord(begin)");
    pi_monte_carlo_gpu<<<grid_dim, block_dim>>>(d_X, d_Y, N, d_pi);
    // A launch does not return a status; configuration errors show up here.
    check_cuda(cudaGetLastError(), "kernel launch");
    check_cuda(cudaEventRecord(stop, 0), "cudaEventRecord(stop)");
    // Waiting for the stop event also surfaces errors raised while the kernel ran.
    check_cuda(cudaEventSynchronize(stop), "kernel execution");
    float gpu_time; // milliseconds
    check_cuda(cudaEventElapsedTime(&gpu_time, begin, stop), "cudaEventElapsedTime");

    check_cuda(cudaMemcpy(&pi_gpu, d_pi, sizeof(float), cudaMemcpyDeviceToHost), "copy of result to host");
    cout << "\nPI GPU = " << pi_gpu << "\tGPU time = " << gpu_time / 1000.;

    // Release the timing events
    check_cuda(cudaEventDestroy(begin), "cudaEventDestroy(begin)");
    check_cuda(cudaEventDestroy(stop), "cudaEventDestroy(stop)");
    // Release device memory
    check_cuda(cudaFree(d_X), "cudaFree(d_X)");
    check_cuda(cudaFree(d_Y), "cudaFree(d_Y)");
    check_cuda(cudaFree(d_pi), "cudaFree(d_pi)");
    // Release host memory
    free(X);
    free(Y);
    return 0;
}



'File written in /content/src/curand.cu'

In [69]:
!nvcc -o /content/src/curand /content/src/curand.cu -lcurand -lcublas

In [70]:
!/content/src/curand

N = 1000
PI CPU = 3.144	CPU time = 1e-05
PI GPU = 3.144	GPU time = 2.4e-05