In [19]:
! pip install pycuda

import numpy as np
import time

import pycuda.gpuarray as gpuarray
import pycuda.autoinit
from  pycuda import  driver
from pycuda.compiler import SourceModule
from pycuda.curandom import rand as curand



In [20]:
mod = SourceModule("""
  __global__ void pi_calc_gpu(double *x, double *y, double *result_gpu, const int n) {
        
        int gpu_count = 0;
        int idx = threadIdx.x + (blockIdx.x*blockDim.x);
        int thread_count = gridDim.x*blockDim.x;

        for (int i=idx; i<n; i += thread_count) {
          int V;
          V = pow(x[i], 2) + pow(y[i], 2);
          if (V < 1)
            gpu_count += 1;
          
        }

        atomicAdd(result_gpu, gpu_count);
  }    
""")

In [21]:
cpu_count = 0
def pi_calc_cpu(X, Y, N):
  V = X ** 2 + Y ** 2
  gen_cpu = [cpu_count + 1 for i in range(N) if V[i] < 1]
  res = 4/N * sum(gen_cpu)
  return res

N = 65536*16
X, Y = np.random.random(N), np.random.random(N)
block = (256, 1, 1)
grid = (int(N/(128 * block[0])), 1)

cpu_start = time.time()
res_cpu = pi_calc_cpu(X, Y, N)
cpu_time = time.time() - cpu_start
print('Число PI на CPU: ', result_cpu)
print('Время на CPU: ', round(cpu_time, 4))

# т.к. массивы точек являются одномерными
block = (128, 1, 1)
grid = (int(N/(128 * block[0])), 1)

res_gpu = gpuarray.zeros((1,), dtype=np.double)
res_gpu  = res_gpu.get()

calc_gpu = mod.get_function("pi_calc_gpu")

gpu_start = time.time()
calc_gpu(driver.In(X), driver.In(Y), driver.Out(res_gpu), np.int32(N), block = block, grid = grid)
driver.Context.synchronize()
gpu_time = time.time() - gpu_start

res_gpu =  res_gpu[0] * 4/N
print('Число PI на GPU: ', res_gpu)
print('Время на GPU: ', round(gpu_time, 4))
print('Ускорение: ', cpu_time/gpu_time)
print('Сравнение с PI: ')
print('На GPU:', abs(np.pi -  result_gpu) )
print('На CPU:', abs(np.pi -  result_cpu) )

Число PI на CPU:  3.1413002014160156
Время на CPU:  0.3932
Число PI на GPU:  3.145000457763672
Время на GPU:  0.0108
Ускорение:  36.53720564453601
Сравнение с PI: 
На GPU: 0.000292452173777491
На CPU: 0.000292452173777491
