# License

    Jupyter notebook for computing Pi using Monte Carlo sampling
    Copyright (C) 2018 Andre.Brodtkorb@ifi.uio.no

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.

In [1]:
#Let's have matplotlib "inline"
%matplotlib inline

#Import packages we need
import numpy as np
import pycuda.compiler as cuda_compiler
from pycuda.gpuarray import GPUArray
import pycuda.driver as cuda_driver

from matplotlib import pyplot as plt

import IPythonMagic

In [2]:
%cuda_context_handler context

In [3]:
#CUDA source for the random number kernel and the (still unimplemented) Pi kernel.
#The generator state is held in unsigned long long instead of long: unsigned
#arithmetic wraps on overflow by definition (signed overflow is undefined
#behaviour in C++), and the width is 64 bits no matter how wide the host
#compiler makes long.
cuda_kernel = """
//Linear congruential generator (LCG) based on Stroustrup, adapted for CUDA
/**
  * @param last_draw State of the LCG, advanced in place on every call
  * @return The low 31 bits of the new state divided by 2^31. The division is
  *         carried out in double precision and then narrowed to float, so the
  *         very largest draws can round up to exactly 1.0f.
  */
__device__ float generateRandomNumber(unsigned long long& last_draw) {
    last_draw = last_draw*1103515245ull + 12345ull;
    unsigned long long low_bits = last_draw & 0x7fffffffull;
    return low_bits / 2147483648.0;
}


/**
  * Writes one random value per block, rounded to 0 or 1, to output.
  *
  * @param output Where to place results (one element per block)
  * @param seed Seed used to seed the RNG (Linear congruential generator)
  * Use with only 1 thread per block: the block index alone selects the output element
  */
__global__ void generateRandomNumbers(unsigned int* output, unsigned int seed) {
    unsigned int tid = blockIdx.x;

    //Give every block its own initial state: seed plus a multiple of (2^64-1)/gridDim.x
    unsigned long long spacing = 18446744073709551615ull / static_cast<unsigned long long>(gridDim.x);
    unsigned long long last_draw = seed + tid*spacing; //Keeps track of the last drawn value

    //Draw one number and round it to the nearest integer
    output[tid] = int(generateRandomNumber(last_draw) + 0.5f);
}


/**
  * @param output Where to place results
  * @param seed Seed used to seed the RNG (Linear congruential generator)
  * Use with only 1 thread per block
  */
__global__ void computePi1(unsigned int* output, unsigned int seed) {
    //You need to implement this kernel!
}
"""
#Compile the CUDA source and fetch a handle to the random number kernel
module = cuda_compiler.SourceModule(cuda_kernel)
randomNumbers = module.get_function("generateRandomNumbers")

In [4]:
def computePiCPU(n_points):
    """
    Estimates Pi on the CPU using Monte Carlo sampling.

    Draws n_points points uniformly in the unit square and counts how many of
    them fall inside the quarter circle of radius 1. The fraction of points
    inside approximates Pi/4.

    Args:
        n_points: Number of random points to draw (positive integer).

    Returns:
        The estimate of Pi as a float.

    Raises:
        ValueError: If n_points is less than 1.
    """
    if n_points < 1:
        raise ValueError("n_points must be a positive integer, got " + str(n_points))

    #Draw all points in one call instead of looping in Python;
    #each row holds one (x, y) pair
    points = np.random.rand(n_points, 2)
    x = points[:, 0]
    y = points[:, 1]

    #A point is inside the circle when its squared radius is below 1,
    #so the square root can be skipped
    n_inside = np.count_nonzero(x*x + y*y < 1.0)

    return 4*n_inside/n_points

print("Pi computed on the CPU is " + str(computePiCPU(100000)))

Pi computed on the CPU is 3.14484


In [5]:
def computePi1GPU(n_points):
    """
    Placeholder for the GPU version of the Pi computation.

    TODO: this is not implemented yet. For now it launches the
    generateRandomNumbers kernel with one single-thread block per point and
    seed 0, copies the result to the host and prints its sum.

    Args:
        n_points: Number of blocks to launch, and number of output elements.

    Returns:
        Always 0.0, since Pi is not actually computed.
    """
    #You have to implement this!
    print("ComputePiGPU is not properly implemented: fix this!")

    #Host buffer and a device buffer of the same shape
    host_result = np.empty((n_points, 1), dtype=np.uint32)
    device_result = GPUArray(host_result.shape, np.uint32)

    #One thread per block, one block per point
    launch_config = dict(block=(1, 1, 1), grid=(n_points, 1, 1))

    #Run the kernel on the device
    print("I didn't compute Pi, but I generated some random numbers for you.")
    randomNumbers(device_result.gpudata, np.uint32(0), **launch_config)

    #Download the result into the host buffer
    device_result.get(host_result)

    total = np.sum(host_result)
    expected = 0.5*n_points
    print("The sum is {!s} and should be close to {!s}".format(total, expected))

    return 0.0

print("Pi computed on the GPU is " + str(computePi1GPU(1000)))

ComputePiGPU is not properly implemented: fix this!
I didn't compute Pi, but I generated some random numbers for you.
The sum is 499 and should be close to 500.0
Pi computed on the GPU is 0.0
