# Requirements

In [2]:
from numba import njit
import numpy as np
import random

# Random $\pi$

Compute $\pi$ by generating random points in a square and counting how many there are in the circle inscribed in the square.

In [5]:
def compute_pi(nr_tries):
    hits = 0
    for _ in range(nr_tries):
        x = random.random()
        y = random.random()
        if x**2 + y**2 < 1.0:
            hits += 1
    return 4.0*hits/nr_tries

In [6]:
@njit
def compute_pi_jit(nr_tries):
    hits = 0
    for _ in range(nr_tries):
        x = random.random()
        y = random.random()
        if x**2 + y**2 < 1.0:
            hits += 1
    return 4.0*hits/nr_tries

In [32]:
@njit(['float64(int64)'])
def compute_pi_jit_sign(nr_tries):
    hits = 0
    for _ in range(nr_tries):
        x = random.random()
        y = random.random()
        if x**2 + y**2 < 1.0:
            hits += 1
    return 4.0*hits/nr_tries

In [9]:
%timeit compute_pi(100_000)

27.1 ms ± 277 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [10]:
%timeit compute_pi_jit(100_000)

687 µs ± 9.53 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [34]:
%timeit compute_pi_jit_sign(np.int64(100_000))

685 µs ± 8.96 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


Using numba's just-in-time compiler significantly speeds up the computations.

# Quadrature $\pi$

Another method to compute $\pi$ is to compute the definite integral
$$
\frac{\pi}{2} = \int_{-1}^{1} \sqrt{1 - x^2} dx
$$

In [38]:
@njit
def quad_pi_jit(nr_steps):
    delta = 2.0/nr_steps
    x = np.linspace(-1.0, 1.0, nr_steps)
    f = np.empty_like(x)
    for i in range(x.size):
        f[i] = np.sqrt(1.0 - x[i]**2)
    return 2.0*f.sum()*delta

We can implement this so that the loop can be parallelized (numba cannot deal with reductions).

In [35]:
@njit(parallel=True)
def quad_pi_par(nr_steps):
    delta = 2.0/nr_steps
    x = np.linspace(-1.0, 1.0, nr_steps)
    f = np.empty_like(x)
    for i in range(x.size):
        f[i] = np.sqrt(1.0 - x[i]**2)
    return 2.0*f.sum()*delta

The pure numpy implementation for comparison.

In [44]:
def quad_pi_np(nr_steps):
    delta = 2.0/nr_steps
    x = np.linspace(-1.0, 1.0, nr_steps)
    return 2.0*np.sqrt(1.0 - x**2).sum()*delta

In [50]:
%timeit quad_pi_jit(100_000_000)

328 ms ± 34.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [51]:
%timeit quad_pi_par(100_000_000)

202 ms ± 1.19 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [52]:
%timeit quad_pi_np(100_000_000)

676 ms ± 43.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


The parallized version is faster, but the parallel efficiency is far from great.