## Principle of Monte-Carlo integration:

* Define a function that tests if a point is inside a region
* Generate $N_{total}$ random points over a known area $A_{total}$ that is larger than the region
* Count the number of points $N_{in}$ that pass the test
* Compute the area of the region as:

\begin{equation}
A_{in} \approx \frac{N_{in}}{N_{total}} \times A_{total}
\end{equation}


We will define a Python module to compute pi by summing the results of `n_jobs` Monte-Carlo realizations that we will run in parallel. Since multiprocessing only works for *modules*, we can't really run this in a notebook, so we will write out a file first and then run it.

In [None]:
# Prints a fixed string; presumably just a check that the kernel executes cells.
print("hello")

In [None]:
%%writefile multiproc.py

from multiprocessing import Pool
from concurrent.futures import ProcessPoolExecutor
from itertools import repeat
import time
import random
import math

# Area of the sampling square [-1, 1] x [-1, 1] (side length 2) used by
# monte_carlo_pi; the fraction of hits is scaled by this area to estimate pi.
A_TOTAL = 4.0

def monte_carlo_integrate(is_inside, n_total, range_x, range_y):
    """
    Count how many uniformly sampled points satisfy ``is_inside``.

    A slow (non numpy) Monte-Carlo sampler: draws ``n_total`` points, with x
    taken from ``random.uniform(*range_x)`` and y from
    ``random.uniform(*range_y)``, and returns the number of points for which
    ``is_inside(x, y)`` is truthy.

    Note that the return value is the raw hit count, not the integral itself;
    the caller scales ``count / n_total`` by the sampled area.
    """
    # Lazily evaluated, so for every point x is drawn first, then y, and the
    # predicate is called before the next point is sampled.
    outcomes = (
        is_inside(random.uniform(*range_x), random.uniform(*range_y))
        for _ in range(n_total)
    )
    return sum(1 for passed in outcomes if passed)


def monte_carlo_pi(n_total):
    """
    Sample ``n_total`` points in the square [-1, 1] x [-1, 1] and return how
    many of them lie strictly inside the unit circle.

    The result is a hit count, not pi: divide by ``n_total`` and multiply by
    the area of the square to obtain the estimate.
    """

    def in_unit_circle(x, y):
        # Distance from the origin strictly below the radius of 1.
        return math.sqrt(x ** 2 + y ** 2) < 1

    x_bounds = [-1, 1]  # chosen to match A_total
    y_bounds = [-1, 1]
    return monte_carlo_integrate(in_unit_circle, n_total, x_bounds, y_bounds)


def parallel_monte_carlo_pi(n_jobs, n_total_per_job=100_000, verbose=False):
    """
    Estimate pi by running ``n_jobs`` Monte-Carlo realizations in a
    ``multiprocessing.Pool`` and combining their hit counts.

    Each job calls ``monte_carlo_pi(n_total_per_job)``; the summed hits are
    divided by the total number of samples and scaled by ``A_TOTAL``.

    Parameters:
        n_jobs: number of independent realizations to run.
        n_total_per_job: number of random points drawn by each realization.
        verbose: if true, print the pool, the per-job hit counts and the
            resulting estimate.

    Returns:
        The estimate of pi as a float.

    Raises:
        ZeroDivisionError: if ``n_total_per_job * n_jobs`` is zero.
    """
    # note you cannot define the pool globally!  It is created per call and
    # the ``with`` block tears the worker processes down again on exit, so
    # repeated calls (e.g. under %timeit) do not leak processes.
    with Pool() as pool:
        results = pool.map(monte_carlo_pi, repeat(n_total_per_job, n_jobs))
        pi = sum(results)/(n_total_per_job*n_jobs)*A_TOTAL
        if verbose:
            print(f"Pool: {pool}")
            print(f"Results: {results}")
            print(f"Pi is {pi:.10f}")
    return pi

def parallel_monte_carlo_pi_2(n_jobs, n_total_per_job=100_000, verbose=False):
    """
    Estimate pi like ``parallel_monte_carlo_pi`` but using a
    ``concurrent.futures.ProcessPoolExecutor`` instead of
    ``multiprocessing.Pool``.

    Parameters:
        n_jobs: number of independent realizations to run.
        n_total_per_job: number of random points drawn by each realization.
        verbose: if true, print the executor, the per-job hit counts and the
            resulting estimate.

    Returns:
        The estimate of pi as a float.

    Raises:
        ZeroDivisionError: if ``n_total_per_job * n_jobs`` is zero.
    """
    with ProcessPoolExecutor() as pool:   # note you cannot define this globally!
        # Executor.map returns a one-shot iterator; materialize it so the
        # counts can be both summed and printed in verbose mode.
        results = list(pool.map(monte_carlo_pi, repeat(n_total_per_job, n_jobs)))
        pi = sum(results)/(n_total_per_job*n_jobs)*A_TOTAL
        if verbose:
            print(f"Pool: {pool}")
            print(f"Results: {results}")
            print(f"Pi is {pi:.10f}")
        return pi


if __name__ == "__main__":
    # When run as a script, report the estimate instead of silently
    # discarding it: 10 jobs with the default number of samples per job.
    parallel_monte_carlo_pi(10, verbose=True)

Now we can import the module and run it locally:

In [None]:
from multiproc import parallel_monte_carlo_pi, monte_carlo_pi
import matplotlib.pyplot as plt
plt.style.use("ggplot")

In [None]:
# Smoke run: 10 jobs of 100,000 samples each; verbose=True prints the pool,
# the per-job hit counts and the resulting estimate of pi.
parallel_monte_carlo_pi(n_jobs=10, n_total_per_job=100_000, verbose=True)

In [None]:
# Timing results keyed by number of jobs; each value is the object returned
# by `%timeit -o` (its .average and .stdev are plotted in the final cell).
t = {}

In [None]:
# Serial baseline: all 5M samples in a single call, no process pool.
t[1] = %timeit -o monte_carlo_pi(5_000_000)/5_000_000 * 4

In [None]:
# Parallel runs: the same 5M samples in total, split evenly over n_jobs.
t[2] = %timeit -o parallel_monte_carlo_pi(n_jobs=2, n_total_per_job=5_000_000//2)

In [None]:
t[5] = %timeit -o parallel_monte_carlo_pi(n_jobs=5, n_total_per_job=5_000_000//5)

In [None]:
t[10] = %timeit -o parallel_monte_carlo_pi(n_jobs=10, n_total_per_job=5_000_000//10)

In [None]:
t[20] = %timeit -o parallel_monte_carlo_pi(n_jobs=20, n_total_per_job=5_000_000//20)

In [None]:
t[50] = %timeit -o parallel_monte_carlo_pi(n_jobs=50, n_total_per_job=5_000_000//50)

In [None]:
t[100] = %timeit -o parallel_monte_carlo_pi(n_jobs=100, n_total_per_job=5_000_000//100)

In [None]:
t[500] = %timeit -o parallel_monte_carlo_pi(n_jobs=500, n_total_per_job=5_000_000//500)

In [None]:
t[1000] = %timeit -o parallel_monte_carlo_pi(n_jobs=1000, n_total_per_job=5_000_000//1000)

In [None]:
# Log-log plot of the mean run time (error bars: standard deviation over the
# %timeit runs) against the number of jobs the 5M samples were split into.
fig, ax = plt.subplots(1, 1)
ax.errorbar(
    # Materialize the keys: a dict view is not a sequence, and array
    # conversion of a view passed by keyword is not reliable.
    x=list(t),
    y=[result.average for result in t.values()],
    yerr=[result.stdev for result in t.values()],
    lw=3,
    linestyle="dotted",
)
ax.set_xlabel("Number of Jobs")
ax.set_ylabel("Time to compute PI (5M samples)")
ax.set_yscale("log")
ax.set_xscale("log")