In [1]:
import random
import copy
import timeit
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

 - https://stackoverflow.com/questions/55255633/on-monte-carlo-probability-syntax
 

In [2]:
# Seed BOTH random generators used in this notebook:
#   - the stdlib `random` module (draw2 and the default sampler of runMonteCarlo);
#   - NumPy's legacy global RNG (np.random.permutation in draw1).
# Seeding only `random` leaves the draw1 results irreproducible after a kernel restart.
SEED = 123456
random.seed(SEED)
np.random.seed(SEED)

In [3]:
# Problem size, same meaning as the runMonteCarlo parameters of the same name:
# n women, m tables, p persons per table.
n, m, p = 3, 4, 5

In [4]:
# Seat-occupancy vector handed to draw1 (see `setup`): n ones followed by m*p - n zeros.
idx1 = [1] * n
idx1 += [0] * (m * p - n)
def draw1(c, m, **kwargs):
    """
    Draw a random Monte Carlo input: naive version.

      - Shuffle the occupancy vector c (one entry per seat, non-zero where a woman sits)
      - Count women per table: split the shuffled vector into m consecutive chunks
        of equal length (one chunk per table) and sum each chunk
      - Return the set of table indices whose count is > 0

    Parameters:

      - c (sequence of int): occupancy vector; its length must be a multiple of m,
        otherwise the reshape raises ValueError;
      - m (int): number of tables;
      - **kwargs: ignored, accepted so the function can be called with the same
        keyword arguments as the other draw functions.

    Returns a set of plain Python ints (like draw2), not NumPy scalars.

    Note: randomness comes from NumPy's global RNG (np.random), which is not
    affected by random.seed().
    """
    seats = np.random.permutation(c)                # shuffled copy, already an ndarray
    per_table = seats.reshape(m, -1).sum(axis=1)    # row i holds the seats of table i
    return set(np.flatnonzero(per_table > 0).tolist())

In [5]:
def draw2(n, m, **kwargs):
    """
    Draw a random Monte Carlo input: wrong version.

    Each of the n draws picks a table index uniformly in {0, ..., m-1},
    independently of the others (sampling WITH replacement; the table
    capacity p is not used).

    Returns the set of distinct table indices that were drawn.
    Extra keyword arguments are accepted and ignored.
    """
    tables = set()
    for _ in range(n):
        tables.add(random.randint(0, m - 1))
    return tables

In [6]:
# Options per sampling strategy, keyed by strategy name.
# The inner keys match runMonteCarlo's `function` / `places` parameters, so each
# entry is presumably unpacked as keyword arguments by the benchmark driver
# (not shown in this notebook) — TODO confirm.
# The empty 'draw' entry carries no overrides.
setup = dict(
    draw={},
    draw1=dict(places=idx1, function=draw1),
    draw2=dict(function=draw2),
)

In [7]:
def runMonteCarlo(N=100, n=3, m=4, p=5, function=None, places=None):
    """
    Run Monte Carlo Simulation:

    Problem statement:
      - m*p persons are mapped to m tables of capacity p;
      - There are exactly n women among the m*p persons;

    Assess, among others, the distribution of X (number of tables where no woman sits).

    Function parameters are:

      - N (int), number of Monte Carlo simulations, must be >= 1;
      - n (int), number of women;
      - m (int), number of tables;
      - p (int), number of persons per table.

    Additional parameters:

      - function (callable): draws one random experiment. It is called with the keyword
                             arguments (c, n, m, p) and must return the set of table
                             indices (within 0..m-1) where at least one woman sits.
                             Defaults to sampling n seats without replacement.
      - places (sequence): sampled space, passed to `function` as c.
                           Defaults to the seat indices 0..m*p-1.

    Function returns:

      - r (dict of (int, float)), experimental distribution of X;
      - E (float), the expectation of random variable X;
      - f (float), frequency of no woman sitting at table A (index 0).

    Raises:

      - ValueError if N < 1 (frequencies are undefined without at least one run).

    Side effect: if a module-level list named `_result` exists, a summary dict
    {'F': r, 'extra': {'E[X]': E, 'fA': f}} is appended to it.
    """
    # Fail early with a clear message instead of a bare ZeroDivisionError (N == 0)
    # or a silently all-zero "distribution" (N < 0).
    if N < 1:
        raise ValueError("N must be a positive integer, got %r" % (N,))

    def _draw(c, n, p, **kwargs):
        """
        Draw a random Monte Carlo Input for the given problem:

          - Draw n elements without replacement from c (select woman place indices)
          - Perform floor division of the n elements by p (assess table indices from place indices)
          - Returns the set of table indices where women sit.
        """
        return {x//p for x in random.sample(c, n)}

    # Initialization
    draw = function or _draw                # Select Monte Carlo Input function
    if places is None:
        places = list(range(m*p))           # Place indices: 0..m*p-1
    C = {k: 0 for k in range(m+1)}          # Counter of runs per number of women-free tables
    f = 0                                   # Number of runs with no woman at table A

    # Generate Monte Carlo Inputs and assess Outputs
    for _ in range(N):
        e = draw(c=places, n=n, m=m, p=p)   # Tables hosting at least one woman
        C[m - len(e)] += 1                  # X = number of tables without woman
        f += int(0 not in e)                # Is there no woman at table A (index 0)?

    # Aggregation & Normalization of Outputs
    r = {k: v/N for (k, v) in C.items()}
    E = sum(k*v for (k, v) in r.items())
    f /= N

    # Trick to bind results to timeit caller: the append is a no-op on a throwaway
    # list when no global `_result` has been defined.
    globals().get('_result', []).append({'F': r, 'extra': {'E[X]': E, 'fA': f}})

    return r, E, f

In [8]:
# Quick sanity run with every default: N=100, n=3, m=4, p=5 and the built-in sampler.
runMonteCarlo()

({0: 0.0, 1: 0.43, 2: 0.54, 3: 0.03, 4: 0.0}, 1.6, 0.34)

In [9]:
# Load previously saved results; the cells below use its columns
# N, key, id, batch, elapsed, E[X], fA and 0..4.
# NOTE(review): pickles are tied to the pandas version that wrote them. The traceback
# recorded for this cell (missing 'pandas.core.internals.managers') suggests the file
# was written by a different pandas version than the one installed — confirm, then
# regenerate it or re-export to a portable format. Only unpickle files you trust.
df = pd.read_pickle('mcws.pickle')
df.tail(15)

ModuleNotFoundError: No module named 'pandas.core.internals.managers'; 'pandas.core.internals' is not a package

In [None]:
# Average every column per (N, key) pair, discard the bookkeeping columns,
# then pivot `key` into the column index so each metric has one column per key.
df2 = (
    df.groupby(['N', 'key'])
      .mean()
      .drop(['id', 'batch'], axis='columns')
      .unstack()
      .sort_index(axis='columns')
)
df2.tail()

In [None]:
# Mean elapsed time against sample size N, one line per `key` column of df2.
axe = df2['elapsed'].plot()
axe.set(
    title="Monte Carlo Simulation: Time Complexity",
    xlabel=r"Sample Size, $N$",
    ylabel=r"Elapsed Time, $t$ $[\mathcal{s}]$",
)
axe.legend(loc='upper left', bbox_to_anchor=(1, 1))   # legend outside, right of the axes
axe.grid()

In [None]:
# Exact distributions of X (tables without woman) for n=3, m=4, p=5, indexed by x = 0..4.
# ref: seats drawn without replacement, C(20, 3) = 1140 equally likely seatings.
# err: every woman picks a table independently, 4**3 = 64 equally likely outcomes.
ref = [0] + [ways / 1140 for ways in (500, 600, 40)] + [0]
err = [0] + [ways / 64 for ways in (24, 36, 4)] + [0]

In [None]:
# Mean simulated E[X] against sample size N, one line per `key` column of df2.
axe = df2['E[X]'].plot()
# Optional reference line and zoom, currently disabled:
#axe.axhline(91/57, label=r'$E[X] = %.4f$' % (91/57), linestyle=':', color='k')
#axe.set_ylim([1.5, 1.8])
axe.set(
    title="Monte Carlo Simulation: Expected Value",
    xlabel=r"Sample Size, $N$",
    ylabel=r"Expectation, $\mathrm{E}[X]$",
)
# Extra ticks at 91/57 and 27/16 (the two theoretical expectations).
axe.set_yticks([1.58, 1.64, 1.70, 91/57, 27/16])
axe.legend(loc='upper left', bbox_to_anchor=(1, 1))
axe.grid()

In [None]:
# Mean simulated frequency of "no woman at table A" against sample size N.
axe = df2['fA'].plot()
# Optional reference line, currently disabled:
#axe.axhline(91/228, label=r'$P(A=0) = %.4f$' % (91/228), linestyle=':', color='k')
axe.set(
    title="Monte Carlo Simulation: Probability",
    xlabel=r"Sample Size, $N$",
    ylabel=r"Frequency, $P(A=0)$",
)
# Extra ticks at 91/228 and 27/64 (the two theoretical probabilities).
axe.set_yticks([0.38, 0.44, 91/228, 27/64])
axe.legend(loc='upper left', bbox_to_anchor=(1, 1))
axe.grid()

In [None]:
# Mean of every column per (N, key) group.
data = df.groupby(['N', 'key']).mean()
# Last entry of the first index level, i.e. the largest N if the level is sorted as usual — TODO confirm.
label = data.index.levels[0][-1]
# Keep that N only, select the columns named 0..4 (presumably the frequencies of X = 0..4),
# then transpose so rows are the values of x and columns are the `key` values.
data = data.loc[label,:].loc[:,[0,1,2,3,4]].T

In [None]:
# Bar chart of the simulated distribution of X at the selected sample size (`label`).
axe = data.plot(kind='bar')
axe.set(
    title=r"Monte Carlo Simulation: Distribution ($N=%d$)" % label,
    xlabel=r"Random Variable, $x$",
    ylabel=r"Frequency, $P(k=x)$",
)
axe.legend(loc='upper left', bbox_to_anchor=(1, 1))
# Alternative that keeps the automatic ticks, currently disabled:
#axe.set_yticks(list(axe.get_yticks()) + ref[1:4])
# Fixed ticks plus the three non-zero reference probabilities.
axe.set_yticks([0, 0.2, 0.4, 0.6] + ref[1:4])
axe.grid()

In [None]:
# Append the two closed-form distributions as extra columns (modifies `data` in place)
# so they can be compared side by side with the simulated frequencies.
data['Reference'] = ref
data['Attempt'] = err
data