# Birthday paradox

https://en.wikipedia.org/wiki/Birthday_problem

In [56]:
import csv
import numpy as np

from ipywidgets import interactive, fixed
from IPython.display import display

from bokeh.plotting import figure
from bokeh.io import output_notebook, show
from bokeh.models import (HoverTool, PanTool, SaveTool,
                          WheelZoomTool,
                          BoxZoomTool, CrosshairTool, HelpTool)

# Route Bokeh output to the notebook so show() renders plots inline.
output_notebook()

# Number of possible birthdays used by the simulation and by the data loader
# (366 — presumably so that 29 February is counted; confirm against bdata.txt).
DAYS_OF_YEAR = 366

def _birthday_distribution():
    """
    Load the empirical distribution of birthdays over a calendar year.

    Data source: http://www.panix.com/~murphy/bday.html

    Reads ``bdata.txt`` from the current working directory. The file is
    parsed as space-delimited text with one header line. Each following row
    carries a label in column 0 and an integer count in column 1; the row
    labelled ``total`` carries the overall number of people.

    Returns:
        Array of length ``DAYS_OF_YEAR``. Entry ``i`` is the count found on
        the ``i``-th row after the header divided by the ``total`` count.
        Days for which the file has no row get probability 0.

    Raises:
        ValueError: if the file contains no ``total`` row.
    """
    # Start from zeros: a day that never appears in the file must end up with
    # a count of 0, not with a leftover placeholder value.
    counts = np.zeros(DAYS_OF_YEAR)
    total_people = None
    with open('bdata.txt', 'r') as csvfile:
        reader = csv.reader(csvfile, delimiter=' ')
        # Skip the header
        next(reader)
        for i, row in enumerate(reader):
            if row[0] == 'total':
                total_people = int(row[1])
                continue
            counts[i] = int(row[1])

    # Fail with a clear message instead of an UnboundLocalError below.
    if total_people is None:
        raise ValueError("bdata.txt has no 'total' row")

    return counts / total_people

# Per-day birthday probabilities, read from disk once when this cell runs.
BIRTHDAY_DISTRIBUTION = _birthday_distribution()

def _select_distribution(distribution_name):
    """
    Translate a distribution name into the weights given to the sampler.

    Args:
        distribution_name: either 'uniform' or 'real'.

    Returns:
        None for 'uniform' (np.random.choice then samples uniformly), or
        BIRTHDAY_DISTRIBUTION for 'real'.

    Raises:
        ValueError: for any other name.
    """
    if distribution_name == 'real':
        return BIRTHDAY_DISTRIBUTION
    if distribution_name != 'uniform':
        raise ValueError('Unknown distribution')
    return None

def _compute(people, trials, days_of_year, distribution=None):
    """
    Estimate by simulation how likely a group is to share a birthday.

    In every trial, and for every group size ``n`` from 0 to ``people - 1``,
    ``n`` birthdays are drawn and the trial counts as a hit when at least one
    value occurs more than once.

    Args:
        people: number of group sizes evaluated (sizes 0 .. people - 1).
        trials: number of simulation rounds.
        days_of_year: number of possible birthdays; days are numbered
            0 .. days_of_year - 1.
        distribution: per-day probabilities forwarded as ``p`` to
            np.random.choice; None samples uniformly.

    Returns:
        Array of length ``people``; entry ``n`` is the fraction of trials in
        which a group of ``n`` people contained a repeated birthday.
    """
    days = np.arange(days_of_year)
    hits = np.zeros(people)
    for _ in range(trials):
        for group_size in range(people):
            drawn = np.random.choice(days, size=group_size, p=distribution)
            # Fewer distinct values than draws means a birthday repeated.
            if np.unique(drawn).size < drawn.size:
                hits[group_size] += 1.0
    return hits / trials

def _display(people, result):
    """
    Plot the simulated probabilities with Bokeh.

    Draws ``result`` against the integers 0 .. people - 1 as a line with red
    markers on top, and shows the figure.

    Args:
        people: number of points on the x axis.
        result: y values, one per x position.
    """
    group_sizes = np.arange(people)

    tooltip_fields = [
        ("number of people", "$index"),
        ("probability", "$y"),
    ]
    tools = [
        HoverTool(tooltips=tooltip_fields),
        PanTool(),
        SaveTool(),
        WheelZoomTool(),
        BoxZoomTool(),
        CrosshairTool(line_color='lightgrey', line_width=.5),
        HelpTool(),
    ]

    plot = figure(x_axis_label='Number of people',
                  y_axis_label='Probability of a pair',
                  tools=tools)
    plot.line(group_sizes, result)
    plot.circle(group_sizes, result, fill_color='red', size=4)
    show(plot)

def birthday_paradox(people, trials, days_of_year, distribution_name='uniform'):
    """
    Simulate the birthday problem, plot the outcome and return the data.

    Args:
        people: number of group sizes evaluated (sizes 0 .. people - 1).
        trials: number of simulation rounds.
        days_of_year: number of possible birthdays.
        distribution_name: 'uniform' or 'real'; chooses the birthday weights.

    Returns:
        Tuple ``(x, probabilities)`` where ``x`` is ``np.arange(people)`` and
        ``probabilities`` is the array produced by the simulation.
    """
    weights = _select_distribution(distribution_name)
    probabilities = _compute(people, trials, days_of_year, weights)
    _display(people, probabilities)

    return np.arange(people), probabilities

In [57]:
# Wrap birthday_paradox in interactive controls: (min, max) tuples become
# integer sliders, the list becomes a dropdown, and fixed(...) passes
# days_of_year through without creating a control for it.
p = interactive(birthday_paradox, people=(1,100), trials=(1,1000),
                days_of_year=fixed(DAYS_OF_YEAR), distribution_name=['uniform', 'real'])
# Render the controls and the function's output below this cell.
display(p)

Widget Javascript not detected.  It may not be installed or enabled properly.
