In [1]:
# RUN THIS CELL FIRST: it loads the notebook's custom style file.
from IPython.display import HTML

# Read the whole style file into memory; the handle is closed when the
# `with` block exits.
with open('./style/custom.css', 'r') as css_file:
    style = css_file.read()

# Last expression of the cell, so IPython displays it as rich HTML output.
HTML(style)

In [2]:
from numpy import linspace, arange
from scipy.stats import binom, norm                       # lib. for statistical functions
from ipywidgets import interact, FloatSlider, IntSlider, Checkbox   # lib. for interactive graphic
from bokeh.io import push_notebook, show, output_notebook # lib. for graphic output
from bokeh.plotting import figure
# Send Bokeh output to the notebook instead of a standalone HTML file.
output_notebook()

# Keyword arguments shared by the figure created later in this notebook
# (unpacked into `figure(...)`).
# NOTE(review): newer Bokeh releases name these `height` / `width`; confirm
# against the installed Bokeh version before renaming.
options = {
    "plot_height": 250,
    "plot_width": 700,
    "tools": "pan,wheel_zoom,reset,save,crosshair,box_select",
}

# The normal limit of the binomial distribution

Let $Y_1,\dots,Y_n$ be independent Bernoulli variables, each with expected value $p$ and variance $p(1-p)$. 
Define

$\displaystyle\qquad \sum^n_{i=1}Y_i = X_{n,p}\sim B(n,p)$

$\displaystyle\qquad\mu_{n,p}=np,\quad$ expected value of $X_{n,p}$

$\displaystyle\qquad\sigma_{n,p}=\sqrt{\strut n\,p\,(1-p)}\quad$ standard deviation of $X_{n,p}$

$\displaystyle\qquad\bar Y_{n,p}\ =\ \dfrac1n\sum^n_{i=1}Y_i\qquad$ 

Therefore $\bar Y_{n,p}$ has expected value $p$ and standard deviation $\sqrt{p(1-p)/n}$. By the central limit theorem, for large $n$ its distribution is approximately $N\big(p,\ p(1-p)/n\big)$, where the second parameter is the variance. 


For graphical reasons we standardize the variables $\bar Y_{n,p}$.

$\displaystyle\qquad\dfrac{\bar Y_{n,p}-p}{\strut\sqrt{p(1-p)/n}}\ =\ \dfrac{X_{n,p}-\mu_{n,p}}{\sigma_{n,p}}\quad$

Hence, the central limit theorem says that

$\displaystyle\qquad\dfrac{X_{n,p}-\mu_{n,p}}{\sigma_{n,p}}\quad$(standardization of a binomial r.v.)$\quad\overset{\text{approx.}}{\sim} N(0,1)\quad$ for large $n$.

In the graph below we draw rectangles centered on $z$,  where

$\quad\displaystyle z\ =\ \frac{x-\mu_{n,p}}{\sigma_{n,p}}\quad$ for $\quad x\in\big\{0,\dots,n\big\}$ is the number of successes.

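Every rectangle has base $\dfrac{1}{\sigma_{n,p}}$ (the distance between two consecutive values of $z$) and height $P(X_{n,p}=x)\cdot\sigma_{n,p}$. In the plot the bars are drawn slightly narrower than this, so that adjacent rectangles remain distinguishable. 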

Its area is the probability of observing $z\cdot\sigma_{n,p}+\mu_{n,p}$ successes.

We may compare the graph to the p.d.f. of a standard normal r.v. $Z\sim N(0,1)$: tick the `limit` checkbox to overlay it on the rectangles.

In [3]:
# Standardized binomial: rectangles whose area represents the probability
n_max = 1000                   # largest number of trials the plot supports
k = arange(n_max + 1)          # candidate success counts 0, 1, ..., n_max
x = k                          # initial bar positions (same array object as k)
xx = linspace(-3, 3, 10000)    # grid on which the normal density is drawn

plot1 = figure(
    title="PMF of Z = (X - μ) / σ for X ~ B(n,p)",
    x_axis_label="( #successes - μ) / σ",
    y_axis_label="probability ⋅ σ",
    x_range=(-3, 3),
    y_range=(0, 0.45),
    **options
)
plot1.title.text_font = "courier"
plot1.title.text_font_size = "14pt"

# Bars start with zero height; `update` fills in positions, heights and width.
r = plot1.vbar(
    x,
    top=[0] * (n_max + 1),
    width=0.05,
    bottom=0,
    color="#119911",
    alpha=0.5,
)

# Standard normal density, fully transparent until `update` makes it visible.
s = plot1.line(
    xx,
    norm.pdf(xx),
    line_width=2,
    color="#ff0000",
    line_alpha=0,
)

def update(limit=False, n=20, p=0.5):
    """Redraw the bars for the standardized binomial B(n, p) and push the change.

    Relies on the module-level objects created in this cell: ``k`` (array of
    success counts), ``r`` (the vbar renderer) and ``s`` (the normal-density
    line renderer).

    Parameters
    ----------
    limit : bool
        When true, the standard normal density curve is made visible;
        otherwise it is made fully transparent.
    n : int
        Number of Bernoulli trials.
    p : float
        Success probability; must lie strictly between 0 and 1, otherwise
        the standard deviation is zero and the division below fails.
    """
    # Show or hide the N(0,1) density by toggling its line opacity.
    s.glyph.line_alpha = 1 if limit else 0

    # dx = 1/sigma: distance between consecutive standardized values.
    dx = 1 / (n*p*(1-p))**0.5

    # Standardized positions for every success count in `k`, computed with
    # array arithmetic instead of a Python-level loop on every slider move.
    z = (k - n*p) * dx

    # Replace both columns in one assignment so the source never holds new
    # positions together with stale heights. Heights are pmf/dx = pmf*sigma.
    r.data_source.data = {"x": z, "top": binom.pmf(k, n, p) / dx}

    # Bars are drawn 0.02 narrower than dx, leaving a gap between neighbours.
    r.glyph.width = dx - 0.02
    push_notebook()

# Display the figure first and request a notebook handle, so that the
# push_notebook() call inside `update` has a displayed plot to refresh.
show(plot1, notebook_handle=True)

# Sliders for the two binomial parameters; the `limit` checkbox is generated
# by `interact` from the boolean default in `update`'s signature.
n_slider = IntSlider(description="n", min=10, max=n_max, step=10, value=20)
p_slider = FloatSlider(description="p", min=0.1, max=0.95, step=0.05, value=0.5)

interact(update, n=n_slider, p=p_slider);

interactive(children=(Checkbox(value=False, description='limit'), IntSlider(value=20, description='n', max=100…