In [2]:
# libraries for numerical arrays
from numpy import array, arange, linspace

# libraries for statistical functions
from scipy.stats import binom, norm

# libraries for interactions with the graphic
from ipywidgets import interact, fixed, FloatSlider, IntSlider, Button, Checkbox

# libraries libraries and options for graphic output
from bokeh.io import push_notebook, show, output_notebook, output_file
from bokeh.plotting import figure
# Configure Bokeh to render the figures passed to show() inline in the notebook
output_notebook()
# Keyword arguments shared by every figure() call in this notebook:
# common canvas size and the same set of toolbar tools.
options = {
    "plot_height": 300,
    "plot_width": 700,
    "tools": "pan,wheel_zoom,reset,save,crosshair",
}

from IPython.core.display import HTML
# Read the custom stylesheet that lives next to the notebook.
with open('./style/custom.css', 'r') as f:
    style = f.read()

# Final expression of the cell: the HTML object wrapping the stylesheet text
# becomes the cell's rich output.
HTML(style)

<style>
.text_cell_render {font-family: Times New Roman, serif;}
</style>

## Binomial distribution

We say that $X$ is a <mark>binomial random variable</mark> with parameters $n$ and $p$, for short $X\sim B(n,p)$, if

$$X\ \ =\ \ \sum^n_{i=1}X_i$$

where the $X_i$ are independent Bernoulli random variables with success probability $p$. So, $X$ counts the number of successes in a sequence of $n$ independent Bernoulli trials with success probability $p$. Clearly $X\in\{0,\dots,n\}$.

Equivalently, we may also say that $X$ has a <mark>binomial distribution</mark> with parameters $n$ and $p$. In fact random variables are characterized by their distribution a.k.a. <mark>probability mass function</mark> (abbreviated <mark>pmf</mark>). The pmf of of a binomial random variable is not difficult to compute. For $x\in\{0,\dots,n\}$ we have

$${\rm Pr}\,(X=x)\ \ =\ \ {n\choose x}p^x(1-p)^{n-x}$$

Consequently, the <mark>cumulative distribution function</mark> (abbreviated <mark>cdf</mark>) is

$${\rm Pr}\,(X\le x)\ \ =\ \ \sum^x_{i=0}{n\choose i}p^i(1-p)^{n-i}$$

Below we plot the function $P(X=x)$ for various values of $n$ and $p$. 

<div class=warn>
**Warning:** remember to do bookeping  
<div/>


In [3]:
from bokeh.layouts import column
from bokeh.models import CustomJS, ColumnDataSource, Slider

n_max = 100            # maximal number of trials
x = arange(n_max + 1)  # possible numbers of successes: 0, 1, ..., n_max

# create a figure
plot1 = figure(title="pmf of X ~ B(n,p)", x_axis_label="#successes", y_axis_label="probability",
               x_range=(0, n_max), y_range=(0, 0.2), **options)

# Bar plot with one bar per possible outcome. The heights start at zero and are
# overwritten by update1 as soon as interact() invokes it for the first time.
r1 = plot1.vbar(x, top=[0]*(n_max + 1), bottom=0, width=0.9, color="#111188", alpha=0.5)


def update1(n, p):
    """Redraw the bars of plot1 as the pmf of B(n, p).

    n -- number of trials
    p -- success probability of each trial
    """
    r1.data_source.data['top'] = binom.pmf(x, n, p)
    # Push to this figure's own handle: without an explicit handle Bokeh targets
    # the most recently shown figure, which may belong to another cell.
    push_notebook(handle=handle1)


# Show the figure exactly once, *before* interact() triggers the first update
# (calling show() inside the callback would re-render the whole figure on every
# slider move), and keep the handle that push_notebook needs.
handle1 = show(plot1, notebook_handle=True)

interact(update1,
         n=IntSlider(description="n", min=10, max=n_max, step=10, value=50),
         p=FloatSlider(description="p", min=0.1, max=0.95, step=0.05, value=0.5));

interactive(children=(IntSlider(value=50, description='n', min=10, step=10), FloatSlider(value=0.5, descriptio…

## Standization of a probability distribution


Let $X$ be an r.v. with expected value $\mu$ and and standard deviation $\sigma$. The r.v.

$$Z\ =\ \frac{X-\mu}{\sigma}$$

is said to be obtained from $X$ by <mark>standardization</mark>. For a standard r.v. we always have ${\rm E}(Z)=0$ and ${\rm Var}(Z)=1$. 

Below we plot the probability mass function of $Z$, for $X\sim B(n,p)$.

When $X\sim B(n,p)$ it is known that ${\rm E}(X)=np$ and ${\rm Var}(X)=np(1-p)$.

In [4]:
# create a figure
plot = figure(title="pmf of Z = (X - μ) / σ for X ~ B(n,p)", x_axis_label="( #successes - μ) / σ", y_axis_label="probability",
              x_range=(-3, 3), y_range=(0, 0.4), **options)

# Bar plot with one bar per possible outcome; positions and heights are
# placeholders that update() replaces on its first call.
r = plot.vbar(x, top=[0]*len(x), bottom=0, width=0.15, color="#991111", alpha=0.5)

# The names `r` and `update` are re-bound further down in the notebook, while the
# sliders created here stay alive. Keep this cell's data source and notebook
# handle under their own names so the callback always drives *this* figure.
source_z = r.data_source


def update(n=20, p=0.5):
    """Redraw the bars as the pmf of the standardized variable Z = (X - np) / sqrt(np(1-p)).

    n -- number of trials of the underlying X ~ B(n, p)
    p -- success probability of each trial
    """
    sigma = (n*p*(1 - p))**0.5
    # Standardizing only relocates the bars; their heights are still Pr(X = i).
    source_z.data['x'] = (x - n*p) / sigma
    source_z.data['top'] = binom.pmf(x, n, p)
    # Explicit handle: a bare push_notebook() targets the most recently shown figure.
    push_notebook(handle=handle_z)


handle_z = show(plot, notebook_handle=True)

interact(update,
         n=IntSlider(description="$n$", min=10, max=n_max, step=10, value=20),
         p=FloatSlider(description="$p$", min=0.1, max=0.95, step=0.05, value=0.5));

interactive(children=(IntSlider(value=20, description='$n$', min=10, step=10), FloatSlider(value=0.5, descript…

## Another way to represent probabiltities

In the graph above we have drown rectangles centered on the possible outcomes a standardized r.v. obtained from $X\sim B(n,p)$. That is 

$\quad\displaystyle\frac{x-\mu_{n,p}}{\sigma_{n,p}}\quad$ for $\quad x\in\big\{0,\dots,n\big\}$,  $\quad\mu_{n,p}=np,\quad$ and $\quad\sigma_{n,p}=\sqrt{np(1-p)}$

The base of the rectangles in the figure above is arbitrary (it has no relatio with the data).

Below we draw rectangles of base $\frac{1}{\sigma_{n,p}}$ the area of the rectangle represent the probability.

We may compare the graph to <mark>probability density function</mark> (abreviated <mark>pdf</mark>) of a standard normal r.v. $Z\sim N(0,1)$

In [5]:
# Standardized binomial: the area of each rectangle equals the probability
n_max = 100
x = k = arange(n_max + 1)     # possible numbers of successes: 0, 1, ..., n_max
xx = linspace(-3, 3, 1000)    # grid on which the normal density is drawn

# Bar height is pmf / (1/σ) = pmf × σ, hence the y-axis label.
plot3 = figure(title="pmf of Z = (X - μ) / σ for X ~ B(n,p) -- probability proportional to the area",
               x_axis_label="( #successes - μ) / σ", y_axis_label="probability × σ",
               x_range=(-3, 3), y_range=(0, 0.6), **options)
r = plot3.vbar(x, top=[0]*(n_max + 1), width=0.05, bottom=0, color="#119911", alpha=0.5)
# Standard normal pdf, created fully transparent; the checkbox makes it visible.
s = plot3.line(xx, norm.pdf(xx), line_width=2, color="#ff0000", line_alpha=0)


def update(b=False, n=20, p=0.5):
    """Redraw the standardized binomial as rectangles whose area is the probability.

    b -- when true, show the standard normal density curve; otherwise hide it
    n -- number of trials of the underlying X ~ B(n, p)
    p -- success probability of each trial
    """
    s.glyph.line_alpha = 1 if b else 0
    dx = 1 / (n*p*(1 - p))**0.5          # spacing of the standardized outcomes (= 1/σ)
    r.data_source.data['x'] = (k - n*p) * dx
    # Dividing by the base dx makes height × base equal to Pr(X = i).
    r.data_source.data['top'] = binom.pmf(k, n, p) / dx
    r.glyph.width = dx - 0.02            # slightly narrower than dx, leaving a gap between bars
    # Explicit handle: a bare push_notebook() targets the most recently shown figure.
    push_notebook(handle=handle3)


handle3 = show(plot3, notebook_handle=True)

interact(update,
         n=IntSlider(description="n", min=10, max=n_max, step=10, value=20),
         p=FloatSlider(description="p", min=0.1, max=0.95, step=0.05, value=0.5));

interactive(children=(Checkbox(value=False, description='b'), IntSlider(value=20, description='n', min=10, ste…

In [6]:
#n_max= 200
#k = x = arange(n_max+1)  
#
#pl = figure(title="PMF of X ~ B(n,p)", x_axis_label = "relative frequence of succes", y_axis_label = "probability", 
#            x_range=(0,1), y_range=(0,0.3), **options )
#r = pl.vbar(x, top=[0]*(n_max+1), width=0.005, bottom=0, color="#111188", alpha=0.5)
#
#def update(n=20, p=0.5):
#    x = [i/n for i in range(n_max+1)]
#    r.data_source.data['x'] = x
#    r.data_source.data['top'] = binom.pmf(k,n,p)
#    push_notebook()
#
#show(pl, notebook_handle=True)
#
#interact(update, n=(20,n_max,2), p=(0.1, 0.9, 0.1))