# The Poisson Process: Section 1 - Numerical Example

In [1]:
import numpy as np
import pandas as pd

import ipywidgets as widgets
from IPython.display import display, Math

In [2]:
import bokeh.plotting as bplt
from bokeh.models import Range1d, LabelSet, ColumnDataSource, Arrow, TeeHead, VeeHead, NormalHead, OpenHead, Whisker
from bokeh.models.glyphs import VBar, Line, Step
from bokeh.models.markers import Circle
from bokeh.io import output_notebook, push_notebook
# Configure Bokeh to render figures inline in the notebook when bplt.show(...) is called.
output_notebook()

## Passengers Arriving to a Bus Stop

We want to model the arrival of passengers to a bus stop (say, the Stanford Circle shuttle stop).  Our information is that passengers arrive *on average* at a rate of 2 per minute, but the actual arrival process is not deterministic: it would be unrealistic to assume that the next passenger arrives exactly 30 seconds after the previous one.

How can we add stochastic variability to the arrival process?  (And do so in a way that allows tractable analysis?)
* Time interval between successive arrivals is random (must be non-negative).
* Independent and identically distributed.
* On average, 30 seconds.

Let $X_n$ be the time interval between the arrivals of the $(n-1)$st and $n$th passengers, so $(X_1, X_2, \dots)$ is a sequence of independent and identically distributed random variables with $\mathsf{E}[X_i] = 0.5$ (minutes).  Note that this still allows some flexibility.  In particular, we can choose any non-negative distribution with the appropriate mean: Exponential, Uniform, Lognormal, Log-logistic, etc.

For reasons that will become clear later, we will assume that the interarrival times are exponentially distributed:
$$X_i \sim \text{Exponential}(\lambda)$$
with rate $\lambda = 2$ per minute, so that $\mathsf{E}[X_i] = 1/\lambda = 0.5$ minutes.

Let $S_n$ be the time at which the $n$th passenger arrives:
$$ S_n \overset{\mathsf{def}}{=} \sum_{i=1}^n X_i $$

Let $N(t)$ be the number of passengers that have arrived by time $t$.  With the convention $S_0 = 0$ (so that $N(t) = 0$ before the first arrival),
$$N(t) \overset{\mathsf{def}}{=} \max\{n \geq 0\,:\, S_n \leq t\}$$
The stochastic process $\{N(t),\, t \geq 0\}$ is a type of “counting process.”  The random path represented by this stochastic process is usually best interpreted as counting arrivals over time — it increases by 1 each time there is an arrival.

When the random variables $X_i$ are independent and identically distributed with $X_i \sim \text{Exponential}(\lambda)$, then $\{N(t),\, t \geq 0\}$ is a **Poisson Process with rate $\lambda$**.

In [3]:
# Model parameters: arrival rate (passengers per minute) and number of arrivals to simulate.
lam = 2
steps = 10
S_0 = 0  # the process starts at time 0

# Draw from a seeded generator so that "Restart & Run All" reproduces the same sample path.
SEED = 42
rng = np.random.default_rng(SEED)

# Interarrival times X_1, ..., X_steps ~ Exponential(lam).
# NumPy parameterizes the exponential by its scale (the mean, 1/lam), not by the rate.
X = rng.exponential(scale=1/lam, size=steps)

# Arrival times S_n are the running sums of the interarrival times; rows are indexed by n = 1..steps.
S = pd.DataFrame({'S_n': np.cumsum(X), 'X_n': X},
                 index=pd.Index(range(1, steps+1), name='n'))
display(S)

Unnamed: 0_level_0,S_n,X_n
n,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1.286932,1.286932
2,2.580294,1.293363
3,2.76953,0.189235
4,3.499225,0.729695
5,4.539234,1.04001
6,4.685247,0.146013
7,4.764499,0.079253
8,5.09832,0.33382
9,5.309425,0.211105
10,5.442203,0.132778


In [4]:
# --- Timeline of arrivals: a dot at each arrival time, an arrow across each interarrival gap ---

# Each interarrival interval runs from the previous arrival (time 0 for the first one)
# to the current arrival.
arrival_times = S['S_n'].values
interarr = pd.DataFrame(
    {
        'start': np.concatenate(([0], arrival_times[:-1])),
        'end': arrival_times,
    },
    index=pd.Index(range(1, steps+1), name='n'),
)

plt = bplt.figure(title='Timeline of Arrivals', plot_height=200)
plt.title.text_font_size = '25px'
plt.title.align = 'center'

# Double-headed arrows at height y = 0.5, one per interarrival interval.
interval_arrows = Arrow(
    start=VeeHead(size=5),
    end=VeeHead(size=5),
    x_start='start',
    x_end='end',
    y_start=0.5,
    y_end=0.5,
    source=ColumnDataSource(interarr),
)
plt.add_layout(interval_arrows)

# Red dots on the time axis (y = 0) mark the arrival times S_n.
glyph_S = Circle(x="S_n", y=0, size=10, line_color="red", fill_color="red")
plt.add_glyph(ColumnDataSource(S), glyph_S)

# Hide the grid and the y-axis; pin the x-axis at y = 0 and extend it one unit past the last arrival.
plt.yaxis.visible = False
plt.grid.visible = False
plt.xaxis.fixed_location = 0
plt.x_range = Range1d(0, S['S_n'].max() + 1)

bplt.show(plt)

In [5]:
# Tabulate the counting process at its jump times: the index 't' holds S_0 followed by the
# arrival times S_1..S_steps, and the column 'N(t)' holds the matching counts 0, 1, ..., steps.
jump_times = np.concatenate(([S_0], S['S_n'].values))
N = pd.DataFrame({'N(t)': np.arange(steps + 1)},
                 index=pd.Index(jump_times, name='t'))

In [6]:
# --- Sample path of the counting process N(t) ---
plt = bplt.figure(title='Counting Process {N(t), t ≥ 0}')
plt.title.text_font_size = '25px'
plt.title.align = 'center'
plt.xaxis.axis_label = "time"
plt.yaxis.axis_label = "# of arrivals"

# Only the x-range is fixed (one time unit past the last arrival); the y-range is not set explicitly.
plt.x_range = Range1d(0, S['S_n'].max() + 1)

# Blue step function through the points (t, N(t)); mode="after" draws each level after its
# x-coordinate, so the count jumps at the arrival time itself.
source = ColumnDataSource(N)
glyph_N = Step(x="t", y="N(t)", line_color="blue", mode="after")
plt.add_glyph(source, glyph_N)

# Red dots on the time axis at the arrival times; the first row (t = S_0) is skipped
# because it is the starting point, not an arrival.
glyph_S = Circle(x="t", y=0, size=10, line_color="red", fill_color="red")
plt.add_glyph(ColumnDataSource(N.iloc[1:]), glyph_S)

bplt.show(plt)