In [None]:
# IPython autoreload extension in mode 2 (per the IPython docs: re-import
# modules before executing code, so edits to imported files are picked up).
%load_ext autoreload
%autoreload 2

In [None]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.special import factorial

In [None]:
# plt.style.available

In [None]:
'''
------------------------------------------
            SETTINGS
------------------------------------------
'''
# Base style sheet; the rcParams below override its fonts and text sizes.
plt.style.use('fivethirtyeight')
# plt.style.use('seaborn-v0_8-notebook')
plt.rcParams.update({
    'font.family': 'PT Sans',
    # 'font.serif': 'Ubuntu',
    'font.monospace': 'Ubuntu Mono',
    'font.size': 14,
    'axes.labelsize': 14,
    'axes.labelweight': 'bold',
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'legend.fontsize': 14,
    'figure.titlesize': 12,
})

# Resolution handed to plt.savefig by the figure cells.
dpi = 100

In [None]:
def get_filename(filename: str, lecture_id: int = 1, file_extension: str = '.png') -> str:
    """Build the file name under which a lecture figure is stored.

    Parameters
    ----------
    filename : str
        Base name of the figure, without prefix or extension.
    lecture_id : int, optional
        Lecture number used in the ``L<id>_`` prefix.
    file_extension : str, optional
        Suffix appended verbatim (so it should include the leading dot).

    Returns
    -------
    str
        ``"L<lecture_id>_<filename><file_extension>"``.
    """
    prefix = "L" + str(lecture_id)
    return "_".join((prefix, str(filename))) + str(file_extension)

In [None]:
# Folder (relative to the notebook) that receives the figures, and the lecture
# number used as file-name prefix by get_filename().
outdir = '../figures/'
lecture_id = 1

# plt.savefig does not create missing folders, so make sure the target exists
# before any figure cell runs (a no-op when it is already there).
os.makedirs(outdir, exist_ok=True)

In [None]:
# Seeded legacy NumPy generator used by the dice simulations below.
seed = 10
prng = np.random.RandomState(seed=seed)

# 1. Rolling a dice
Let's simulate a stochastic process of rolling a dice.

In [None]:
# File name under which the dice-roll figure is saved.
outfile = filename = get_filename('rolling_dice', lecture_id=lecture_id)

In [None]:
T = 20  # number of draws

# Draw index and, for each draw, one face picked from {1, ..., 6}.
x = np.arange(T, dtype=int)
y = prng.choice(np.arange(1, 7), size=T, replace=True)

In [None]:
# Each roll as a marker, joined by a step line.
fig, ax = plt.subplots()
ax.scatter(x, y, s=100)
ax.step(x, y, where='mid')

ax.set_xlabel('Iteration id')
ax.set_ylabel('Number selected')
ax.set_xticks(x.astype(int))
ax.set_yticks(y.astype(int))

# ax.set_title(f"Rolling a dice")

fig.tight_layout()
if outfile is not None:
    # write the figure to outdir under the configured file name
    fig.savefig(f"{outdir}{outfile}", dpi=dpi, format=None, metadata=None,
                bbox_inches='tight', pad_inches=0.1,
                facecolor='auto', edgecolor='auto',
                backend=None)
    print(f"Figure saved in {outdir}{outfile}")

plt.show()

### 1.1 Increments
Let's take a look at the increments:  
$X_{n+1} - X_{n}$ ,  
where $X_n \in \{1,\dots,6\}$ is the result at time $n$.

In [None]:
# File name for the increments figure; the trailing expression echoes it.
outfile = filename = get_filename('rolling_dice_increments', lecture_id=lecture_id)
outfile

In [None]:
# Difference between each roll and the previous one (one element shorter than y).
y_increments = y[1:] - y[:-1]

In [None]:
# Increment series: markers joined by a step line, over a dashed zero line.
fig, ax = plt.subplots()
ax.scatter(x[1:], y_increments, s=100)
ax.step(x[1:], y_increments, where='mid')
ax.plot(x, np.zeros(len(x)), color='grey', ls='--', zorder=0, lw=2)

ax.set_xlabel('Iteration id')
ax.set_ylabel('Increment')
ax.set_xticks(x.astype(int))
ax.set_yticks(y_increments.astype(int))

# ax.set_title(f"Rolling a dice: increments")

fig.tight_layout()
if outfile is not None:
    # write the figure to outdir under the configured file name
    fig.savefig(f"{outdir}{outfile}", dpi=dpi, format=None, metadata=None,
                bbox_inches='tight', pad_inches=0.1,
                facecolor='auto', edgecolor='auto',
                backend=None)
    print(f"Figure saved in {outdir}{outfile}")

plt.show()

In general, the increments can be positive or negative, and they fluctuate between a minimum and a maximum value.

### 1.2 Analyzing the empirical distributions
To better understand the process, let's investigate some relevant quantities.  
As we are dealing with random variables, it is useful to observe distributions.

In [None]:
# File name for the distribution figure; the trailing expression echoes it.
outfile = filename = get_filename('rolling_dice_dist', lecture_id=lecture_id)
outfile

If we want to investigate a probability distribution, we need to consider a large enough number of samples.  
Hence, let's increase the number of draws $T$.

In [None]:
T = 10000  # number of draws

# Same experiment as before with many more rolls: draw index, faces, and
# roll-to-roll increments.
x = np.arange(T, dtype=int)
y = prng.choice(np.arange(1, 7), size=T, replace=True)
y_increments = y[1:] - y[:-1]

- What are the most frequent numbers drawn?
- How is the increment distributed?

In [None]:
plt.figure(figsize=(6.5,3))

# Both quantities are integer-valued, so use unit-width bins centred on each
# integer. Equal-width bins over [min, max] (bins=6 / bins=11) have edges such
# as 1, 1.83, 2.67, ... and leave the bars off the integer ticks.
face_bins = np.arange(1, 8) - 0.5        # edges 0.5, 1.5, ..., 6.5
increment_bins = np.arange(-5, 7) - 0.5  # edges -5.5, -4.5, ..., 5.5

# left panel: how often each face was drawn
plt.subplot(1,2,1)
plt.hist(y, bins=face_bins,rwidth=0.5)
plt.ylabel('Frequency')
plt.xlabel('Number selected')
plt.xticks(np.arange(1,7))

# right panel: how often each increment value occurred
plt.subplot(1,2,2)
plt.hist(y_increments, bins=increment_bins,rwidth=0.5)
plt.ylabel('Frequency')
plt.xlabel('Increment')
plt.xticks(np.arange(-5,6))
# plt.xlim([-5,5])

plt.tight_layout()

if outfile is not None:
    
    plt.savefig(f"{outdir}{outfile}", dpi=dpi, format=None, metadata=None,
                bbox_inches='tight', pad_inches=0.1,
                facecolor='auto', edgecolor='auto',
                backend=None
                )
    print(f"Figure saved in {outdir}{outfile}")
        
plt.show()

### 1.3 Turning a dice rolling into a _counting process_
The process was not a counting process.   
For this we need: 
- Non-negative: $N(t) \geq 0$
- Non-decreasing: $N(s+t) - N(s) \geq 0$

In [None]:
# File name for the counting-process figure; the trailing expression echoes it.
outfile = filename = get_filename('rolling_dice_counting', lecture_id=lecture_id)
outfile

In [None]:
# outfile = None

Let's first generate some random dice draws.

In [None]:
T = 20  # number of draws

# A fresh sequence of T rolls, each picked from the six faces.
die_faces = np.arange(1, 7)
y = prng.choice(die_faces, size=T, replace=True)

Now let's make it a counting process!  
How?  
By introducing $N(t)$, the cumulative sum of the numbers drawn up to time $t$.

In [None]:
# Running total of the rolls, its time index, and its step-to-step increments.
N = y.cumsum()
x = np.arange(N.size)
n_increments = N[1:] - N[:-1]

In [None]:
fig, (ax_count, ax_incr) = plt.subplots(1, 2, figsize=(12,4))

# left panel: the cumulative process N(t)
ax_count.scatter(x, N, s=10)
ax_count.step(x, N, where='mid')
ax_count.set_xlabel('Iteration id')
ax_count.set_ylabel('N(t)')

# right panel: its increments, over a dashed zero reference line
ax_incr.scatter(x[1:], n_increments, s=10)
ax_incr.step(x[1:], n_increments, where='mid')
ax_incr.plot(x, np.zeros(len(x)), color='grey', ls='--', zorder=0, lw=2)
ax_incr.set_xlabel('Iteration id')
ax_incr.set_ylabel('Increment N(t)')
ax_incr.set_xticks(x.astype(int))
ax_incr.set_yticks(n_increments.astype(int))

fig.tight_layout()

if outfile is not None:
    # write the figure to outdir under the configured file name
    fig.savefig(f"{outdir}{outfile}", dpi=dpi, format=None, metadata=None,
                bbox_inches='tight', pad_inches=0.1,
                facecolor='auto', edgecolor='auto',
                backend=None)
    print(f"Figure saved in {outdir}{outfile}")

# plt.show()

# 2. Poisson process (PP)

Let's generate events using the definition of a Poisson process.

In [None]:
# Re-create the seeded generator so the Poisson section starts from a known state.
seed = 10
prng = np.random.RandomState(seed=seed)

Here we also want to generate more than one sample of the process, so we play also with the variable $M$.

In [None]:
# rate * delta_t is the Poisson mean used for each increment below.
rate = 10
delta_t = 1
T = 20  # max time window
M = 1000  # number of samples of the process

Now we can draw increments from a Poisson of rate $\lambda \,t$.  
And we repeat this $M$ times.

In [None]:
# Draw M independent paths: row m holds the T per-step increments of path m,
# each a Poisson count with mean rate * delta_t.
y_increments = prng.poisson(rate * delta_t, size=(M, T))

# N(t) is the running total of the increments along the time axis; a single
# vectorised cumsum replaces the per-row Python loop.
y = np.cumsum(y_increments, axis=1)

x = np.arange(y.shape[1])

# quick sanity check of the array sizes
len(y_increments), len(y), y.shape

In [None]:
# File name for the basic Poisson-process figure; the trailing expression echoes it.
outfile = filename = get_filename('pp_basic', lecture_id=lecture_id)
outfile

In [None]:
plt.figure(figsize=(12,4))

plt.subplot(1,2,1)

# A handful of randomly picked sample paths (indices drawn with replacement).
selected_samples = prng.choice(np.arange(M),5)
for sample_id in selected_samples:
    plt.step(x,y[sample_id], where='mid', label = f'Sample {sample_id}',alpha=0.7)

plt.xlabel('Iteration id')
plt.ylabel('N(t)')
plt.xticks(x.astype(int))
# plt.yticks(y.astype(int))
plt.legend()

plt.subplot(1,2,2)

# Increments of the last path drawn on the left. The sample is chosen
# explicitly here, and the loop above has its own variable, so `idx` no longer
# depends on whatever value the loop happened to leave behind.
idx = selected_samples[-1]
plt.step(x,y_increments[idx],  where='mid', label=f'Sample {idx}')
plt.plot(x,np.zeros(len(x)), color = 'grey',ls='--',zorder=0,lw=2)

plt.xlabel('Iteration id')
plt.ylabel('Increment N(t)')
plt.xticks(x.astype(int))
# plt.yticks(y_increments.astype(int))
plt.legend()
plt.tight_layout()

if outfile is not None:
    
    plt.savefig(f"{outdir}{outfile}", dpi=dpi, format=None, metadata=None,
                bbox_inches='tight', pad_inches=0.1,
                facecolor='auto', edgecolor='auto',
                backend=None
                )
    print(f"Figure saved in {outdir}{outfile}")
        
# plt.show()

It does not really tell us anything about being _Poisson_.  
Let's compute the Poisson probabilities with parameter $\lambda t$ and compare them with the empirical increments $N(s+t)-N(s)$.

In [None]:
def poisson_pmf(x, lmbda):
    """Poisson probability mass function ``lmbda**x / x! * exp(-lmbda)``.

    Parameters
    ----------
    x : int or array-like of int
        Count(s) at which the pmf is evaluated.
    lmbda : float or int
        Mean of the Poisson distribution.

    Returns
    -------
    float or numpy.ndarray
        Probability of observing ``x`` events, with the shape of ``x``.
    """
    # Force floating point: with an integer mean and an integer array `x`,
    # np.power stays in fixed-width integers and silently wraps around
    # (10**19 does not fit in int64), which yields negative "probabilities".
    lmbda = np.asarray(lmbda, dtype=float)
    return (np.power(lmbda, x) / factorial(x)) * np.exp(-lmbda)
    
# Counts from 0 up to a few beyond the largest observed increment, and the
# theoretical probability of each.
xs = np.arange(0, y_increments.max() + 5, 1)
ps = poisson_pmf(x=xs, lmbda=rate * delta_t)

In [None]:
# With outfile set to None, the `if outfile is not None` check in the next
# plotting cell skips plt.savefig.
outfile = None

In [None]:
# Increments are integer counts: use unit-width bins centred on the integers
# so that, with density=True, each bar height is a probability directly
# comparable with the pmf. 20 equal-width bins have a width different from 1,
# which rescales the heights and leaves the bars off the integer grid.
bins = np.arange(y_increments.max() + 2) - 0.5

plt.figure(figsize=(6,4))

# NOTE(review): only the first sample path (T increments) is histogrammed;
# y_increments.ravel() would pool all M paths -- confirm which is intended.
plt.hist(y_increments[0], bins=bins,rwidth=0.5,density=True, label='Data')
plt.ylabel('Frequency')
plt.xlabel('Increment')

plt.plot(xs, ps, 'ko-', lw=1,label='Theory')
plt.legend()

plt.tight_layout()

if outfile is not None:
    
    plt.savefig(f"{outdir}{outfile}", dpi=dpi, format=None, metadata=None,
                bbox_inches='tight', pad_inches=0.1,
                facecolor='auto', edgecolor='auto',
                backend=None
                )
    print(f"Figure saved in {outdir}{outfile}")
        
plt.show()

- **Remark**: from the plots above we cannot really tell _when_ the events happened in time. We only know that within a time interval $t$ there were $k$ events.

# 3. Interarrival times

Using the property that the **interarrival times** are iid exponentially distributed random variables, we can now also **simulate** the times _when_ events happen.

In [None]:
# Fresh seeded generator for the interarrival-time simulation.
seed = 10
prng = np.random.RandomState(seed)

In [None]:
rate = 10
T = 2 # observation time window
N = 20 # number of events

# generate interarrival times from exponential distribution
x = prng.exponential(1/rate, N)  

# compute successive sums to create a sequence of arrival times
ts = np.cumsum(x)  

# remove events that exceed the horizon
ts = ts[ts < T]  

# One marker height per *retained* event. Sizing this from `x` (all N draws)
# makes plt.scatter(ts, y) fail with a size mismatch whenever some arrivals
# fall beyond T and are dropped above.
y = np.ones(len(ts))

# From here on N is the event index 0, 1, ..., len(ts) - 1 plotted against ts.
N = np.arange(len(ts))
len(ts),len(x), len(N)

In [None]:
# File name for the interarrival-times figure; the expression below echoes it.
outfile = filename = get_filename('pp_interarrival', lecture_id=lecture_id)
outfile

# outfile = None

In [None]:
fig, (ax_events, ax_count) = plt.subplots(2, 1, figsize=(9,5))

# top panel: one marker per event at its arrival time
ax_events.scatter(ts, y, s=100, edgecolors='black')
ax_events.set_ylabel('Event happening')
ax_events.set_xlabel('t')
ax_events.set_yticks(np.arange(1,2))

# bottom panel: the event index as a step function of time
ax_count.step(ts, N)
ax_count.set_ylabel('N(t)')
ax_count.set_xlabel('t')
# ax_count.set_yticks(np.arange(1,2))

if outfile is not None:
    # write the figure to outdir under the configured file name
    fig.savefig(f"{outdir}{outfile}", dpi=dpi, format=None, metadata=None,
                bbox_inches='tight', pad_inches=0.1,
                facecolor='auto', edgecolor='auto',
                backend=None)
    print(f"Figure saved in {outdir}{outfile}")

In [None]:
def exponential_pmf(x, lmbda):
    """Evaluate ``lmbda * exp(-lmbda * x)``, the exponential density of rate ``lmbda``.

    Despite the ``_pmf`` suffix this is a probability *density*. The formula
    is applied as written for every ``x``; no check for ``x >= 0`` is made.

    Parameters
    ----------
    x : float or numpy.ndarray
        Point(s) at which the density is evaluated.
    lmbda : float
        Rate of the exponential distribution.

    Returns
    -------
    float or numpy.ndarray
        Density value(s), with the shape of ``x``.
    """
    decay = np.exp(np.multiply(-lmbda, x))
    return lmbda * decay
# Evaluate the theoretical density on a grid spanning the observed
# interarrival times plus 10% headroom (the range shown in the plot below).
# A grid reaching x.max() + 5 leaves only a handful of its 100 points inside
# that range, so the curve would be drawn as a coarse polygon.
xs = np.linspace(0, x.max() * 1.1, 100)
exps = exponential_pmf(xs, rate)


Let's check that the empirical distribution matches the theoretical exponential distribution of the interarrival times $\{\tau_n\}$.

In [None]:
bins = 20

# Give this histogram its own file: the `outfile` left over from the event plot
# above would otherwise be reused and that figure overwritten. A disabled
# save (outfile = None) stays disabled.
if outfile is not None:
    outfile = get_filename('pp_interarrival_dist', lecture_id=lecture_id)

plt.figure(figsize=(6,4))

# empirical density of the simulated interarrival times vs. the exact curve
plt.hist(x, bins=bins,rwidth=0.5,density=True, label='Empirical')
plt.plot(xs,exps, label='Theoretical exponential')
plt.ylabel('Frequency')
plt.xlabel('Interrarival time')
plt.xlim([0,x.max() * 1.1])
plt.legend()


plt.tight_layout()

if outfile is not None:
    
    plt.savefig(f"{outdir}{outfile}", dpi=dpi, format=None, metadata=None,
                bbox_inches='tight', pad_inches=0.1,
                facecolor='auto', edgecolor='auto',
                backend=None
                )
    print(f"Figure saved in {outdir}{outfile}")
        
plt.show()

### Can we simulate a PP in another way, besides using the $\{\tau_n\}$?
We need to simulate the times _when_ the events happen.
How?

Using the property of PP vs **uniform** distribution!

In [None]:
# Seeded generator and parameters for the uniform-placement simulation.
seed = 10
prng = np.random.RandomState(seed=seed)

rate = 10
delta_t = 1
T = 1000  # max time window

1. For each time interval $t$ we generate $n(t) = N(s+t) - N(s)$ events. 

2. Then, we generate $n(t)$ event times $\{t_k\}$ uniformly at random within $[s,s+t)$. 

Example for one interval

In [None]:
# Draw from the seeded `prng` created in the previous cell: the global
# np.random functions bypass that seed, so the figure would change on every run.
nt = prng.poisson(rate * delta_t) # number of events within an interval delta_t
ts = prng.uniform(0, delta_t, size=nt)
ts = np.sort(ts)
print(f"n(t) = {nt}\nt_k={ts}")

plt.figure(figsize=(6,6))

# top panel: one marker per event at its (sorted) time
plt.subplot(2,1,1)
y = np.ones(len(ts))
plt.scatter(ts, y, s = 100, edgecolors='black')
plt.ylabel('Event happening')
plt.xlabel('t')
# plt.yticks(np.arange(1,2))

# bottom panel: running event count, with the total n(t) as a dashed line
plt.subplot(2,1,2)
plt.step(ts,np.cumsum(y))
plt.plot(ts,nt * np.ones(len(ts)),ls='--',label=f"n(t)={nt}", c='black',lw=2)
plt.ylabel('N(t)')
plt.xlabel('t')
plt.legend()

plt.show()

Now sample $T$ such intervals and collect the interarrival times $\{\tau_k\}$.

In [None]:
event_times = [] # event times of the whole process, over all T intervals
for i in range(T):

    # seeded generator, so that the experiment is reproducible
    nt = prng.poisson(rate * delta_t) # number of events within an interval delta_t
    # interval i covers [i*delta_t, (i+1)*delta_t): shift its events accordingly
    ts = i * delta_t + prng.uniform(0, delta_t, size=nt)

    event_times.extend(ts)

# Interarrival times of the whole process. Differencing inside each interval
# separately would drop every gap that straddles an interval boundary (long
# gaps are the most likely to do so) and bias the sample towards short gaps.
taus = np.diff(np.sort(event_times))

Plot empirical distribution and check if it matches an exponential distribution of mean $1/\lambda$.

In [None]:
# Grid over the observed interarrival times and the exponential density on it.
xs = np.linspace(start=0, stop=taus.max(), num=100)
exps = exponential_pmf(x=xs, lmbda=rate)

In [None]:
plt.figure()

bins = 20
# empirical density of the interarrival times
plt.hist(taus, bins=bins, density=True, label='Empirical')
# `exps` holds density *values* on the grid `xs`, so it is drawn as a curve;
# histogramming those values as if they were samples compares unrelated things.
plt.plot(xs, exps, label='Theoretical exponential')
plt.xlim(0,taus.max())
plt.legend()
plt.show()

A histogram comparison depends on the choice of binning... let's compare the cumulative distribution functions (CDFs) instead.

In [None]:
# empirical CDF of the data: sorted sample against k/n for k = 0, ..., n-1
ecdf_x = np.sort(taus)
n_taus = len(ecdf_x)
ecdf_y = np.arange(n_taus) / float(n_taus)

# exact CDF of an exponential with the simulation rate, on a regular grid
xs = np.linspace(0, taus.max(), 100)
exps = 1 - np.exp(-rate * xs)

fig, ax = plt.subplots(1,1, figsize=(8,5))
ax.plot(xs, exps, label='Theoretical (Exp) CDF')
ax.plot(ecdf_x, ecdf_y, label='Empirical CDF')
ax.set_xlabel(r'$\tau_k$ (interarrival time)')
ax.set_ylabel(r'$F(x)$')
ax.legend(fontsize=14)

## Q: what happens if you change the _uniform_ distribution to something else?
Let's try.  
We can for instance use a Gamma distribution.

In [None]:
# Gamma parameters: mean = shape * scale, std = scale * sqrt(shape)
shape = 1
scale = 1
print(shape * scale, np.sqrt(shape) * scale)
# np.random.gamma(shape, scale, 1)

In [None]:
# Draw from the seeded `prng` rather than the global np.random state, which
# ignores the seed set earlier and makes this figure change on every run.
nt = prng.poisson(rate * delta_t) # number of events within an interval delta_t
ts = prng.gamma(shape, scale, size=nt)
ts = np.sort(ts)
print(f"n(t) = {nt}, {len(ts)}")

plt.figure(figsize=(6,6))

# top panel: one marker per event at its (sorted) time
plt.subplot(2,1,1)
y = np.ones(len(ts))
plt.scatter(ts, y, s = 100, edgecolors='black')
plt.ylabel('Event happening')
plt.xlabel('t')
# plt.yticks(np.arange(1,2))

# bottom panel: running event count, with the total n(t) as a dashed line
plt.subplot(2,1,2)
plt.step(ts,np.cumsum(y))
plt.plot(ts,nt * np.ones(len(ts)),ls='--',label=f"n(t)={nt}", c='black',lw=2)
plt.ylabel('N(t)')
plt.xlabel('t')
# plt.xlim([0,delta_t])
plt.legend()

plt.show()

So far we do not notice much difference ...  
Let's generate more intervals.

In [None]:
taus = [] # interarrival times
for i in range(T):

    # seeded generator (not the global np.random state), for reproducibility
    nt = prng.poisson(rate * delta_t) # number of events within an interval delta_t
    # event times drawn from a Gamma instead of prng.uniform(0, delta_t, size=nt)
    ts = prng.gamma(shape, scale, size=nt)
    ts = np.sort(ts)

    # append all these interarrival times
    taus.extend(np.diff(ts))
taus = np.array(taus)

Compare the empirical CDF of these interarrival times with the CDF of an exponential distribution of rate $\lambda$.

In [None]:
# empirical CDF: the sorted interarrival times against k/n, k = 0, ..., n-1
ecdf_x = np.sort(taus)
sample_size = len(ecdf_x)
ecdf_y = np.arange(sample_size) / float(sample_size)

# exact exponential CDF at the simulation rate, used as the reference curve
xs = np.linspace(0, taus.max(), 100)
exps = 1 - np.exp(-rate * xs)

fig, ax = plt.subplots(1,1, figsize=(8,5))
for grid, cdf, name in ((xs, exps, 'Theoretical (Exp) CDF'),
                        (ecdf_x, ecdf_y, 'Empirical CDF')):
    ax.plot(grid, cdf, label=name)
ax.set_xlabel(r'$\tau_k$ (interarrival time)')
ax.set_ylabel(r'$F(x)$')
ax.legend(fontsize=14)

The two distributions don't match.  
That's because you need **uniformly** distributed times $t_k$ to get **exponentially** distributed interarrivals $\tau_k$.

# 4. Non-uniform Poisson Process
Let's simulate a NUPP and see what it looks like.

We consider as an example:
 - $\lambda(t) = \lambda_{max} \, \cos^2(2\pi\, t)$

This is a periodic behavior with $\lambda$ oscillating between 0 and its maximum value $\lambda_{max}$.


Simulating the process (no need to know why this is the way to simulate, but if you are interested you should look at "rejection sampling").

In [None]:
T = 2  # candidate event times below are drawn uniformly on [0, T)
maxrate = 10  # amplitude of the rate function (its largest value)

# Generate rate function
def rfun(t, maxrate: float = 10.):
    """Time-dependent rate ``maxrate * cos(2*pi*t)**2``.

    Parameters
    ----------
    t : float or numpy.ndarray
        Time(s) at which the rate is evaluated.
    maxrate : float, optional
        Amplitude of the oscillation, i.e. the largest value the rate takes.

    Returns
    -------
    float or numpy.ndarray
        Rate value(s) between 0 and ``maxrate``, with the shape of ``t``.
    """
    cosine = np.cos((2*np.pi) * t)
    return maxrate * (cosine ** 2)

# Candidate events of a constant-rate process at the maximal rate on [0, T).
# They are drawn from the seeded `prng` (not the global np.random state) so
# that the simulation is reproducible.
N_T = prng.poisson(maxrate * T)
ts  = prng.uniform(0, T,size=N_T)

# thinning: keep a candidate at time t with probability rfun(t) / maxrate
keep = prng.uniform(size=len(ts)) <= (rfun(ts, maxrate=maxrate) / maxrate)
ts_thin = ts[keep].copy() # valid values for the NUPP

In [None]:
fig, axs = plt.subplots(2,1, figsize=(10,3), sharex=True)

# draw rate
xs = np.linspace(0,T,100)
# pass the configured maxrate: relying on rfun's default would draw the wrong
# curve as soon as `maxrate` is changed in the simulation cell
axs[0].plot(xs, rfun(xs, maxrate=maxrate), 'b-')
axs[0].set_ylabel(r'$\lambda$')
# draw points: all candidates in faint black, the retained (thinned) ones in blue
axs[1].scatter(ts, [0]*len(ts), c='k', s=50, alpha=0.1)
axs[1].scatter(ts_thin, [0]*len(ts_thin), c='b', s=50, alpha=0.4)
axs[1].set(xlim=[0, T])
axs[1].get_yaxis().set_visible(False)
axs[1].set_xlabel('t')