<a href="https://colab.research.google.com/github/profteachkids/CHE5136_Fall2021/blob/main/Exponential_Poisson_Distributions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import scipy.stats as stats
from plotly.subplots import make_subplots
import plotly.io as pio
import pandas as pd
# Render every figure in this notebook with plotly's built-in dark theme.
pio.templates.default='plotly_dark'

# Seed NumPy's legacy global generator once, up front, so that every
# np.random.* call further down produces the same samples (and therefore the
# same figures) after a kernel restart followed by "Run All".
SEED = 0
np.random.seed(SEED)

In [2]:
def equal_area_histogram(data, nbins=None):
    """Outline of a density histogram whose bins all carry equal probability.

    The sorted sample is cut at ``nbins - 1`` equally spaced rank positions.
    Instead of reading the bin edge straight from the single order statistic
    at each cut, a straight line (value vs. rank) is least-squares fitted to
    the order statistics in a small window around the cut and evaluated at
    the cut position, which smooths the edge locations. Every bin holds a
    fraction ``1/nbins`` of the sample, so its height is
    ``1 / nbins / bin_width``.

    Parameters
    ----------
    data : array_like
        Sample values. Input of any dimensionality is flattened.
    nbins : int, optional
        Number of bins. Defaults to ``int(2 * n**(2/5))`` (capped to the
        sample size), where ``n`` is the number of data points.

    Returns
    -------
    bin_edges : ndarray, shape (2 * nbins + 2,)
        x coordinates of the step outline (every edge appears twice).
    bin_heights : ndarray, shape (2 * nbins + 2,)
        y coordinates of the step outline, starting and ending at 0 so the
        curve can be drawn directly as a closed-looking line plot.

    Raises
    ------
    ValueError
        If ``data`` is empty or ``nbins`` is not in ``1..n``.

    Notes
    -----
    A zero-width bin (for example, constant data) yields an infinite height,
    because the height is computed as ``1 / nbins / width``.
    """
    # axis=None sorts the flattened array, so N-D input is handled correctly.
    data_sorted = np.sort(data, axis=None)
    n = data_sorted.size
    if n == 0:
        raise ValueError("equal_area_histogram requires at least one data point")

    if nbins is None:
        # The rule-of-thumb default exceeds n only for n == 1; cap it there.
        nbins = max(1, min(int(2 * n ** (2 / 5)), n))
    if not 1 <= nbins <= n:
        raise ValueError(
            f"nbins must be between 1 and the number of data points ({n}); got {nbins}"
        )

    nperbin = n // nbins
    # Fractional rank positions of the bin boundaries; the two outer ones are
    # replaced by the sample minimum / maximum below.
    edges = np.linspace(0, n, nbins + 1)
    interior = edges[1:-1]

    # Regression window: roughly a quarter of a bin on each side of a cut.
    # At least two points are always kept; with fewer than 4 points per bin
    # the window used to be empty, which made the fit singular.
    half_width = max(nperbin // 4, 1)
    offsets = np.arange(-half_width, half_width)
    indices = (interior[:, None] + offsets[None, :]).astype(np.int64)
    window = data_sorted[indices]

    # Closed-form simple linear regression of value on rank, one fit per cut.
    # Centring the ranks avoids forming and inverting normal equations built
    # from large raw indices.
    rank_mean = indices.mean(axis=1)
    rank_dev = indices - rank_mean[:, None]
    slope = (rank_dev * window).sum(axis=1) / (rank_dev ** 2).sum(axis=1)
    fitted = window.mean(axis=1) + slope * (interior - rank_mean)

    smoothed_edges = np.r_[data_sorted[0], fitted, data_sorted[-1]]
    height = 1 / nbins / np.diff(smoothed_edges)

    # Duplicate every edge and pad the heights with zeros so that plotting
    # (bin_edges, bin_heights) as a line traces the rectangular outline.
    bin_edges = np.repeat(smoothed_edges, 2)
    bin_heights = np.r_[0., np.repeat(height, 2), 0.]

    return bin_edges, bin_heights

In [3]:
# 50 exponential scale (mean) parameters, evenly spaced from 2 to 9.
x = np.linspace(2, 9, num=50)

# Five independent draws per scale value; the draw is made as (5, 50) and
# transposed so that each row of `y` belongs to one entry of `x`.
y = np.random.exponential(scale=x, size=(5, 50)).transpose()

# Row-wise sample mean, then the deviation of each draw from its row mean
# expressed as a fraction of that mean.
ymean = y.mean(axis=1)
row_mean = ymean[:, np.newaxis]
scaled_y = (y - row_mean) / row_mean


In [4]:
# Equal-area histogram of all mean-scaled deviations pooled together.
# The outline is unpacked into dedicated names so that the `x` and `y` sample
# arrays created in the previous cell are not overwritten.
hist_edges, hist_heights = equal_area_histogram(scaled_y.flatten())
fig2=make_subplots()
fig2.add_scatter(x=hist_edges, y=hist_heights, mode='lines', name='error distribution')
# update_layout returns the figure, so as the last expression it renders it.
fig2.update_layout(
    width=800,
    height=600,
    xaxis_title='scaled deviation (y - row mean) / row mean',
    yaxis_title='probability density',
)

In [5]:
# Inverse-transform sampling: push uniform [0, 1) draws through the inverse
# CDF of an exponential distribution, -scale * ln(1 - u), with scale 2.
y_uniform = np.random.uniform(size=100_000)
y_exp = -2 * np.log(1 - y_uniform)

In [6]:
# Equal-area histogram of the samples produced by inverse-transform sampling.
x,y=equal_area_histogram(y_exp)
fig3=make_subplots()
# These are exponential samples, not regression errors, so the trace gets its
# own legend label instead of the one used for the previous figure.
fig3.add_scatter(x=x, y=y, mode='lines', name='exponential samples')
# update_layout returns the figure, so as the last expression it renders it.
fig3.update_layout(
    width=800,
    height=600,
    xaxis_title='y_exp',
    yaxis_title='probability density',
)

In [7]:
# Approximate a Poisson process by many independent Bernoulli trials:
# one trial per discrete time step, each with a small success probability.
n = 1_000_000   # number of time steps (an int, so it can be used as a size directly)
lmbda = 5000    # expected number of purchases over all n steps
p = lmbda / n   # purchase probability in a single step

# 1 marks a step with a purchase, 0 a step without. The sample size is taken
# from n so that it can never drift apart from the n used to compute p.
purchase = np.random.choice([0, 1], size=n, p=[1 - p, p])

In [8]:
# Positions (time-step indices) of the non-zero entries of `purchase`.
purchase_times = np.flatnonzero(purchase)

# Gap, in time steps, between each purchase and the one before it.
time_between = np.diff(purchase_times)

In [9]:
# Equal-area histogram outline of the gaps between consecutive purchases.
x, y = equal_area_histogram(time_between)

# Build the figure in one chained expression: each call returns the figure.
fig4 = (
    make_subplots()
    .add_scatter(x=x, y=y, mode='lines', name='time between sales')
    .update_layout(width=800, height=600)
)
fig4

In [11]:
# Number of purchases in consecutive, non-overlapping windows of time steps.
# -1 lets NumPy derive the number of windows from purchase.size, so the
# window count is no longer hard-coded to one particular simulation length.
steps_per_window = 100
sales = purchase.reshape((-1, steps_per_window)).sum(axis=1)

# Poisson distribution whose mean is the expected number of purchases per
# window (per-step probability times steps per window).
sales_poisson = stats.poisson(mu=p*steps_per_window)
# The original, misspelled name is kept as an alias for existing references.
sales_poison = sales_poisson

In [12]:
# A new figure name is used so the time-between-sales figure stored in `fig4`
# is not overwritten.
fig5=make_subplots()
fig5.add_histogram(x=sales, name='experimental')

# Evaluate the Poisson pmf on every count that actually occurs (0 .. max)
# rather than a fixed 0..4 range, and scale it by the number of windows so it
# is comparable with the histogram's raw counts.
counts = np.arange(sales.max() + 1)
expected_windows = sales_poison.pmf(counts) * sales.size
fig5.add_scatter(x=counts, y=expected_windows, mode='lines', name='Poisson')

# update_layout returns the figure, so as the last expression it renders it.
fig5.update_layout(
    width=800,
    height=600,
    xaxis_title='sales per window',
    yaxis_title='number of windows',
)