# Maximum likelihood estimation of the Bivariate Poisson distribution

In [40]:
import numpy as np
import matplotlib.pyplot as plt
import math
import pandas as pd
factorial = np.vectorize(math.factorial)
# combination = lambda x,y: factorial(x)/(factorial(y)*factorial(x-y))

## Example data

In [41]:
X = np.random.randint(0,10,100)
Y = np.random.randint(0,10,100)


## Implementation

The probabilty function is 
$$
    p(x,y) = P(X = x, Y = y) = e^{-(\lambda_1+\lambda_2+\lambda_3)} \frac{\lambda_1^x}{x!}\frac{\lambda_2^y}{y!} \sum_{i = 0}^{min(x,y)}{\binom xi \binom yi i! \left(\frac{\lambda_3}{\lambda_1\lambda_2}\right)^i}
$$

Then 
$$
    \log(p(x, y) ) = -(\lambda_1+\lambda_2+\lambda_3) + x \log(\frac{\lambda_1}{x!}) +  y \log(\frac{\lambda_2}{y!}) + \log \left( \sum_{i = 0}^{min(x,y)}{\binom xi \binom yi i! \left(\frac{\lambda_3}{\lambda_1\lambda_2}\right)^i} \right)
$$

In [42]:
def lp(x, y, l1, l2, l3):
    """ Computes the log of probabilty function"""

    suma = - l1 - l2  - l3 
    suma += x * np.log(l1 / math.factorial(x))
    suma += y * np.log(l2 / math.factorial(y))
    z = min(x, y)
    if(z != 0):
        I = np.array(range(z)) # define I = [0,1,..,min(x,y)]
        suma += np.log(((math.factorial(x)*math.factorial(y))/(factorial(x - I)*factorial(I)*factorial(y - I)) * np.exp(I*np.log(l3/l1*l2))).sum())
    return suma       

In [43]:
arr = np.array([])

for x,y in zip(X,Y):
    # print(x,y)
    arr = np.append(arr, lp(x,y,1,3,1.4))
arr 

array([  -6.97889075, -171.05982437,  -78.61177471, -119.51783503,
       -111.47629726,  -67.12249258,  -90.51458322,  -59.40977205,
        -40.42886251,  -90.23682322,   -7.47944154,  -35.41541534,
         -4.30138771,   -4.30138771,  -26.6539794 ,  -18.11221532,
         -4.30138771,  -90.51458322,   -9.67666612,   -7.47944154,
         -3.09616569,  -24.76532838,  -26.6539794 ,  -97.63873749,
        -86.98682815,  -78.79529607,  -90.51458322,   -6.97889075,
        -97.63873749,   -9.67666612, -171.05982437,   -5.4       ,
        -23.84439727,  -38.28383354,  -28.23884643,  -93.81042008,
         -6.97889075,  -11.56020685,  -90.23682322,   -9.67666612,
         -7.47944154,  -78.61177471,   -9.67666612,  -57.38584351,
        -13.75743142,  -49.92838482, -102.99624366,   -7.47944154,
       -150.85906806,  -60.17419365, -105.19346823,   -5.68768207,
        -43.77689498, -116.9693587 ,  -67.12249258,  -42.62608708,
       -114.42085896,  -85.20344844, -123.61782466,  -59.40977

Given $(X_1,Y_2),\dots,(X_n,Y_n)$ Bivariate Poisson random variables with parameters $(\lambda_1,\lambda_2, \lambda_3)$ then the log likelihood function is 

$$
    \text{llf} = \log(\prod_{i=0}^{n}{p(x_{i},y_{i})}) = \sum_{i= 0}^{n}{\log(p(x_{i},y_{i}))} 
$$

$$
    = -n(\lambda_1+\lambda_2+\lambda_3) + \sum_{i=0}^{n}{x_i \log \left(\frac{\lambda_1}{{x_i}!} \right)} + \sum_{i=0}^{n}{y_i \log \left(\frac{\lambda_2}{{y_i}!} \right)} + \sum_{j=1}^{n}{\log\left( \sum_{i = 0}^{min(x_j,y_j)}{\binom {x_j}i \binom {y_j}i i! \left(\frac{\lambda_3}{\lambda_1\lambda_2}\right)^i} \right)}
$$


In [49]:
def llf(X,Y,l1,l2,l3):
    lam = np.array([l3/(l1*l2)])
    if X.shape[0] != Y.shape[0]: raise Exception("X and Y must have the same length") 
    n = X.shape[0]
    suma = - n * (l1 + l2 + l3)
    suma += ( X * (np.log(l1) - np.log(factorial(X))) ).sum()
    suma += ( Y * (np.log(l1) - np.log(factorial(Y))) ).sum()
    for i in range(n):
        z = min(X[i],Y[i])
        if z == 0: continue
        I = np.array(range(z))
        suma += np.log((((factorial(X[i])*factorial(Y[i]))/(factorial(X[i] - I)*factorial(I)*factorial(Y[i] - I))) * np.power(lam,I)).sum())
    return suma 

## Algorithm to $\lambda_3$

From [Kawamura 1984] we have that $\lambda_1 + \lambda_3 = \hat{X}$ and $\lambda_2 + \lambda_3 = \hat{Y}$ with $\lambda_3 \in{[0,\min(\hat{X},\hat{Y})]}$ 

In [56]:
def optimaze_llf_l3(X,Y, iterations, size_step = 10):
    x_hat = X.sum()/X.shape[0]
    y_hat = Y.sum()/Y.shape[0]
    interval = (0,min(x_hat,y_hat))
    d = np.linspace(interval[0],interval[1],size_step)

    print(interval)
    for i in range(iterations):
        lff_d = np.array([ llf(X,Y,X.sum()/X.shape[0]-l,Y.sum()/Y.shape[0]-l,l) for l in d[0:size_step-2]]) # [0,min(x_hat,y_hat))
        print("llf values", lff_d)
        indexes = np.where(lff_d == lff_d.max()) # Miniminzg lff on D_i
        print("Minimum values", indexes)
        index = indexes[0][0]
        l3 = d[index]
        if(index == 0 or index == 9): 
            if(index == 0): 
                interval = (d[index], d[index+1])
            else: 
                interval = (d[index-1], d[index])
        else: interval = (d[index - 1], d[index + 1])
        print("D_{} = ".format(i), interval)
        d = np.linspace(interval[0],interval[1],size_step)
    
    return l3

In [57]:
optimaze_llf_l3(X,Y, iterations=10)

(0, 4.29)
llf values [-6214.08939679 -6210.7020177  -6210.6064085  -6214.74473508
 -6224.79251191 -6243.50105543 -6275.31214747 -6327.17046033]
Minimum values (array([2]),)
D_0 =  (0.4766666666666667, 1.4300000000000002)
llf values [-6210.7020177  -6210.37281752 -6210.21156742 -6210.2273416
 -6210.43068376 -6210.83368609 -6211.45009206 -6212.29542202]
Minimum values (array([2]),)
D_1 =  (0.5825925925925927, 0.7944444444444445)
llf values [-6210.37281752 -6210.32217292 -6210.27991005 -6210.24612581
 -6210.22092057 -6210.20439826 -6210.19666633 -6210.19783584]
Minimum values (array([6]),)
D_2 =  (0.7002880658436215, 0.747366255144033)
llf values [-6210.20439826 -6210.20191654 -6210.19987009 -6210.19826015
 -6210.19708792 -6210.19635463 -6210.19606153 -6210.19620985]
Minimum values (array([6]),)
D_3 =  (0.7264426154549612, 0.7369044352994971)
llf values [-6210.19635463 -6210.19625141 -6210.19616994 -6210.19611024
 -6210.19607232 -6210.19605619 -6210.19606186 -6210.19608935]
Minimum values

0.7325330351763267

# References


Kawamura, Kazutomo: Direct calculation of maximum
likelihood estimator for the bivariate Poisson distribution. In: Kodai Mathe-
matical Journal 7 (1984), Nr. 2, S. 211 – 221. – URL https://doi.org/10.
2996/kmj/1138036908