## <font color = "brown"> Importing Libraries </font>

In [1]:
import math

import numpy as np

## <font color = "brown"> Generating Data </font>

$$
\begin{aligned}
\text{Let }\space\space Z_i&: \text{the cluster of data point } i\\
X_i &:\text{the value of data point } i
\\
\\\text{We want to generate data }&\text{from a Poisson mixture model where} \\
\\Z_i \sim \space&\text{Categorical}(0.2, 0.8) \\
\\\text{if } Z_i &= 1: \\
&\space\space X_i \sim \text{Poisson}(2) \\
\text{if } Z_i &= 2: \\
&\space\space X_i \sim \text{Poisson}(6) \\
\end{aligned}
$$

In [2]:
# Fixed seed so that "Restart Kernel -> Run All" reproduces the same sample
# (and therefore the same EM trajectory) every time.
SEED = 42
rng = np.random.default_rng(SEED)

# Draw exactly 200 observations from Poisson(2) and 800 from Poisson(6),
# i.e. the cluster sizes are fixed at the 0.2 / 0.8 proportions rather than
# being sampled per observation.
Z1 = rng.poisson(2, 200)
Z2 = rng.poisson(6, 800)

# Pooled sample: cluster-1 draws first, then cluster-2 draws.
X = np.concatenate((Z1, Z2))

## <font color = "brown"> Algorithm </font>

In [3]:
def Poisson(x, l):
    """Return the Poisson probability mass P(X = x) for rate ``l``.

    The mass is evaluated in log space,
    ``exp(x * log(l) - l - lgamma(x + 1))``, instead of
    ``l ** x * exp(-l) / x!``. This avoids two problems of the direct
    formula: ``np.math`` no longer exists in NumPy >= 2.0, and ``l ** x``
    silently overflows when ``l`` is a Python int and ``x`` is a NumPy
    integer (int64 arithmetic).

    Parameters
    ----------
    x : int
        Observed count (scalar, non-negative).
    l : float
        Rate of the distribution; expected to be non-negative.

    Returns
    -------
    numpy.float64
        The probability mass at ``x``.
    """
    # log(0) is undefined, so handle the degenerate rate explicitly:
    # all the mass sits on x == 0 (matches 0 ** x * exp(0) / x!).
    if l == 0:
        return np.float64(1.0 if x == 0 else 0.0)

    # lgamma(x + 1) == log(x!) and stays finite for counts far beyond
    # what an explicit factorial can be divided by in floating point.
    log_pmf = x * np.log(l) - l - math.lgamma(x + 1)
    return np.exp(log_pmf)

In [4]:
def Posterior(X, lambdas, probs):
    """Compute the posterior cluster probabilities of every observation.

    For each value in ``X`` and each cluster ``k`` the unnormalised weight
    ``Poisson(value, lambdas[k]) * probs[k]`` is computed, then the weights
    of that observation are divided by their sum.

    Parameters
    ----------
    X : iterable
        Observed values.
    lambdas : sequence
        Poisson rate of each cluster.
    probs : sequence
        Mixing weight of each cluster (same length as ``lambdas``).

    Returns
    -------
    list of list
        One inner list per cluster; entry ``j`` of inner list ``k`` is the
        posterior probability that observation ``j`` belongs to cluster ``k``.
    """
    n_clusters = len(lambdas)
    per_observation = []

    for observation in X:  # Loop through each data point

        # Likelihood under each cluster, weighted by that cluster's prior
        joint = [
            Poisson(observation, lambdas[k]) * probs[k]
            for k in range(n_clusters)  # Loop through each cluster
        ]

        # Normalise so the weights of this observation sum to one
        evidence = sum(joint)
        per_observation.append([weight / evidence for weight in joint])

    # Rows are observations here; transpose so that rows are clusters
    return np.array(per_observation).T.tolist()

In [5]:
def OptimalPi(posterior):
    """Return the updated mixing weights given the posterior probabilities.

    The weight of cluster ``k`` is the posterior mass assigned to that
    cluster divided by the posterior mass of all clusters. Works for any
    number of clusters, not just two.

    Parameters
    ----------
    posterior : sequence of sequences
        One inner sequence per cluster, holding the posterior probability
        of each observation for that cluster.

    Returns
    -------
    tuple of float
        One mixing weight per cluster, in the same order as ``posterior``.

    Raises
    ------
    ZeroDivisionError
        If the total posterior mass is zero (e.g. there are no observations).
    """
    # Total posterior mass collected by each cluster
    cluster_mass = [sum(cluster_posterior) for cluster_posterior in posterior]

    # Normaliser: posterior mass across every cluster
    total_mass = sum(cluster_mass)

    return tuple(mass / total_mass for mass in cluster_mass)

In [6]:
def OptimalLambda(X, posterior):
    """Return the updated Poisson rate of every cluster.

    Each rate is the posterior-weighted mean of the data: the sum of
    ``value * weight`` over all observations divided by the sum of the
    weights of that cluster.

    Parameters
    ----------
    X : iterable
        Observed values.
    posterior : sequence of sequences
        One inner sequence per cluster, holding the posterior probability
        of each observation for that cluster.

    Returns
    -------
    list
        One rate per cluster, in the same order as ``posterior``.
    """
    return [
        # Weighted sum of the data over the total weight of this cluster
        sum(value * weight for value, weight in zip(X, weights)) / sum(weights)
        for weights in posterior
    ]

In [7]:
def IncompleLogLikelihood(X, lambdas, probs):
    """Return the incomplete-data log-likelihood of ``X`` under the mixture.

    For every observation the mixture likelihood
    ``sum_k Poisson(value, lambdas[k]) * probs[k]`` is computed; the result
    is the sum of the logs of those likelihoods.

    Parameters
    ----------
    X : iterable
        Observed values.
    lambdas : sequence
        Poisson rate of each cluster.
    probs : sequence
        Mixing weight of each cluster (same length as ``lambdas``).

    Returns
    -------
    float
        Sum over observations of the log mixture likelihood.
    """
    cluster_ids = range(len(lambdas))

    return sum(
        # Log of the likelihood of one observation, marginalised over clusters
        np.log(sum(Poisson(value, lambdas[k]) * probs[k] for k in cluster_ids))
        for value in X
    )
        

In [8]:
def ExpectationMaximisation(X, lambdas, probs, epsilon=1e-5, max_iter=None):
    """Fit a Poisson mixture to ``X`` with the EM algorithm.

    Each iteration computes the posterior cluster probabilities under the
    current parameters, re-estimates the mixing weights and rates from
    them, and records the incomplete log-likelihood of the new parameters.
    Iteration stops once the log-likelihood changes by ``epsilon`` or less
    between two consecutive iterations (a decrease also stops the loop),
    or once ``max_iter`` iterations have run.

    Parameters
    ----------
    X : iterable
        Observed values.
    lambdas : sequence
        Initial Poisson rate of each cluster.
    probs : sequence
        Initial mixing weight of each cluster.
    epsilon : float, optional
        Convergence tolerance on the log-likelihood change. Defaults to
        ``1e-5``, the value that was previously hard-coded.
    max_iter : int or None, optional
        Upper bound on the number of iterations; ``None`` (default) means
        no bound, which is the previous behaviour.

    Returns
    -------
    tuple
        ``(lambdas, probs)`` after the final iteration.
    """
    Incompletes = []
    # Infinite until two log-likelihoods exist, so the first two iterations
    # are never cut short by the tolerance check.
    delta = float("inf")
    n_iter = 0  # not named `iter`, which would shadow the builtin

    while delta > epsilon and (max_iter is None or n_iter < max_iter):

        # E-step: posterior probabilities given the current parameters
        posteriors = Posterior(X, lambdas, probs)

        # M-step: optimal parameters given those posterior probabilities
        probs = OptimalPi(posteriors)
        lambdas = OptimalLambda(X, posteriors)

        # Incomplete log-likelihood under the new parameters
        Incompletes.append(IncompleLogLikelihood(X, lambdas, probs))

        # Change in log-likelihood relative to the previous iteration
        if len(Incompletes) > 1:
            delta = Incompletes[-1] - Incompletes[-2]

        n_iter += 1
    print(f"EM Algorithm completed in {n_iter} iterations!")

    return lambdas, probs

In [9]:
# Starting point for EM: initial Poisson rates and equal mixing weights
lambas, probs = [1, 10], [0.5, 0.5]

# Run EM on the generated sample from that starting point
FinalLambda, FinalProb = ExpectationMaximisation(X=X, lambdas=lambas, probs=probs)

EM Algorithm completed in 79 iterations!


In [10]:
# Poisson rates returned by ExpectationMaximisation, one per cluster
FinalLambda

[2.2633493834138485, 5.932313029160158]

In [11]:
# Mixing weights returned by ExpectationMaximisation, one per cluster
FinalProb

(0.23448393394619185, 0.7655160660538082)