In [5]:
import pandas as pd
import math
import numpy as np
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel
import matplotlib.pyplot as plt
from scipy import stats

In [86]:
#generate some fake data

n = 1000
beta01, beta11 = 5,-3
beta02, beta12 = 2, 3


#set up regression mixture
x1 = np.random.uniform(0, 5, size=300)
x2 = np.random.uniform(0, 5, size=600)

y1 = beta01 + beta11*x1 + np.random.normal(size=300)
y2 = beta02 + beta12*x2 + np.random.normal(size=600)

x = np.concatenate([x1, x2])
y = np.concatenate([y1, y2])

In [98]:
class Clusters(GenericLikelihoodModel):
    
    def __init__(self, endog, exog, ncomp=2):
        super(Clusters, self).__init__(endog, exog)
        
        nobs, k = self.exog.shape
        self.ncomp = ncomp
        self.nparams = k*ncomp #hard coded for now
        self.weights = np.ones(ncomp)/(1.*ncomp)

        param_names = []
        for comp in range(ncomp):
            for name in self.data.xnames:
                param_names.append(name+str(comp))
        self.data.xnames = param_names
        
    
    def nloglikeobs(self, params, v=False):
        """do maximum likelihood estimation"""
        nobs, k = self.exog.shape
        comp_likes = []
        
        for comp in range(self.ncomp):
            
            beta = params[2*comp: (k + 2*comp)]
            beta = np.tile(beta,nobs).reshape(nobs, k)
            resid = self.endog - (beta*self.exog).sum(axis=1)
            
            like = stats.norm.pdf(resid, loc=0, scale=1)
            comp_likes.append(like)
            
            
        weights = np.tile(self.weights, nobs).reshape(nobs, self.ncomp)
        comp_likes = weights*np.array(comp_likes).transpose()
        
        return -np.log(comp_likes.sum(axis=1)).sum()
    
    
    def fit(self, start_params=None, maxiter=1000, maxfun=5000, **kwds):
        """print that we did it"""
        tol = 1e-10
        nobs, k = self.exog.shape
        
        #loop variables
        maxiter = 50
        diff = 1
        model = None
        
        while diff > tol and maxiter >=0 :
            model = super(Clusters, self).fit(disp=False)
            weights = []
            
            #recompute weights
            for comp in range(self.ncomp):
                beta = model.params[2*comp: (k + 2*comp)]
                beta = np.tile(beta,nobs).reshape(nobs, k)
                resid = self.endog - (beta*self.exog).sum(axis=1)
                weights.append( stats.norm.pdf(resid, loc=0, scale=1).mean() )
            
            #update loop variables
            weights = np.array(weights)/np.array(weights).sum()
            diff = np.array(weights).mean()- self.weights.mean()
            maxiter = maxiter-1
            
            self.weights = weights
        
        if maxiter > 0:
            print '[EM convergence achieved] \n'
            print'========================================================'
            print 'weights: %s'%self.weights
            print'========================================================\n'
        
        model = super(Clusters, self).fit()
        return model
        
        
model = Clusters(y,sm.add_constant(x))
result = model.fit()

print result.summary()


[EM convergence achieved] 

weights: [0.36165024 0.63834976]

Optimization terminated successfully.
         Current function value: 1.956503
         Iterations: 332
         Function evaluations: 560
                               Clusters Results                               
Dep. Variable:                      y   Log-Likelihood:                -1760.9
Model:                       Clusters   AIC:                             3526.
Method:            Maximum Likelihood   BIC:                             3535.
Date:                Tue, 12 Mar 2019                                         
Time:                        19:27:42                                         
No. Observations:                 900                                         
Df Residuals:                     898                                         
Df Model:                           1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
--------