In [2]:
import pandas as pd
import math
import numpy as np
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel
import matplotlib.pyplot as plt
from scipy import stats

from statsmodels.tsa.regime_switching.markov_autoregression import MarkovAutoregression
from statsmodels.tsa.regime_switching.markov_regression import MarkovRegression

In [3]:
#generate some fake data

# Total sample size; the two groups below are split 40% / 60% of n.
n = 1000
n_minor = 2 * n // 5    # 400 when n == 1000
n_major = n - n_minor   # 600 when n == 1000

# (intercept, slope) of the two regression lines
beta01, beta11 = 5,-3
beta02, beta12 = 2, 4

# Dedicated seeded generator: a fresh kernel + "Run All" reproduces the same
# data, and the global numpy RNG state is left untouched.
rng = np.random.RandomState(0)

#set up regression mixture
x1 = rng.uniform(0, 10, size=n_minor)
x2 = rng.uniform(0, 10, size=n_major)

# component 1: steep negative slope, noise sd 2; component 2: positive slope, noise sd 4
y1 = beta01 + beta11*x1 + rng.normal(scale=2.0, size=n_minor)
y2 = beta02 + beta12*x2 + rng.normal(scale=4.0, size=n_major)

x = np.concatenate([x1, x2])
y = np.concatenate([y1, y2])


#set up 2 component mixture
# N(0, 1) for the larger group and N(5, 3) for the smaller one
a1 = rng.normal(0, 1, size=n_major)
a2 = rng.normal(5, 3, size=n_minor)
a = np.concatenate([a1,a2])

In [17]:
class Clusters(GenericLikelihoodModel):
    """Mixture of linear regressions estimated with an EM-style loop.

    Every component has its own coefficient vector and its own noise scale.
    With ``k`` columns in ``exog`` the flat parameter vector is laid out
    component by component as ``[beta_0, ..., beta_{k-1}, sigma]``, so it has
    ``(k + 1) * ncomp`` entries.

    ``self.weights`` is an ``(nobs, ncomp)`` array of per-observation
    component weights whose rows sum to one.  It is used to weight the
    component log densities in :meth:`nloglikeobs` and is re-estimated after
    every optimisation pass in :meth:`fit`.
    """

    # Lower bound applied to |sigma| so the normal density is always defined
    # even when the unconstrained optimiser proposes sigma <= 0.
    _MIN_SIGMA = 1e-8

    def __init__(self, endog, exog, ncomp=2, switch_var=True):
        """Set up the model and draw the initial component weights.

        Parameters
        ----------
        endog : array_like
            Response variable, one value per observation.
        exog : array_like
            Two-dimensional design matrix (``nobs`` x ``k``).
        ncomp : int
            Number of mixture components.
        switch_var : bool
            Accepted but currently not used by this class.
        """
        super(Clusters, self).__init__(endog, exog)

        nobs, k = self.exog.shape
        self.ncomp = ncomp
        # k coefficients plus one sigma per component (the previous
        # ``k*ncomp + 2`` was only right for ncomp == 2).
        self.nparams = (k + 1) * ncomp

        #random start
        # A private RandomState(0) yields the same numbers as seeding the
        # global generator with 0, without clobbering the caller's RNG state.
        rng = np.random.RandomState(0)
        weights = rng.uniform(size=(nobs, ncomp))
        self.weights = weights / weights.sum(axis=1)[:, np.newaxis]

        #adjust param names
        # one "<name><comp>" entry per regressor, followed by "sigma<comp>"
        base_names = list(self.data.xnames)
        param_names = []
        for comp in range(ncomp):
            for name in base_names:
                param_names.append(name + str(comp))
            param_names.append('sigma' + str(comp))
        self.data.xnames = param_names

    def _component_logpdf(self, params):
        """Return the ``(nobs, ncomp)`` Gaussian log densities of the residuals.

        Column ``c`` holds ``log N(endog - exog.dot(beta_c); 0, |sigma_c|)``
        where ``beta_c`` and ``sigma_c`` are sliced out of ``params``.
        """
        nobs, k = self.exog.shape
        params = np.asarray(params)
        columns = []
        for comp in range(self.ncomp):
            comp_params = params[comp*(k + 1): (comp + 1)*(k + 1)]
            beta = comp_params[:-1]
            # The optimiser is unconstrained, so use |sigma| (bounded away
            # from zero) as the scale; the sign of the reported sigma is
            # therefore not meaningful.
            sigma = max(abs(comp_params[-1]), self._MIN_SIGMA)
            resid = self.endog - np.dot(self.exog, beta)
            columns.append(stats.norm.logpdf(resid, loc=0, scale=sigma))
        return np.column_stack(columns)

    def nloglikeobs(self, params, v=False):
        """Negative weighted log-likelihood for ``params``.

        The component log densities are multiplied by the current
        ``self.weights`` and summed over observations and components, so the
        result is a single scalar.  ``v`` is accepted but unused.
        """
        weighted = self.weights * self._component_logpdf(params)
        return -weighted.sum()

    def fit(self, start_params=None, maxiter=1000, maxfun=5000,
            em_maxiter=5, tol=1e-8, **kwds):
        """Alternate parameter optimisation and weight updates.

        Each pass (1) minimises :meth:`nloglikeobs` with Nelder-Mead for the
        current weights and (2) recomputes the weights from the fitted
        component densities.  The loop stops when the mean absolute change of
        the row-sorted weights is at most ``tol`` or after ``em_maxiter``
        passes.

        Parameters
        ----------
        start_params : array_like, optional
            Starting values for the first pass.  Defaults to
            ``np.linspace(1, 5, nparams)``.  Later passes start from the
            previous pass's estimate.
        maxiter, maxfun : int
            Kept for signature compatibility; they are not forwarded.  The
            inner optimiser is capped at 200 Nelder-Mead iterations per pass.
        em_maxiter : int
            Maximum number of passes.
        tol : float
            Convergence threshold on the mean absolute weight change.
        **kwds
            Accepted and ignored.

        Returns
        -------
        The results object of the last inner optimisation, or ``None`` when
        ``em_maxiter`` is less than one.
        """
        if start_params is None:
            start_params = np.linspace(1., 5., len(self.data.xnames))
        current = np.asarray(start_params, dtype=float)

        model = None
        converged = False

        for _ in range(em_maxiter):
            # optimise the parameters for the current weights
            model = super(Clusters, self).fit(disp=False, start_params=current,
                                              method='nm', maxiter=200,
                                              full_output=True, retall=True)
            current = np.asarray(model.params, dtype=float)

            #recompute weights
            # Normalise in log space: subtracting the row maximum before
            # exponentiating keeps at least one entry at exp(0) == 1, so large
            # residuals cannot underflow every density to 0 and give 0/0.
            log_dens = self._component_logpdf(current)
            log_dens = log_dens - log_dens.max(axis=1)[:, np.newaxis]
            weights = np.exp(log_dens)
            weights = weights / weights.sum(axis=1)[:, np.newaxis]

            #this is hacky
            # rows are sorted before differencing, so the comparison ignores
            # which column a weight sits in
            diff = np.sort(weights, axis=1) - np.sort(self.weights, axis=1)
            diff = np.abs(diff).mean()

            self.weights = weights

            # a NaN diff compares False here and is never reported as converged
            if diff <= tol:
                converged = True
                break

        if converged:
            print('[EM convergence achieved] \n')
            print('========================================================')
        print('weights: %s' % self.weights.mean(axis=0))
        print('========================================================\n')

        return model
 
        
#test case #1 - easy 2 component mixture
# Fit the default two-component model to the simulated regression data; an
# intercept column is prepended to x.
model = Clusters(y, sm.add_constant(x))

result = model.fit()
# Single-argument print(...) calls behave the same on Python 2 and are valid
# syntax on Python 3.
print(result.summary())
print(result.mle_retvals)

[EM convergence achieved] 





AttributeError: 'numpy.ndarray' object has no attribute 'dropna'