In [80]:
import pandas as pd
import math
import numpy as np
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel
import matplotlib.pyplot as plt
from scipy import stats
import warnings

from statsmodels.tsa.regime_switching.markov_autoregression import MarkovAutoregression
from statsmodels.tsa.regime_switching.markov_regression import MarkovRegression

In [81]:
#generate some fake data

n = 1000
beta01, beta11 = 5,-3
beta02, beta12 = 2, 4

#set up regression mixture
x1 = np.random.uniform(0, 10, size=400)
x2 = np.random.uniform(0, 10, size=600)

y1 = beta01 + beta11*x1 + np.random.normal(scale=4.0, size=400)
y2 = beta02 + beta12*x2 + np.random.normal(scale=1.0,size=600)

x = np.concatenate([x1, x2])
y = np.concatenate([y1, y2])


#set up 2 component mixture
a1 = np.random.normal(0, 1, size=600)
a2 = np.random.normal(5, 3, size=400)
a = np.concatenate([a1,a2])

In [108]:
class SwitchingRegression(GenericLikelihoodModel):
    
    def __init__(self, endog, exog, ncomp=2, switch_var=True):
        super(SwitchingRegression, self).__init__(endog, exog)
        
        nobs, k = self.exog.shape
        self.ncomp = ncomp
        self.nparams = (k+1)*ncomp
        
        #random start
        np.random.seed(0)
        weights = np.random.uniform(size=(nobs,ncomp))
        denom = np.repeat(weights.sum(axis= 1),self.ncomp).reshape(nobs,ncomp)
        self.weights = (weights/denom)
        
        #adjust param names
        param_names = []
        for comp in range(ncomp):
            for name in self.data.xnames:
                param_names.append(name+str(comp))
            param_names.append('sigma'+str(comp))
        
        self.data.xnames = param_names
        
        
    
    def nloglikeobs(self, params):
        """do maximum likelihood estimation"""

        nobs, k = self.exog.shape
        likelihood = []
        for comp in range(self.ncomp):
            
            #get params
            comp_params = params[comp*(k+1): (comp+1)*(k + 1)]
            beta = comp_params[:-1]
            sigma = comp_params[-1]
            weights = self.weights[:,comp]
            
            #calculate likelihood from formula
            beta = np.tile(beta,nobs).reshape(nobs,k)
            means = (beta*self.exog).sum(axis=1)
            comp_like = -np.log(sigma)-.5*((self.endog-means)/sigma)**2 
            comp_like = weights*comp_like
            likelihood.append(comp_like)

        return -np.array(likelihood).sum().sum()
    
    
    def e_step(self, params):
        #recompute weights
        nobs, k = self.exog.shape
        weights =[]
        for comp in range(self.ncomp):
            comp_params = params[comp*(k+1): (comp+1)*(k + 1)]
            beta = comp_params[:-1]
            sigma = comp_params[-1]
              
            beta = np.tile(beta,nobs).reshape(nobs, k)
            mean = (beta*self.exog).sum(axis=1)
            wi = np.exp(-.5*((self.endog - mean)/sigma)**2)/(sigma*(2*math.pi)**.5)
            weights.append(np.maximum(wi,1e-5))
            
        #update loop variables
        weights = np.array(weights).transpose()
        denom = np.repeat(weights.sum(axis= 1),self.ncomp).reshape(nobs,self.ncomp)
        return (weights/denom)
    
    
    def fit(self, **kwds):
        """print that we did it"""
        nobs, k = self.exog.shape
        model = None
        
        for i in range(10):
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore")
                start = np.ones(len(self.data.xnames))
                model = super(SwitchingRegression, self).fit(disp=False, start_params=start, method='nm')
                weights = self.e_step(model.params)
                self.weights = weights
        
        return model


model = SwitchingRegression(y, sm.add_constant(x))
result = model.fit()
print result.summary()

                          SwitchingRegression Results                          
Dep. Variable:                       y   Log-Likelihood:                -1374.2
Model:             SwitchingRegression   AIC:                             2752.
Method:             Maximum Likelihood   BIC:                             2762.
Date:                 Sat, 27 Apr 2019                                         
Time:                         11:37:08                                         
No. Observations:                 1000                                         
Df Residuals:                      998                                         
Df Model:                            1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const0         0.7984      0.459      1.739      0.082      -0.101       1.698
x10           -2.3848      0.077    -31.098