In [3]:
import pandas as pd
import math
import numpy as np
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel
import matplotlib.pyplot as plt
from scipy import stats
import warnings

from statsmodels.tsa.regime_switching.markov_autoregression import MarkovAutoregression
from statsmodels.tsa.regime_switching.markov_regression import MarkovRegression

In [4]:
#generate some fake data

# Seed the global NumPy RNG so that re-running the notebook from a fresh
# kernel reproduces exactly the same synthetic sample.
SEED = 0
np.random.seed(SEED)

n = 1000                 # total number of observations in each example
n_reg1 = 400             # regression mixture: observations from regime 1
n_reg2 = n - n_reg1      # regression mixture: observations from regime 2
n_mix1 = 600             # plain mixture: observations from component 1
n_mix2 = n - n_mix1      # plain mixture: observations from component 2

# (intercept, slope) of each regression regime
beta01, beta11 = 5,-3
beta02, beta12 = 2, 4

#set up regression mixture
x1 = np.random.uniform(0, 10, size=n_reg1)
x2 = np.random.uniform(0, 10, size=n_reg2)

# regime 1 is noisier (sd 4) than regime 2 (sd 1)
y1 = beta01 + beta11*x1 + np.random.normal(scale=4.0, size=n_reg1)
y2 = beta02 + beta12*x2 + np.random.normal(scale=1.0, size=n_reg2)

x = np.concatenate([x1, x2])
y = np.concatenate([y1, y2])


#set up 2 component mixture
a1 = np.random.normal(0, 1, size=n_mix1)
a2 = np.random.normal(5, 3, size=n_mix2)
a = np.concatenate([a1,a2])

In [38]:
class SwitchingRegression(GenericLikelihoodModel):
    """Finite mixture of Gaussian linear regressions estimated by maximum likelihood.

    With ``k`` regressors and ``ncomp`` components the parameter vector is laid
    out as::

        [beta_0 (k values), sigma_0, ..., beta_{ncomp-1} (k values), sigma_{ncomp-1},
         lambda_0, ..., lambda_{ncomp-1}]

    The ``lambda`` entries are unconstrained; they are mapped to mixing
    weights with a softmax inside :meth:`nloglikeobs`.

    NOTE(review): the softmax is unchanged when the same constant is added to
    every lambda, so the ``ncomp`` lambdas are not separately identified. This
    may be a source of ``nan`` standard errors; consider pinning one lambda to
    zero if inference on the parameters is needed.

    Parameters
    ----------
    endog : array_like
        Dependent variable.
    exog : array_like
        Regressor matrix with one column per regressor.
    ncomp : int, optional
        Number of mixture components (default 2).
    switch_var : bool, optional
        Accepted for interface compatibility but currently unused: every
        component always gets its own ``sigma``.
    """

    def __init__(self, endog, exog, ncomp=2, switch_var=True):
        super(SwitchingRegression, self).__init__(endog, exog)

        nobs, k = self.exog.shape
        self.ncomp = ncomp
        # k betas + 1 sigma + 1 lambda for every component
        self.nparams = (k+2)*ncomp

        # Rename the parameters so the summary table matches the layout of
        # the parameter vector consumed by nloglikeobs.
        param_names = []

        for comp in range(ncomp):
            for name in self.data.xnames:
                param_names.append(name+str(comp))
            param_names.append('sigma'+str(comp))

        for comp in range(ncomp):
            param_names.append('lambda'+str(comp))

        self.data.xnames = param_names

    def nloglikeobs(self, params):
        """Return the negative log-likelihood of each observation.

        Parameters
        ----------
        params : array_like
            Parameter vector in the layout described in the class docstring.

        Returns
        -------
        ndarray of shape (nobs,)
            Negative log mixture density of every observation. If any
            component's ``sigma`` is not strictly positive the density is
            undefined and ``+inf`` is returned for every observation so that
            optimisers move away from that point.
        """
        nobs, k = self.exog.shape
        params = np.asarray(params)

        # Softmax of the trailing ncomp entries, computed in log space; the
        # maximum is subtracted first so np.exp cannot overflow.
        raw_weights = params[(k+1)*self.ncomp:]
        shifted = raw_weights - raw_weights.max()
        log_weights = shifted - np.log(np.exp(shifted).sum())

        comp_loglikes = []
        for comp in range(self.ncomp):
            #get params
            comp_params = params[comp*(k+1): (comp+1)*(k+1)]
            beta = comp_params[:-1]
            sigma = comp_params[-1]

            # `not sigma > 0` also catches nan
            if not sigma > 0:
                return np.full(nobs, np.inf)

            means = np.dot(self.exog, beta)
            comp_loglikes.append(
                log_weights[comp]
                + stats.norm.logpdf(self.endog, loc=means, scale=sigma)
            )

        # log(sum_j exp(.)) across components without leaving log space, so
        # observations far in the tails do not underflow to log(0) = -inf.
        mixture_loglike = np.logaddexp.reduce(np.array(comp_loglikes), axis=0)
        return -mixture_loglike


# Fit the two-component switching regression to the synthetic (x, y) sample.
model = SwitchingRegression(y, sm.add_constant(x))

# Random starting values in [0.5, 1), one per parameter. Sizing by
# model.nparams instead of a hand-computed count keeps this line valid if the
# number of regressors or components changes.
start_params = np.random.uniform(low=.5, high=1., size=model.nparams)
result = model.fit(start_params=start_params)

# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(result.summary())



                          SwitchingRegression Results                          
Dep. Variable:                       y   Log-Likelihood:                -4342.3
Model:             SwitchingRegression   AIC:                             8689.
Method:             Maximum Likelihood   BIC:                             8698.
Date:                 Fri, 03 May 2019                                         
Time:                         17:13:35                                         
No. Observations:                 1000                                         
Df Residuals:                      998                                         
Df Model:                            1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const0       -14.5552        nan        nan        nan         nan         nan
x10           -7.0570        nan        nan



In [10]:
# Load data/milk.csv and list its columns.
data = pd.read_csv('data/milk.csv')
print(data.columns)

# Keep rows with no missing value in WW, FMO or INC, and add a 'const' column.
reg1 = sm.add_constant(data[['WW','FMO','INC']]).dropna()
# .copy() makes the filtered frame independent of the original, so the
# column assignment below is well defined and does not raise pandas'
# SettingWithCopyWarning.
reg1 = reg1[reg1['WW']<.4].copy()
reg1[['WW','FMO']] = np.log(reg1[['WW','FMO']])


# Regress log(WW) on a constant and log(FMO) with the two-component model.
model = SwitchingRegression(reg1['WW'],reg1[['const','FMO']])
result = model.fit()
print(result.summary())

Index([u'VENDOR', u'WW', u'WC', u'LFW', u'LFC', u'WIN', u'SYSTEM', u'YEAR',
       u'MONTH', u'DAY', u'FMOZONE', u'ESC', u'COOLER', u'QLFC', u'QLFW',
       u'QWW', u'QWC', u'ESTQTY', u'DEL', u'MILES', u'NUMSCHL', u'NUMWIN',
       u'POPUL', u'ADJPOP', u'NUM', u'GAS', u'FMO', u'INC'],
      dtype='object')
                          SwitchingRegression Results                          
Dep. Variable:                      WW   Log-Likelihood:                 7580.3
Model:             SwitchingRegression   AIC:                        -1.515e+04
Method:             Maximum Likelihood   BIC:                        -1.514e+04
Date:                 Sat, 27 Apr 2019                                         
Time:                         13:14:51                                         
No. Observations:                 4042                                         
Df Residuals:                     4039                                         
Df Model:                            2              