Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Add SUR predict method + test with grunfeld fitted

  • Loading branch information...
commit 6fc14833be52945ba4b0c8869c8f4669358ccc56 1 parent 45c9ce6
@alexandreyc authored
View
8 statsmodels/examples/example_sysreg.py
@@ -4,7 +4,7 @@
import numpy as np
import statsmodels.api as sm
-from statsmodels.sandbox.sysreg import *
+from statsmodels.sysreg.sysreg import *
#for Python 3 compatibility
from statsmodels.compatnp.py3k import asbytes
@@ -39,9 +39,9 @@
# Note that the results in Greene (5th edition) use a slightly different
# version of the Grunfeld data. To reproduce Table 14.1 the following changes
# are necessary.
-grun_sys[-2][5] = 261.6
-grun_sys[-2][-3] = 645.2
-grun_sys[-1][11,2] = 232.6
+#grun_sys[-2][5] = 261.6
+#grun_sys[-2][-3] = 645.2
+#grun_sys[-1][11,2] = 232.6
grun_mod = SUR(grun_sys)
grun_res = grun_mod.fit()
View
377 statsmodels/sandbox/sysreg.py
@@ -1,377 +0,0 @@
-from statsmodels.regression.linear_model import GLS
-import numpy as np
-import statsmodels.tools.tools as tools
-from statsmodels.base.model import LikelihoodModelResults
-from scipy import sparse
-
-#http://www.irisa.fr/aladin/wg-statlin/WORKSHOPS/RENNES02/SLIDES/Foschi.pdf
-
-__all__ = ['SUR', 'Sem2SLS']
-
-#probably should have a SystemModel superclass
-# TODO: does it make sense of SUR equations to have
-# independent endogenous regressors? If so, then
-# change docs to LHS = RHS
-#TODO: make a dictionary that holds equation specific information
-#rather than these cryptic lists? Slower to get a dict value?
-#TODO: refine sigma definition
-class SUR(object):
- """
- Seemingly Unrelated Regression
-
- Parameters
- ----------
- sys : list
- [endog1, exog1, endog2, exog2,...] It will be of length 2 x M,
- where M is the number of equations endog = exog.
- sigma : array-like
- M x M array where sigma[i,j] is the covariance between equation i and j
- dfk : None, 'dfk1', or 'dfk2'
- Default is None. Correction for the degrees of freedom
- should be specified for small samples. See the notes for more
- information.
-
- Attributes
- ----------
- cholsigmainv : array
- The transpose of the Cholesky decomposition of the pseudo-inverse of `sigma`
- df_model : array
- Model degrees of freedom of each equation. p_{m} - 1 where p is
- the number of regressors for each equation m and one is subtracted
- for the constant.
- df_resid : array
- Residual degrees of freedom of each equation. Number of observations
- less the number of parameters.
- endog : array
- The LHS variables for each equation in the system.
- It is a M x nobs array where M is the number of equations.
- exog : array
- The RHS variable for each equation in the system.
- It is a nobs x sum(p_{m}) array. Which is just each
- RHS array stacked next to each other in columns.
- history : dict
- Contains the history of fitting the model. Probably not of interest
- if the model is fit with `igls` = False.
- iterations : int
- The number of iterations until convergence if the model is fit
- iteratively.
- nobs : float
- The number of observations of the equations.
- normalized_cov_params : array
- sum(p_{m}) x sum(p_{m}) array
- :math:`\\left[X^{T}\\left(\\Sigma^{-1}\\otimes\\boldsymbol{I}\\right)X\\right]^{-1}`
- pinv_wexog : array
- The pseudo-inverse of the `wexog`
- sigma : array
- M x M covariance matrix of the cross-equation disturbances. See notes.
- sp_exog : CSR sparse matrix
- Contains a block diagonal sparse matrix of the design so that
- exog1 ... exogM are on the diagonal.
- wendog : array
- M * nobs x 1 array of the endogenous variables whitened by
- `cholsigmainv` and stacked into a single column.
- wexog : array
- M*nobs x sum(p_{m}) array of the whitened exogenous variables.
-
- Notes
- -----
- All individual equations are assumed to have well-behaved, homoskedastic
- iid errors. This is basically an extension of GLS, using sparse matrices.
-
- .. math:: \\Sigma=\\left[\\begin{array}{cccc}
- \\sigma_{11} & \\sigma_{12} & \\cdots & \\sigma_{1M}\\\\
- \\sigma_{21} & \\sigma_{22} & \\cdots & \\sigma_{2M}\\\\
- \\vdots & \\vdots & \\ddots & \\vdots\\\\
- \\sigma_{M1} & \\sigma_{M2} & \\cdots & \\sigma_{MM}\\end{array}\\right]
-
- References
- ----------
- Zellner (1962), Greene (2003)
- """
-#TODO: Does each equation need nobs to be the same?
- def __init__(self, sys, sigma=None, dfk=None):
- if len(sys) % 2 != 0:
- raise ValueError("sys must be a list of pairs of endogenous and \
-exogenous variables. Got length %s" % len(sys))
- if dfk:
- if not dfk.lower() in ['dfk1','dfk2']:
- raise ValueError("dfk option %s not understood" % (dfk))
- self._dfk = dfk
- M = len(sys[1::2])
- self._M = M
-# exog = np.zeros((M,M), dtype=object)
-# for i,eq in enumerate(sys[1::2]):
-# exog[i,i] = np.asarray(eq) # not sure this exog is needed
- # used to compute resids for now
- exog = np.column_stack(np.asarray(sys[1::2][i]) for i in range(M))
-# exog = np.vstack(np.asarray(sys[1::2][i]) for i in range(M))
- self.exog = exog # 2d ndarray exog is better
-# Endog, might just go ahead and reshape this?
- endog = np.asarray(sys[::2])
- self.endog = endog
- self.nobs = float(self.endog[0].shape[0]) # assumes all the same length
-
-# Degrees of Freedom
- df_resid = []
- df_model = []
- [df_resid.append(self.nobs - tools.rank(_)) \
- for _ in sys[1::2]]
- [df_model.append(tools.rank(_) - 1) for _ in sys[1::2]]
- self.df_resid = np.asarray(df_resid)
- self.df_model = np.asarray(df_model)
-
-# "Block-diagonal" sparse matrix of exog
- sp_exog = sparse.lil_matrix((int(self.nobs*M),
- int(np.sum(self.df_model+1)))) # linked lists to build
- self._cols = np.cumsum(np.hstack((0, self.df_model+1)))
- for i in range(M):
- sp_exog[i*self.nobs:(i+1)*self.nobs,
- self._cols[i]:self._cols[i+1]] = sys[1::2][i]
- self.sp_exog = sp_exog.tocsr() # cast to compressed for efficiency
-# Deal with sigma, check shape earlier if given
- if np.any(sigma):
- sigma = np.asarray(sigma) # check shape
- elif sigma == None:
- resids = []
- for i in range(M):
- resids.append(GLS(endog[i],exog[:,
- self._cols[i]:self._cols[i+1]]).fit().resid)
- resids = np.asarray(resids).reshape(M,-1)
- sigma = self._compute_sigma(resids)
- self.sigma = sigma
- self.cholsigmainv = np.linalg.cholesky(np.linalg.pinv(\
- self.sigma)).T
- self.initialize()
-
- def initialize(self):
- self.wendog = self.whiten(self.endog)
- self.wexog = self.whiten(self.sp_exog)
- self.pinv_wexog = np.linalg.pinv(self.wexog)
- self.normalized_cov_params = np.dot(self.pinv_wexog,
- np.transpose(self.pinv_wexog))
- self.history = {'params' : [np.inf]}
- self.iterations = 0
-
- def _update_history(self, params):
- self.history['params'].append(params)
-
- def _compute_sigma(self, resids):
- """
- Computes the sigma matrix and update the cholesky decomposition.
- """
- M = self._M
- nobs = self.nobs
- sig = np.dot(resids, resids.T) # faster way to do this?
- if not self._dfk:
- div = nobs
- elif self._dfk.lower() == 'dfk1':
- div = np.zeros(M**2)
- for i in range(M):
- for j in range(M):
- div[i+j] = ((self.df_model[i]+1) *\
- (self.df_model[j]+1))**(1/2)
- div.reshape(M,M)
- else: # 'dfk2' error checking is done earlier
- div = np.zeros(M**2)
- for i in range(M):
- for j in range(M):
- div[i+j] = nobs - np.max(self.df_model[i]+1,
- self.df_model[j]+1)
- div.reshape(M,M)
-# doesn't handle (#,)
- self.cholsigmainv = np.linalg.cholesky(np.linalg.pinv(sig/div)).T
- return sig/div
-
- def whiten(self, X):
- """
- SUR whiten method.
-
- Parameters
- -----------
- X : list of arrays
- Data to be whitened.
-
- Returns
- -------
- If X is the exogenous RHS of the system.
- ``np.dot(np.kron(cholsigmainv,np.eye(M)),np.diag(X))``
-
- If X is the endogenous LHS of the system.
-
- """
- nobs = self.nobs
- if X is self.endog: # definitely not a robust check
- return np.dot(np.kron(self.cholsigmainv,np.eye(nobs)),
- X.reshape(-1,1))
- elif X is self.sp_exog:
- return (sparse.kron(self.cholsigmainv,
- sparse.eye(nobs,nobs))*X).toarray()#*=dot until cast to array
-
- def fit(self, igls=False, tol=1e-5, maxiter=100):
- """
- igls : bool
- Iterate until estimates converge if sigma is None instead of
- two-step GLS, which is the default if sigma is None.
-
- tol : float
-
- maxiter : int
-
- Notes
- -----
- This is a naive implementation that does not exploit the block
- diagonal structure. It should work for ill-conditioned `sigma`
- but this is untested.
- """
-
- if not np.any(self.sigma):
- self.sigma = self._compute_sigma(self.endog, self.exog)
- M = self._M
- beta = np.dot(self.pinv_wexog, self.wendog)
- self._update_history(beta)
- self.iterations += 1
- if not igls:
- sur_fit = SysResults(self, beta, self.normalized_cov_params)
- return sur_fit
-
- conv = self.history['params']
- while igls and (np.any(np.abs(conv[-2] - conv[-1]) > tol)) and \
- (self.iterations < maxiter):
- fittedvalues = (self.sp_exog*beta).reshape(M,-1)
- resids = self.endog - fittedvalues # don't attach results yet
- self.sigma = self._compute_sigma(resids) # need to attach for compute?
- self.wendog = self.whiten(self.endog)
- self.wexog = self.whiten(self.sp_exog)
- self.pinv_wexog = np.linalg.pinv(self.wexog)
- self.normalized_cov_params = np.dot(self.pinv_wexog,
- np.transpose(self.pinv_wexog))
- beta = np.dot(self.pinv_wexog, self.wendog)
- self._update_history(beta)
- self.iterations += 1
- sur_fit = SysResults(self, beta, self.normalized_cov_params)
- return sur_fit
-
- def predict(self, design):
- pass
-
-#TODO: Should just have a general 2SLS estimator to subclass
-# for IV, FGLS, etc.
-# Also should probably have SEM class and estimators as subclasses
-class Sem2SLS(object):
- """
- Two-Stage Least Squares for Simultaneous equations
-
- Parameters
- ----------
- sys : list
- [endog1, exog1, endog2, exog2,...] It will be of length 2 x M,
- where M is the number of equations endog = exog.
- indep_endog : dict
- A dictionary mapping the equation to the column numbers of the
- independent endogenous regressors in each equation.
- It is assumed that the system is supplied already broken up into
- LHS and RHS. For now, the values of the dict have to be sequences.
- Note that the keys for the equations should be zero-indexed.
- instruments : array
- Array of the exogenous independent variables.
-
- Notes
- -----
- This is unfinished, and the design should be refactored.
- Estimation is done by brute force and there is no exploitation of
- the structure of the system.
- """
- def __init__(self, sys, indep_endog=None, instruments=None):
- if len(sys) % 2 != 0:
- raise ValueError("sys must be a list of pairs of endogenous and \
-exogenous variables. Got length %s" % len(sys))
- M = len(sys[1::2])
- self._M = M
-# The lists are probably a bad idea
- self.endog = sys[::2] # these are just list containers
- self.exog = sys[1::2]
- self._K = [tools.rank(_) for _ in sys[1::2]]
-# fullexog = np.column_stack((_ for _ in self.exog))
-
- self.instruments = instruments
-
- # Keep the Y_j's in a container to get IVs
- instr_endog = {}
- [instr_endog.setdefault(_,[]) for _ in indep_endog.keys()]
-
- for eq_key in indep_endog:
- for varcol in indep_endog[eq_key]:
- instr_endog[eq_key].append(self.exog[eq_key][:,varcol])
- # ^ copy needed?
-# self._instr_endog = instr_endog
-
- self._indep_endog = indep_endog
- _col_map = np.cumsum(np.hstack((0,self._K))) # starting col no.s
-# move this check to whiten since we're not going to build a full exog?
- for eq_key in indep_endog:
- try:
- iter(indep_endog[eq_key])
- except:
-# eq_key = [eq_key]
- raise TypeError("The values of the indep_exog dict must be\
- iterable. Got type %s for converter %s" % (type(del_col)))
-# for del_col in indep_endog[eq_key]:
-# fullexog = np.delete(fullexog, _col_map[eq_key]+del_col, 1)
-# _col_map[eq_key+1:] -= 1
-
-# Josef's example for deleting recurring "rows"
-# fullexog = np.unique(fullexog.T.view([('',fullexog.dtype)]*\
-# fullexog.shape[0])).view(fullexog.dtype).reshape(\
-# fullexog.shape[0],-1)
-# From http://article.gmane.org/gmane.comp.python.numeric.general/32276/
-# Or Jouni's suggestion of taking a hash:
-# http://www.mail-archive.com/numpy-discussion@scipy.org/msg04209.html
-# not clear to me how this would work though, only if they are the *same*
-# elements?
-# self.fullexog = fullexog
- self.wexog = self.whiten(instr_endog)
-
-
- def whiten(self, Y):
- """
- Runs the first stage of the 2SLS.
-
- Returns the RHS variables that include the instruments.
- """
- wexog = []
- indep_endog = self._indep_endog # this has the col mapping
-# fullexog = self.fullexog
- instruments = self.instruments
- for eq in range(self._M): # need to go through all equations regardless
- instr_eq = Y.get(eq, None) # Y has the eq to ind endog array map
- newRHS = self.exog[eq].copy()
- if instr_eq:
- for i,LHS in enumerate(instr_eq):
- yhat = GLS(LHS, self.instruments).fit().fittedvalues
- newRHS[:,indep_endog[eq][i]] = yhat
- # this might fail if there is a one variable column (nobs,)
- # in exog
- wexog.append(newRHS)
- return wexog
-
- def fit(self):
- """
- """
- delta = []
- wexog = self.wexog
- endog = self.endog
- for j in range(self._M):
- delta.append(GLS(endog[j], wexog[j]).fit().params)
- return delta
-
-class SysResults(LikelihoodModelResults):
- """
- Not implemented yet.
- """
- def __init__(self, model, params, normalized_cov_params=None, scale=1.):
- super(SysResults, self).__init__(model, params,
- normalized_cov_params, scale)
- self._get_results()
-
- def _get_results(self):
- pass
View
214 statsmodels/sysreg/example_sysreg.py
@@ -1,214 +0,0 @@
-'''
-This file will soon be deprecated because this
-example was converted to a unit-test
-'''
-
-"""Example: statsmodels.sandbox.sysreg
-"""
-#TODO: this is going to change significantly once we have a panel data structure
-
-import numpy as np
-import statsmodels.api as sm
-from statsmodels.sandbox.sysreg import *
-
-#for Python 3 compatibility
-from statsmodels.compatnp.py3k import asbytes
-
-# Seemingly Unrelated Regressions (SUR) Model
-
-# This example uses the subset of the Grunfeld data in Greene's Econometric
-# Analysis Chapter 14 (5th Edition)
-
-grun_data = sm.datasets.grunfeld.load()
-
-firms = ['Chrysler', 'General Electric', 'General Motors',
- 'US Steel', 'Westinghouse']
-#for Python 3 compatibility
-firms = map(asbytes, firms)
-
-grun_exog = grun_data.exog
-grun_endog = grun_data.endog
-
-# Right now SUR takes a list of arrays
-# The array alternates between the LHS of an equation and RHS side of an
-# equation
-# This is very likely to change
-grun_sys = []
-for i in firms:
- index = grun_exog['firm'] == i
- grun_sys.append(grun_endog[index])
- exog = grun_exog[index][['value','capital']].view(float).reshape(-1,2)
- exog = sm.add_constant(exog, prepend=True)
- grun_sys.append(exog)
-
-# Note that the results in Greene (5th edition) use a slightly different
-# version of the Grunfeld data. To reproduce Table 14.1 the following changes
-# are necessary.
-#grun_sys[-2][5] = 261.6
-#grun_sys[-2][-3] = 645.2
-#grun_sys[-1][11,2] = 232.6
-
-grun_mod = SUR(grun_sys)
-grun_res = grun_mod.fit()
-print "Results for the 2-step GLS"
-print "Compare to Greene Table 14.1, 5th edition"
-print grun_res.params
-# or you can do an iterative fit
-# you have to define a new model though this will be fixed
-# TODO: note the above
-print "Results for iterative GLS (equivalent to MLE)"
-print "Compare to Greene Table 14.3"
-#TODO: these are slightly off, could be a convergence issue
-# or might use a different default DOF correction?
-grun_imod = SUR(grun_sys)
-grun_ires = grun_imod.fit(igls=True)
-print grun_ires.params
-
-# Two-Stage Least Squares for Simultaneous Equations
-#TODO: we are going to need *some kind* of formula framework
-
-# This follows the simple macroeconomic model given in
-# Greene Example 15.1 (5th Edition)
-# The data however is from statsmodels and is not the same as
-# Greene's
-
-# The model is
-# consumption: c_{t} = \alpha_{0} + \alpha_{1}y_{t} + \alpha_{2}c_{t-1} + \epsilon_{t1}
-# investment: i_{t} = \beta_{0} + \beta_{1}r_{t} + \beta_{2}\left(y_{t}-y_{t-1}\right) + \epsilon_{t2}
-# demand: y_{t} = c_{t} + I_{t} + g_{t}
-
-# See Greene's Econometric Analysis for more information
-
-# Load the data
-macrodata = sm.datasets.macrodata.load().data
-
-# Not needed, but make sure the data is sorted
-macrodata = np.sort(macrodata, order=['year','quarter'])
-
-# Impose the demand restriction
-y = macrodata['realcons'] + macrodata['realinv'] + macrodata['realgovt']
-
-# Build the system
-macro_sys = []
-# First equation LHS
-macro_sys.append(macrodata['realcons'][1:]) # leave off first date
-# First equation RHS
-exog1 = np.column_stack((y[1:],macrodata['realcons'][:-1]))
-#TODO: it might be nice to have "lag" and "lead" functions
-exog1 = sm.add_constant(exog1, prepend=True)
-macro_sys.append(exog1)
-# Second equation LHS
-macro_sys.append(macrodata['realinv'][1:])
-# Second equation RHS
-exog2 = np.column_stack((macrodata['tbilrate'][1:], np.diff(y)))
-exog2 = sm.add_constant(exog2, prepend=True)
-macro_sys.append(exog2)
-
-# We need to say that y_{t} in the RHS of equation 1 is an endogenous regressor
-# We will call these independent endogenous variables
-# Right now, we use a dictionary to declare these
-indep_endog = {0 : [1]}
-
-# We also need to create a design of our instruments
-# This will be done automatically in the future
-instruments = np.column_stack((macrodata[['realgovt',
- 'tbilrate']][1:].view(float).reshape(-1,2),macrodata['realcons'][:-1],
- y[:-1]))
-instruments = sm.add_constant(instruments, prepend=True)
-macro_mod = Sem2SLS(macro_sys, indep_endog=indep_endog, instruments=instruments)
-# Right now this only returns parameters
-macro_params = macro_mod.fit()
-print "The parameters for the first equation are correct."
-print "The parameters for the second equation are not."
-print macro_params
-
-#TODO: Note that the above is incorrect, because we have no way of telling the
-# model that *part* of the y_{t} - y_{t-1} is an independent endogenous variable
-# To correct for this we would have to do the following
-y_instrumented = macro_mod.wexog[0][:,1]
-whitened_ydiff = y_instrumented - y[:-1]
-wexog = np.column_stack((macrodata['tbilrate'][1:],whitened_ydiff))
-wexog = sm.add_constant(wexog, prepend=True)
-correct_params = sm.GLS(macrodata['realinv'][1:], wexog).fit().params
-
-print "If we correctly instrument everything, then these are the parameters"
-print "for the second equation"
-print correct_params
-print "Compare to output of R script statsmodels/sandbox/tests/macrodata.s"
-
-print '\nUsing IV2SLS'
-from statsmodels.sandbox.regression.gmm import IV2SLS
-miv = IV2SLS(macro_sys[0], macro_sys[1], instruments)
-resiv = miv.fit()
-print "equation 1"
-print resiv.params
-miv2 = IV2SLS(macro_sys[2], macro_sys[3], instruments)
-resiv2 = miv2.fit()
-print "equation 2"
-print resiv2.params
-
-### Below is the same example using Greene's data ###
-
-run_greene = 0
-if run_greene:
- try:
- data3 = np.genfromtxt('/home/skipper/school/MetricsII/Greene \
-TableF5-1.txt', names=True)
- except:
- raise ValueError, "Based on Greene TableF5-1. You should download it \
-from his web site and edit this script accordingly."
-
- # Example 15.1 in Greene 5th Edition
-# c_t = constant + y_t + c_t-1
-# i_t = constant + r_t + (y_t - y_t-1)
-# y_t = c_t + i_t + g_t
- sys3 = []
- sys3.append(data3['realcons'][1:]) # have to leave off a beg. date
-# impose 3rd equation on y
- y = data3['realcons'] + data3['realinvs'] + data3['realgovt']
-
- exog1 = np.column_stack((y[1:],data3['realcons'][:-1]))
- exog1 = sm.add_constant(exog1)
- sys3.append(exog1)
- sys3.append(data3['realinvs'][1:])
- exog2 = np.column_stack((data3['tbilrate'][1:],
- np.diff(y)))
- # realint is missing 1st observation
- exog2 = sm.add_constant(exog2)
- sys3.append(exog2)
- indep_endog = {0 : [0]} # need to be able to say that y_1 is an instrument..
- instruments = np.column_stack((data3[['realgovt',
- 'tbilrate']][1:].view(float).reshape(-1,2),data3['realcons'][:-1],
- y[:-1]))
- instruments = sm.add_constant(instruments)
- sem_mod = Sem2SLS(sys3, indep_endog = indep_endog, instruments=instruments)
- sem_params = sem_mod.fit() # first equation is right, but not second?
- # should y_t in the diff be instrumented?
- # how would R know this in the script?
- # well, let's check...
- y_instr = sem_mod.wexog[0][:,0]
- wyd = y_instr - y[:-1]
- wexog = np.column_stack((data3['tbilrate'][1:],wyd))
- wexog = sm.add_constant(wexog)
- params = sm.GLS(data3['realinvs'][1:], wexog).fit().params
-
- print "These are the simultaneous equation estimates for Greene's \
-example 13-1 (Also application 13-1 in 6th edition."
- print sem_params
- print "The first set of parameters is correct. The second set is not."
- print "Compare to the solution manual at \
-http://pages.stern.nyu.edu/~wgreene/Text/econometricanalysis.htm"
- print "The reason is the restriction on (y_t - y_1)"
- print "Compare to R script GreeneEx15_1.s"
- print "Somehow R carries y.1 in yd to know that it needs to be \
-instrumented"
- print "If we replace our estimate with the instrumented one"
- print params
- print "We get the right estimate"
- print "Without a formula framework we have to be able to do restrictions."
-# yep!, but how in the world does R know this when we just fed it yd??
-# must be implicit in the formula framework...
-# we are going to need to keep the two equations separate and use
-# a restrictions matrix. Ugh, is a formula framework really, necessary to get
-# around this?
-
View
17 statsmodels/sysreg/sysreg.py
@@ -2,7 +2,7 @@
import numpy as np
import statsmodels.tools.tools as tools
from statsmodels.base.model import LikelihoodModelResults
-from scipy import sparse
+from scipy import sparse, linalg
#http://www.irisa.fr/aladin/wg-statlin/WORKSHOPS/RENNES02/SLIDES/Foschi.pdf
@@ -252,8 +252,19 @@ def fit(self, igls=False, tol=1e-5, maxiter=100):
sur_fit = SysResults(self, beta, self.normalized_cov_params)
return sur_fit
- def predict(self, design):
- pass
+ def predict(self, params, exog=None):
+ if exog is None:
+ exog = self.exog
+
+ designs = [] # list of individual design (one for each eq)
+ cur_col = 0
+ for eq in range(self._M):
+ designs.append(exog[:,cur_col:cur_col+self.df_model[eq]+1])
+ cur_col += self.df_model[eq]+1
+
+ aggr_design = linalg.block_diag(*designs)
+ return np.dot(aggr_design, params)
+
#TODO: Should just have a general 2SLS estimator to subclass
# for IV, FGLS, etc.
View
2  statsmodels/sysreg/tests/results/grunfeld-sur.R
@@ -6,4 +6,6 @@ library('systemfit')
panel <- plm.data(data,c('firm','year'))
formula <- invest ~ value + capital
SUR <- systemfit(formula,method='SUR',data=panel)
+f <- fitted(SUR)
+ff <- c(f[,'Chrysler'],f[,'General.Electric'],f[,'General.Motors'],f[,'US.Steel'],f[,'Westinghouse'])
View
18 statsmodels/sysreg/tests/results/results_sysreg.py
@@ -10,8 +10,22 @@ class GrunfeldSUR(object):
For more details see sysreg/tests/results/grunfeld-sur.R
'''
def __init__(self):
- self.params =np.array([0.9979992,0.06886083,0.3083878,-21.1374,
+ self.params = np.array([0.9979992,0.06886083,0.3083878,-21.1374,
0.03705313,0.1286866,-168.1134,0.1219063,0.3821666,62.25631,
0.1214024,0.3691114,1.407487,0.05635611,0.04290209]).reshape((15,1))
-
+ self.fittedvalues = np.array([32.98547,61.83516,72.56515,47.12665,
+ 67.63205,71.80774,68.5076,51.31181,62.20854,67.74787,76.11978,
+ 88.6971,70.72036,82.81307,87.10292,99.08188,119.4633,140.6773,
+ 176.6952,177.371,34.82255,66.98919,97.91866,74.54072,84.67318,
+ 81.88021,75.24862,74.73898,84.85019,82.72565,94.38241,105.2126,
+ 98.98109,108.2389,111.4822,121.8484,132.6644,149.3613,169.7257,
+ 195.5151,208.2453,420.2794,548.5702,252.2227,435.4257,477.1924,
+ 484.2357,343.427,426.9885,442.7958,523.2972,583.0448,552.8092,
+ 581.1657,672.8126,709.7192,882.6026,978.9525,1272.014,1364.599,
+ 247.5132,300.2828,430.7577,377.0541,415.2984,423.5218,447.7405,
+ 435.7832,414.6501,385.4872,365.7911,362.2243,378.1208,372.9127,
+ 394.2292,397.9648,466.4802,488.372,539.0389,566.277,12.27691,
+ 30.52156,42.80857,33.76598,31.71523,37.96421,33.22941,35.64298,
+ 39.81141,40.63853,46.91736,47.95589,38.93935,44.33515,40.3917,
+ 43.0696,47.76244,56.34706,76.16779,77.56886]).reshape((100,1))
View
6 statsmodels/sysreg/tests/test_sysreg.py
@@ -2,12 +2,14 @@
from numpy.testing import *
import statsmodels.api as sm
-from statsmodels.sandbox.sysreg import *
+from statsmodels.sysreg.sysreg import *
class CheckSysregResults(object):
decimal_params = 4
def test_params(self):
- assert_almost_equal(self.res1.params, self.res2.params, self.decimal_params)
+ assert_almost_equal(self.res1.params, self.res2.params, self.decimal_params)
+ def test_fittedvalues(self):
+ assert_almost_equal(self.res1.predict(), self.res2.fittedvalues, 3) # fail with decimal error >= 4
class TestSUR(CheckSysregResults):
@classmethod
Please sign in to comment.
Something went wrong with that request. Please try again.