ENH: Add log likelihood and variance decomposition
Refactor panel results into separate classes
Change handling of effects df
Add log likelihood
Add variance decomposition for models with effects
Additional testing against Stata
bashtage committed Apr 11, 2017
1 parent 58ec0ae commit 457c79c
Showing 16 changed files with 1,890 additions and 1,822 deletions.
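The additions can be exercised end to end; a minimal sketch, reusing the data helper called in experiment.py below and assuming the fitted result exposes the new keys added in model.py (loglik, sigma2_eps, sigma2_effects, rho) as attributes:

    from linearmodels import PanelOLS
    from linearmodels.tests.panel._utility import generate_data

    # simulated balanced panel, same call as experiment.py
    data = generate_data(0.00, 'pandas', ntk=(101, 5, 5), other_effects=1, const=False)
    res = PanelOLS(data.y, data.x, entity_effect=True).fit()

    print(res.loglik)           # Gaussian log likelihood of the estimated model
    print(res.sigma2_eps)       # idiosyncratic (within) variance
    print(res.sigma2_effects)   # variance attributed to the included effects
    print(res.rho)              # effects' share of the total variance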
6 changes: 3 additions & 3 deletions doc/source/iv/introduction.rst
@@ -129,9 +129,9 @@ GMM and GMM-CUE Estimation
:class:`~linearmodels.iv.gmm.HeteroskedasticWeightMatrix`.
* 'kernel' - Allows for both heteroskedasticity and autocorrelation in the
moment conditions. See :class:`~linearmodels.iv.gmm.KernelWeightMatrix`.
* 'cluster' - Allows for a one-way cluster structure where moment conditions
within a cluster are correlated.
See :class:`~linearmodels.iv.gmm.OneWayClusteredWeightMatrix`.
* 'cluster' - Allows for one- and two-way cluster structures where moment
conditions within a cluster are correlated.
See :class:`~linearmodels.iv.gmm.ClusteredWeightMatrix`.

Each weight type accepts a set of additional parameters which are similar to
those for the corresponding covariance estimator.
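A minimal sketch of requesting the clustered weight matrix; the model class IVGMM and the keyword names weight_type and clusters are assumptions for illustration, not confirmed by this diff:

    import numpy as np
    from linearmodels.iv import IVGMM

    rs = np.random.RandomState(0)
    nobs = 1000
    clusters = rs.randint(0, 50, nobs)               # one-way cluster ids
    exog = rs.standard_normal((nobs, 2))
    instr = rs.standard_normal((nobs, 2))
    endog = instr @ np.ones(2) + rs.standard_normal(nobs)
    dep = exog @ np.ones(2) + endog + rs.standard_normal(nobs)

    # 'cluster' weight type (see the list above) with cluster ids as weight config
    mod = IVGMM(dep, exog, endog, instr, weight_type='cluster', clusters=clusters)
    res = mod.fit()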
2 changes: 1 addition & 1 deletion doc/source/panel/covariance.rst
@@ -11,6 +11,6 @@ Panel Model Covariance Estimators
:members:
:inherited-members:

.. autoclass:: OneWayClusteredCovariance
.. autoclass:: ClusteredCovariance
:members:
:inherited-members:
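A rough sketch of constructing the renamed estimator directly, using the signature that appears later in this commit's covariance.py diff (the estimator is normally built by the model's fit method rather than by hand):

    import numpy as np
    from linearmodels.panel.covariance import ClusteredCovariance

    rs = np.random.RandomState(0)
    nobs, nvar = 200, 3
    x = rs.standard_normal((nobs, nvar))
    y = x @ np.ones((nvar, 1)) + rs.standard_normal((nobs, 1))
    params = np.linalg.lstsq(x, y)[0]
    clusters = rs.randint(0, 20, nobs)               # one-way cluster ids

    cov_est = ClusteredCovariance(y, x, params, debiased=True, extra_df=0,
                                  clusters=clusters, group_debias=True)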
18 changes: 18 additions & 0 deletions doc/source/panel/faq.rst
@@ -0,0 +1,18 @@
Implementation Choices
----------------------

While the implementation of the panel estimators is similar to Stata's, there
are some differences worth noting.

Clustered Covariance with Fixed Effects
=======================================
By default, Stata does not adjust the covariance estimator for the degrees
of freedom used to estimate the included effects. This can have a substantial
effect on the estimated parameter covariance when the number of time periods
is small (e.g. 2 or 3), and it means that estimating the same model by LSDV
or with fixed effects produces different results in Stata. Here, covariance
estimators are adjusted for the degrees of freedom consumed by effects by
default. This can be overridden by setting the fit option
``count_effects=False``.
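A minimal sketch of the option, reusing the test data helper that appears in
experiment.py in this commit; the ``cov`` attribute on the result is an assumed
name for the parameter covariance:

    from linearmodels import PanelOLS
    from linearmodels.tests.panel._utility import generate_data

    data = generate_data(0.00, 'pandas', ntk=(101, 5, 5), other_effects=1, const=False)
    mod = PanelOLS(data.y, data.x, entity_effect=True)

    adjusted = mod.fit(debiased=True)                          # effects counted (default)
    stata_like = mod.fit(debiased=True, count_effects=False)   # Stata-style
    print(adjusted.cov - stata_like.cov)   # gap grows as the number of periods shrinks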

R2 definitions
==============
2 changes: 2 additions & 0 deletions doc/source/panel/index.rst
@@ -11,3 +11,5 @@ Panel Data Model Estimation
examples/using-formulas.ipynb
reference
mathematical-formula
faq

53 changes: 5 additions & 48 deletions experiment.py
@@ -3,53 +3,10 @@
from linearmodels.panel.data import PanelData
from linearmodels.tests.panel._utility import generate_data
from linearmodels import PanelOLS
from linearmodels import PooledOLS

data = generate_data(0.00, 'pandas', ntk=(101, 5, 5), other_effects=1, const=False)

res = PanelOLS(data.y, data.x).fit()
print(res.f_pooled)
# # Is weighted demeaning identical to dummy regression?
# y = PanelData(data.y)
# x = PanelData(data.x)
# w = PanelData(data.w)
#
# missing = y.isnull | x.isnull | w.isnull
#
# y.drop(missing)
# x.drop(missing)
# w.drop(missing)
#
# ydm = y.demean('entity', weights=w)
# xdm = x.demean('entity', weights=w)
# beta = np.linalg.lstsq(xdm.values2d, ydm.values2d)[0]
#
# root_w = np.sqrt(w.values2d)
# d = y.dummies().values
# wy = root_w * y.values2d
# wx = root_w * x.values2d
# wd = root_w * d
#
# wy = wy - wd @ np.linalg.lstsq(wd, wy)[0]
# wx = wx - wd @ np.linalg.lstsq(wd, wx)[0]
# beta2 = np.linalg.lstsq(wx, wy)[0]
# print(beta)
# print(beta2)
#
#
# wy = root_w * y.values2d
# wx = root_w * x.values2d
# muy = np.l
#
#
#
#
# wd = wd - root_w @ np.linalg.lstsq(root_w, wd)[0]
# wy = wy - wd @ np.linalg.lstsq(wd, wy)[0]
# wx = wx - wd @ np.linalg.lstsq(wd, wx)[0]
# beta3 = np.linalg.lstsq(wx, wy)[0]
# print(beta3)
# wy = wy - root_w @ np.linalg.lstsq(root_w, wy)[0]
# wx = wx - root_w @ np.linalg.lstsq(root_w, wx)[0]
# beta4 = np.linalg.lstsq(wx, wy)[0]
# print(beta4)
#
res = PooledOLS(data.y, data.x).fit()
res2 = PanelOLS(data.y, data.x).fit()
res3 = PanelOLS(data.y, data.x, entity_effect=True).fit()
res4 = PanelOLS(data.y, data.x, entity_effect=True, time_effect=True).fit()
17 changes: 11 additions & 6 deletions linearmodels/panel/covariance.py
@@ -21,7 +21,8 @@ class HomoskedasticCovariance(object):
Flag indicating whether to debias the estimator
extra_df : int, optional
Additional degrees of freedom consumed by models beyond the number of
columns in x, e.g., fixed effects
columns in x, e.g., fixed effects. Covariance estimators are always
adjusted for extra_df irrespective of the setting of debiased.
Notes
-----
@@ -52,9 +53,9 @@ def __init__(self, y, x, params, *, debiased=False, extra_df=0):
self._debiased = debiased
self._extra_df = extra_df
self._nobs, self._nvar = x.shape
self._nobs_eff = self._nobs
self._nobs_eff = self._nobs - extra_df
if debiased:
self._nobs_eff -= (self._nvar + extra_df)
self._nobs_eff -= self._nvar
self._scale = self._nobs / self._nobs_eff
self._name = 'Unadjusted'

@@ -97,7 +98,8 @@ class HeteroskedasticCovariance(HomoskedasticCovariance):
Flag indicating whether to debias the estimator
extra_df : int, optional
Additional degrees of freedom consumed by models beyond the number of
columns in x, e.g., fixed effects
columns in x, e.g., fixed effects. Covariance estimators are always
adjusted for extra_df irrespective of the setting of debiased.
Notes
-----
@@ -159,7 +161,8 @@ class ClusteredCovariance(HomoskedasticCovariance):
Flag indicating whether to debias the estimator
extra_df : int, optional
Additional degrees of freedom consumed by models beyond the number of
columns in x, e.g., fixed effects
columns in x, e.g., fixed effects. Covariance estimators are always
adjusted for extra_df irrespective of the setting of debiased.
cluster : ndarray, optional
nobs by 1 or nobs by 2 array of cluster group ids
group_debias : bool, optional
@@ -200,7 +203,9 @@ class ClusteredCovariance(HomoskedasticCovariance):

def __init__(self, y, x, params, *, debiased=False, extra_df=0, clusters=None,
group_debias=False):
super(ClusteredCovariance, self).__init__(y, x, params, debiased=debiased, extra_df=extra_df)
super(ClusteredCovariance, self).__init__(y, x, params,
debiased=debiased,
extra_df=extra_df)
if clusters is None:
clusters = np.arange(self._x.shape[0])
clusters = np.asarray(clusters).squeeze()
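The new effective-sample-size adjustment can be checked by hand; a small sketch mirroring the scaling above (extra_df is always removed, nvar only when debiased=True):

    def scale(nobs, nvar, extra_df, debiased):
        # mirrors HomoskedasticCovariance: nobs_eff = nobs - extra_df [- nvar]
        nobs_eff = nobs - extra_df
        if debiased:
            nobs_eff -= nvar
        return nobs / nobs_eff

    # 500 entity-periods, 3 regressors, 100 estimated entity effects
    print(scale(500, 3, 100, debiased=False))   # 1.25
    print(scale(500, 3, 100, debiased=True))    # about 1.26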
12 changes: 6 additions & 6 deletions linearmodels/panel/data.py
@@ -52,10 +52,10 @@ class PanelData(object):
can be treated as-if being (1, nobs, nentity).
If the 2-d input is a pandas DataFrame with a 2-level MultiIndex then the
input is treated differently. Index level 0 is assumed to be entity.
Index level 1 is time. The columns are the variables. This is the most
precise format to use since pandas Panels do not preserve all variable
type information across transformations between Panel and MultiIndex
DataFrame. MultiIndex Series are also accepted and treated as single
column MultiIndex DataFrames.
@@ -147,7 +147,7 @@ def drop(self, locs):
Parameters
----------
locs : array
Boolean array indicating observations to drop with reference to
the dataframe view of the data
"""
self._frame = self._frame.loc[~locs.ravel()]
@@ -223,7 +223,7 @@ def time_ids(self):
def _demean_both(self, weights):
"""
Entity and time demean
Parameters
----------
weights : PanelData, optional
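A minimal sketch of the MultiIndex DataFrame input described above, with entity on index level 0 and time on level 1 (values2d is used elsewhere in this commit; other attributes are not assumed):

    import numpy as np
    import pandas as pd
    from linearmodels.panel.data import PanelData

    entities = ['firm1', 'firm1', 'firm2', 'firm2']
    times = [2015, 2016, 2015, 2016]
    index = pd.MultiIndex.from_arrays([entities, times], names=['entity', 'time'])
    df = pd.DataFrame({'x1': np.arange(4.0), 'x2': np.ones(4)}, index=index)

    panel_data = PanelData(df)
    print(panel_data.values2d.shape)   # (4, 2): stacked entity-time rows by variable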
61 changes: 37 additions & 24 deletions linearmodels/panel/model.py
@@ -4,10 +4,12 @@
from patsy.highlevel import ModelDesc, dmatrices
from patsy.missing import NAAction

from linearmodels.panel.covariance import ClusteredCovariance, HeteroskedasticCovariance, HomoskedasticCovariance
from linearmodels.panel.covariance import ClusteredCovariance, HeteroskedasticCovariance, \
HomoskedasticCovariance
from linearmodels.panel.data import PanelData
from linearmodels.panel.results import PanelResults
from linearmodels.utility import AttrDict, InapplicableTestStatistic, InvalidTestStatistic, MissingValueWarning, \
from linearmodels.panel.results import PanelEffectsResults, PanelResults
from linearmodels.utility import AttrDict, InapplicableTestStatistic, InvalidTestStatistic, \
MissingValueWarning, \
WaldTestStatistic, has_constant, missing_value_warning_msg


@@ -48,17 +50,12 @@ class AmbiguityError(Exception):


# TODO: Bootstrap covariance
# TODO: Add likelihood and possibly AIC/BIC
# TODO: Possibly add AIC/BIC
# TODO: Correlation between FE and XB
# TODO: Component variance estimators
# TODO: Documentation
# TODO: Example notebooks
# TODO: Formal test of other outputs
# TODO: Test Pooled F-stat
# TODO: Add fast path for no-constant, entity or time effect <- Depends on whether it is easy to compute FE



# TODO: Add fast path for no-constant, entity or time effect

class PooledOLS(object):
r"""
@@ -328,7 +325,7 @@ def _rsquared(self, params, reweight=False):
weps = wy - wx @ params
residual_ss = float(weps.T @ weps)
total_ss = float(wy.T @ wy)
if self.dependent.nobs == 1:
if self.dependent.nobs == 1 or (self.exog.nvar == 1 and self.has_constant):
r2w = 0
else:
r2w = 1 - residual_ss / total_ss
@@ -342,13 +339,16 @@ def _postestimation(self, params, cov, debiased, df_resid, weps, y, x, root_w):
f_pooled = InapplicableTestStatistic(reason='Model has no effects',
name='Pooled F-stat')
entity_info, time_info, other_info = self._info()
nobs = weps.shape[0]
sigma2 = float(weps.T @ weps / nobs)
loglik = -0.5 * nobs * (np.log(2 * np.pi) + np.log(sigma2) + 1)
res = AttrDict(params=params, deferred_cov=cov.deferred_cov,
deferred_f=deferred_f, f_stat=f_stat,
debiased=debiased, name=self._name, var_names=self.exog.vars,
r2w=r2w, r2b=r2b, r2=r2w, r2o=r2o, s2=cov.s2,
model=self, cov_type=cov.name, index=self.dependent.index,
entity_info=entity_info, time_info=time_info, other_info=other_info,
f_pooled=f_pooled)
f_pooled=f_pooled, loglik=loglik)
return res

@property
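# Hedged sketch, illustration only: with sigma2 = weps'weps / nobs, the Gaussian
# log likelihood computed above collapses to -nobs/2 * (log(2*pi) + log(sigma2) + 1).
import numpy as np

_eps = np.random.standard_normal((250, 1))
_n = _eps.shape[0]
_s2 = float(_eps.T @ _eps / _n)
_direct = float(np.sum(-0.5 * (np.log(2 * np.pi) + np.log(_s2)) - _eps ** 2 / (2 * _s2)))
_conc = -0.5 * _n * (np.log(2 * np.pi) + np.log(_s2) + 1)
assert np.allclose(_direct, _conc)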
@@ -753,7 +753,7 @@ def stats(ids, name):

return entity_info, time_info, other_info

def fit(self, *, cov_type='unadjusted', debiased=False, **cov_config):
def fit(self, *, cov_type='unadjusted', debiased=False, count_effects=True, **cov_config):
"""
Estimate model parameters
Expand All @@ -764,6 +764,9 @@ def fit(self, *, cov_type='unadjusted', debiased=False, **cov_config):
debiased : bool, optional
Flag indicating whether to debiased the covariance estimator using
a degree of freedom adjustment.
count_effects : bool, optional
Flag indicating that the covariance estimator should be adjusted
to account for the estimation of effects in the model.
**cov_config
Additional covariance-specific options. See Notes.
Expand Down Expand Up @@ -795,9 +798,9 @@ def fit(self, *, cov_type='unadjusted', debiased=False, **cov_config):
* ``cluster_time`` - Boolean indicating whether to use time clusters
"""

unweighted = np.all(self.weights.values2d == 1.0)
weighted = np.any(self.weights.values2d != 1.0)
y_effects = x_effects = 0
if unweighted and not self.other_effect:
if not weighted and not self.other_effect:
y, x, ybar = self._fast_path()
else:
y, x, ybar, y_effects, x_effects = self._slow_path()
@@ -821,23 +824,30 @@ def fit(self, *, cov_type='unadjusted', debiased=False, **cov_config):
raise AbsorbingEffectError(absorbing_error_msg)

params = np.linalg.lstsq(x, y)[0]

nobs = self.dependent.dataframe.shape[0]
df_model = x.shape[1] + neffects
df_resid = y.shape[0] - df_model
df_resid = nobs - df_model
extra_df = neffects if count_effects else 0
cov_est, cov_config = self._choose_cov(cov_type, **cov_config)
cov = cov_est(y, x, params, debiased=debiased, extra_df=neffects, **cov_config)
cov = cov_est(y, x, params, debiased=debiased, extra_df=extra_df, **cov_config)
weps = y - x @ params
eps = weps
if not unweighted:
_y = self.dependent.values2d
_x = self.exog.values2d
_y = self.dependent.values2d
_x = self.exog.values2d
if weighted:
eps = (_y - y_effects) - (_x - x_effects) @ params
if self.has_constant:
# Correction since y_effects and x_effects @ params add the mean
w = self.weights.values2d
eps -= (w * eps).sum() / w.sum()
resid_ss = float(weps.T @ weps)

eps_effects = _y - _x @ params
sigma2_tot = float(eps_effects.T @ eps_effects / nobs)
sigma2_eps = float(eps.T @ eps / nobs)
sigma2_effects = sigma2_tot - sigma2_eps
rho = sigma2_effects / sigma2_tot
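# Decomposition above, for reference: sigma2_tot is the variance of the dependent
# net of x @ params only (effects left in), sigma2_eps is the residual variance once
# the effects are also removed, and the difference is attributed to the effects;
# rho = sigma2_effects / sigma2_tot is the effects' share of total variance.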

resid_ss = float(weps.T @ weps)
if self.has_constant:
mu = ybar
else:
@@ -865,8 +875,11 @@ def fit(self, *, cov_type='unadjusted', debiased=False, **cov_config):

res.update(dict(df_resid=df_resid, df_model=df_model, nobs=y.shape[0],
residual_ss=resid_ss, total_ss=total_ss, wresids=weps, resids=eps,
r2=r2))
return PanelResults(res)
r2=r2, entity_effect=self.entity_effect, time_effect=self.time_effect,
other_effect=self.other_effect, sigma2_eps=sigma2_eps,
sigma2_effects=sigma2_effects, rho=rho))

return PanelEffectsResults(res)


class BetweenOLS(PooledOLS):
