ENH: Add Driscoll-Kraay Covariance

Add Driscoll-Kraay covariance. Refactor comparisons to use common code. Refactor general demeaning.
bashtage · Apr 21, 2017 · 3dcd0e5 · 3dcd0e5
1 parent 0a92989
commit 3dcd0e5
Show file tree

Hide file tree

Showing 22 changed files with 461 additions and 410 deletions.
diff --git a/doc/source/panel/covariance.rst b/doc/source/panel/covariance.rst
@@ -23,3 +23,9 @@ One- and Two-way Clustered covariance estimator
 .. autoclass:: ClusteredCovariance
    :members:
    :inherited-members:
+
+Driscoll-Kraay HAC Estimator
+============================
+.. autoclass:: DriscollKraay
+   :members:
+   :inherited-members:
diff --git a/linearmodels/compat/pandas.py b/linearmodels/compat/pandas.py
@@ -1,11 +1,13 @@
+from pandas.core.common import is_string_like
+
 try:
-    from pandas.api.types import (is_string_like, is_numeric_dtype,
-                                  is_categorical, is_string_dtype,
-                                  is_categorical_dtype, is_datetime64_any_dtype)
+    from pandas.api.types import (is_numeric_dtype, is_categorical,
+                                  is_string_dtype, is_categorical_dtype,
+                                  is_datetime64_any_dtype)
 except ImportError:
-    from pandas.core.common import (is_string_dtype, is_string_like,
-                                    is_numeric_dtype, is_categorical,
-                                    is_categorical_dtype, is_datetime64_any_dtype)
+    from pandas.core.common import (is_string_dtype, is_numeric_dtype,
+                                    is_categorical, is_categorical_dtype,
+                                    is_datetime64_any_dtype)
 
 __all__ = ['is_string_dtype', 'is_numeric_dtype', 'is_categorical',
-           'is_string_like', 'is_categorical_dtype']
+           'is_string_like', 'is_categorical_dtype', 'is_datetime64_any_dtype']
diff --git a/linearmodels/datasets/wage_panel/__init__.py b/linearmodels/datasets/wage_panel/__init__.py
@@ -1,6 +1,6 @@
 DESCR = """
-F. Vella and M. Verbeek (1998), “Whose Wages Do Unions Raise? A Dynamic Model 
-of Unionism and Wage Rate Determination for Young Men,” Journal of Applied 
+F. Vella and M. Verbeek (1998), “Whose Wages Do Unions Raise? A Dynamic Model
+of Unionism and Wage Rate Determination for Young Men,” Journal of Applied
 Econometrics 13, 163-183.
 
 nr                       person identifier
@@ -17,6 +17,7 @@
 occupation               Occupation code
 """
 
+
 def load():
     from linearmodels import datasets
     return datasets.load(__file__, 'wage_panel.csv.bz2')
diff --git a/linearmodels/iv/covariance.py b/linearmodels/iv/covariance.py
@@ -3,7 +3,8 @@
 """
 from __future__ import absolute_import, division, print_function
 
-from numpy import arange, argsort, asarray, ceil, cos, empty, ones, pi, r_, sin, sum, unique, where, zeros, int64
+from numpy import arange, argsort, asarray, ceil, cos, empty, ones, pi, \
+    r_, sin, sum, unique, where, zeros, int64
 from numpy.linalg import inv, pinv
 
 CLUSTER_ERR = """

diff --git a/linearmodels/iv/data.py b/linearmodels/iv/data.py
@@ -7,8 +7,8 @@
 import pandas as pd
 import xarray as xr
 
-from linearmodels.compat.pandas import is_categorical, is_categorical_dtype, is_numeric_dtype, is_string_dtype, \
-    is_string_like
+from linearmodels.compat.pandas import is_categorical, is_categorical_dtype,  \
+    is_numeric_dtype, is_string_dtype, is_string_like
 
 dim_err = '{0} has too many dims.  Maximum is 2, actual is {1}'
 type_err = 'Only ndarrays, DataArrays and Series and DataFrames are permitted'

diff --git a/linearmodels/iv/gmm.py b/linearmodels/iv/gmm.py
@@ -6,7 +6,8 @@
 from numpy import asarray, unique
 from numpy.linalg import inv
 
-from linearmodels.iv.covariance import HomoskedasticCovariance, KERNEL_LOOKUP, _cov_cluster, _cov_kernel
+from linearmodels.iv.covariance import HomoskedasticCovariance, KERNEL_LOOKUP, \
+    _cov_cluster, _cov_kernel
 
 
 class HomoskedasticWeightMatrix(object):

diff --git a/linearmodels/iv/results.py b/linearmodels/iv/results.py
@@ -2,7 +2,6 @@
 Results containers and post-estimation diagnostics for IV models
 """
 import datetime as dt
-from collections import OrderedDict
 
 import scipy.stats as stats
 from numpy import array, asarray, c_, diag, empty, log, ones, sqrt
@@ -13,8 +12,8 @@
 from statsmodels.iolib.table import default_txt_fmt
 
 from linearmodels.iv._utility import annihilate, proj
-from linearmodels.utility import InvalidTestStatistic, WaldTestStatistic, _SummaryStr, _str, cached_property, \
-    pval_format
+from linearmodels.utility import InvalidTestStatistic, WaldTestStatistic, \
+    _SummaryStr, _str, cached_property, pval_format, _ModelComparison
 
 
 def stub_concat(lists, sep='='):
@@ -1152,7 +1151,7 @@ def compare(results):
     return IVModelComparison(results)
 
 
-class IVModelComparison(_SummaryStr):
+class IVModelComparison(_ModelComparison):
     """
     Comparison of multiple models
 
@@ -1162,78 +1161,16 @@ class IVModelComparison(_SummaryStr):
         Set of results to compare.  If a dict, the keys will be used as model
         names.  An OrderedDict will preserve the model order the comparisons.
     """
+    _supported = (IVResults, IVGMMResults, OLSResults)
 
     def __init__(self, results):
-
-        if not isinstance(results, (dict, OrderedDict)):
-            _results = OrderedDict()
-            for i, res in enumerate(results):
-                _results['Model ' + str(i)] = results[i]
-            results = _results
-        elif not isinstance(results, OrderedDict):
-            _results = OrderedDict()
-            for key in sorted(results.keys()):
-                _results[key] = results[key]
-            results = _results
-        self._results = results
-
-    def estimator_type(self):
-        pass
-
-    def _get_series_property(self, name):
-        out = ([(k, getattr(v, name)) for k, v in self._results.items()])
-        cols = [v[0] for v in out]
-        values = concat([v[1] for v in out], 1)
-        values.columns = cols
-        return values
-
-    def _get_property(self, name):
-        out = OrderedDict()
-        items = []
-        for k, v in self._results.items():
-            items.append(k)
-            out[k] = getattr(v, name)
-        return Series(out, name=name).loc[items]
-
-    @property
-    def nobs(self):
-        """Parameters for all models"""
-        return self._get_property('nobs')
-
-    @property
-    def params(self):
-        """Parameters for all models"""
-        return self._get_series_property('params')
-
-    @property
-    def tstats(self):
-        """Parameter t-stats for all models"""
-        return self._get_series_property('tstats')
-
-    @property
-    def pvalues(self):
-        """Parameter p-vals for all models"""
-        return self._get_series_property('pvalues')
-
-    @property
-    def rsquared(self):
-        """Coefficients of determination (R**2)"""
-        return self._get_property('rsquared')
+        super(IVModelComparison, self).__init__(results)
 
     @property
     def rsquared_adj(self):
         """Sample-size adjusted coefficients of determination (R**2)"""
         return self._get_property('rsquared_adj')
 
-    @property
-    def f_statistic(self):
-        """F-statistics and P-values"""
-        out = self._get_property('f_statistic')
-        out_df = DataFrame(empty((len(out), 2)), columns=['F stat', 'P-value'], index=out.index)
-        for loc in out.index:
-            out_df.loc[loc] = out[loc].stat, out[loc].pval
-        return out_df
-
     @property
     def estimator_method(self):
         """Estimation methods"""

diff --git a/linearmodels/panel/__init__.py b/linearmodels/panel/__init__.py
@@ -1,3 +1,4 @@
-from linearmodels.panel.model import PanelOLS, PooledOLS, RandomEffects, BetweenOLS, FirstDifferenceOLS
+from linearmodels.panel.model import PanelOLS, PooledOLS, RandomEffects, BetweenOLS, \
+    FirstDifferenceOLS
 
-__all__ = ['PanelOLS', 'PooledOLS', 'RandomEffects', 'FirstDifferenceOLS', 'BetweenOLS']
+__all__ = ['PanelOLS', 'PooledOLS', 'RandomEffects', 'FirstDifferenceOLS', 'BetweenOLS']