Skip to content

Commit

Permalink
BUG: Fix MANOVA when not using formulas
Browse files Browse the repository at this point in the history
Fix mv_test so that it works when not using formulas
Improve MANOVA docstring to help users

closes statsmodels#4903
closes statsmodels#5578
  • Loading branch information
bashtage committed May 7, 2019
1 parent 860b2bd commit c3ab504
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 5 deletions.
27 changes: 24 additions & 3 deletions statsmodels/multivariate/manova.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,19 @@
from __future__ import division

import numpy as np

from statsmodels.base.model import Model
from .multivariate_ols import _multivariate_ols_test, _hypotheses_doc
from .multivariate_ols import _multivariate_ols_fit
from .multivariate_ols import MultivariateTestResults
from .multivariate_ols import _multivariate_ols_fit
from .multivariate_ols import _multivariate_ols_test, _hypotheses_doc

__docformat__ = 'restructuredtext en'


class MANOVA(Model):
"""
Multivariate analysis of variance
The implementation of MANOVA is based on multivariate regression and does
not assume that the explanatory variables are categorical. Any type of
variables as in regression is allowed.
Expand All @@ -41,6 +44,14 @@ class MANOVA(Model):
exog : array
See Parameters.
Notes
-----
MANOVA is used through the `mv_test` function, and `fit` is not used.
The ``from_formula`` interface is the recommended method to specify
a model and simplifies testing without needing to manually configure
the contrast matrices.
References
----------
.. [*] ftp://public.dhe.ibm.com/software/analytics/spss/documentation/statistics/20.0/en/client/Manuals/IBM_SPSS_Statistics_Algorithms.pdf
Expand All @@ -53,6 +64,10 @@ def __init__(self, endog, exog, missing='none', hasconst=None, **kwargs):
hasconst=hasconst, **kwargs)
self._fittedmod = _multivariate_ols_fit(self.endog, self.exog)

def fit(self):
    """
    Not implemented; MANOVA does not need to be fit.

    Raises
    ------
    NotImplementedError
        Always raised. Call ``mv_test`` directly on a MANOVA instance
        instead.
    """
    # The trailing space after 'Call' matters: adjacent string literals are
    # concatenated verbatim, so without it the message reads 'Callmv_test'.
    raise NotImplementedError('fit is not needed to use MANOVA. Call '
                              'mv_test directly on a MANOVA instance.')

def mv_test(self, hypotheses=None):
if hypotheses is None:
if (hasattr(self, 'data') and self.data is not None and
Expand All @@ -67,7 +82,7 @@ def mv_test(self, hypotheses=None):
for i in range(self.exog.shape[1]):
name = 'x%d' % (i)
L = np.zeros([1, self.exog.shape[1]])
L[i] = 1
L[0, i] = 1
hypotheses.append([name, L, None])

results = _multivariate_ols_test(hypotheses, self._fittedmod,
Expand Down Expand Up @@ -95,4 +110,10 @@ def mv_test(self, hypotheses=None):
where `params` is the regression coefficient matrix for the
linear model y = x * params
If the model is not specified using the formula interface, then the hypotheses
test each included exogenous variable, one at a time. In most applications
with categorical variables, the ``from_formula`` interface should be preferred
when specifying a model since it provides knowledge about the model when
specifying the hypotheses.
""")
75 changes: 73 additions & 2 deletions statsmodels/multivariate/tests/test_manova.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@

import numpy as np
import pandas as pd
import pytest
from numpy.testing import assert_almost_equal, assert_raises

from statsmodels.multivariate.manova import MANOVA
from numpy.testing import assert_almost_equal
from numpy.testing import assert_raises
from statsmodels.multivariate.multivariate_ols import MultivariateTestResults
from statsmodels.tools import add_constant

# Example data
# https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/
Expand Down Expand Up @@ -73,6 +76,74 @@ def test_manova_sas_example():
0.4109, decimal=4)


def test_manova_no_formula():
    """Check the 'Loc' MANOVA statistics when no formula is used.

    The design matrix and the contrast matrices are built by hand and the
    resulting statistics are compared against the same reference values as
    the formula-based test.
    """
    # Same as previous test only skipping formula interface
    design = add_constant(pd.get_dummies(X[['Loc']], drop_first=True))
    responses = X[['Basal', 'Occ', 'Max']]
    model = MANOVA(responses, design)

    # Contrast picking out column 0 of the design matrix.
    intercept = np.zeros((1, 3))
    intercept[0, 0] = 1
    # Contrast picking out columns 1 and 2 of the design matrix jointly.
    loc = np.zeros((2, 3))
    loc[0, 1] = 1
    loc[1, 2] = 1

    hypotheses = [('Intercept', intercept), ('Loc', loc)]
    result = model.mv_test(hypotheses)
    table = result['Loc']['stat']

    # (statistic row, column, expected value, decimal places), checked in
    # this exact order.
    expected = [
        ("Wilks' lambda", 'Value', 0.60143661, 8),
        ("Pillai's trace", 'Value', 0.44702843, 8),
        ("Hotelling-Lawley trace", 'Value', 0.58210348, 8),
        ("Roy's greatest root", 'Value', 0.35530890, 8),
        ("Wilks' lambda", 'F Value', 0.77, 2),
        ("Pillai's trace", 'F Value', 0.86, 2),
        ("Hotelling-Lawley trace", 'F Value', 0.75, 2),
        ("Roy's greatest root", 'F Value', 1.07, 2),
        ("Wilks' lambda", 'Num DF', 6, 3),
        ("Pillai's trace", 'Num DF', 6, 3),
        ("Hotelling-Lawley trace", 'Num DF', 6, 3),
        ("Roy's greatest root", 'Num DF', 3, 3),
        ("Wilks' lambda", 'Den DF', 16, 3),
        ("Pillai's trace", 'Den DF', 18, 3),
        ("Hotelling-Lawley trace", 'Den DF', 9.0909, 4),
        ("Roy's greatest root", 'Den DF', 9, 3),
        ("Wilks' lambda", 'Pr > F', 0.6032, 4),
        ("Pillai's trace", 'Pr > F', 0.5397, 4),
        ("Hotelling-Lawley trace", 'Pr > F', 0.6272, 4),
        ("Roy's greatest root", 'Pr > F', 0.4109, 4),
    ]
    for row, column, value, places in expected:
        assert_almost_equal(table.loc[row, column], value, decimal=places)


@pytest.mark.smoke
def test_manova_no_formula_no_hypothesis():
    """Smoke test: ``mv_test`` with no hypotheses on a non-formula model."""
    # Build the model directly from arrays/frames, skipping the formula
    # interface, and let mv_test construct its default hypotheses.
    design = add_constant(pd.get_dummies(X[['Loc']], drop_first=True))
    responses = X[['Basal', 'Occ', 'Max']]
    result = MANOVA(responses, design).mv_test()
    assert isinstance(result, MultivariateTestResults)


def test_manova_test_input_validation():
mod = MANOVA.from_formula('Basal + Occ + Max ~ Loc', data=X)
hypothesis = [('test', np.array([[1, 1, 1]]), None)]
Expand Down

0 comments on commit c3ab504

Please sign in to comment.