From 02f4f0d85abd41067aa414886e57cbb30f63397f Mon Sep 17 00:00:00 2001
From: Kevin Sheppard <kevin.k.sheppard@gmail.com>
Date: Tue, 31 Oct 2017 11:59:56 +0000
Subject: [PATCH] DOC: Improve anova docstrings

Provide list of output for ANOVA docstring

closes #3306
---
 statsmodels/stats/anova.py | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/statsmodels/stats/anova.py b/statsmodels/stats/anova.py
index db71a46e4d3..961412233e3 100644
--- a/statsmodels/stats/anova.py
+++ b/statsmodels/stats/anova.py
@@ -284,18 +284,25 @@ def anova_lm(*args, **kwargs):
     scale : float
         Estimate of variance, If None, will be estimated from the largest
         model. Default is None.
-    test : str {"F", "Chisq", "Cp"} or None
-        Test statistics to provide. Default is "F".
-    typ : str or int {"I","II","III"} or {1,2,3}
+    test : str {'F', 'Chisq', 'Cp'} or None
+        Test statistics to provide. Default is 'F'.
+    typ : str or int {'I','II','III'} or {1,2,3}
         The type of Anova test to perform. See notes.
-    robust : {None, "hc0", "hc1", "hc2", "hc3"}
+    robust : {None, 'hc0', 'hc1', 'hc2', 'hc3'}
         Use heteroscedasticity-corrected coefficient covariance matrix.
         If robust covariance is desired, it is recommended to use `hc3`.
 
     Returns
     -------
     anova : DataFrame
-    A DataFrame containing.
+        A DataFrame with columns:
+
+        - 'sum_sq': Sum of squares for the model terms
+        - 'df': Degrees of freedom for the model terms
+        - Test Name: this column has a name that depends on the test used, for
+          example 'F'.  It contains the value of the test statistic.
+        - Pvalue: The column has a name that depends on the type of the test,
+          for example PR(>F).  Contains the p-value of the test.
 
     Notes
     -----
@@ -310,10 +317,10 @@ def anova_lm(*args, **kwargs):
     --------
     >>> import statsmodels.api as sm
     >>> from statsmodels.formula.api import ols
-    >>> moore = sm.datasets.get_rdataset("Moore", "car", cache=True) # load
+    >>> moore = sm.datasets.get_rdataset('Moore', 'car', cache=True) # load
     >>> data = moore.data
-    >>> data = data.rename(columns={"partner.status" :
-    ...                             "partner_status"}) # make name pythonic
+    >>> data = data.rename(columns={'partner.status' :
+    ...                             'partner_status'}) # make name pythonic
     >>> moore_lm = ols('conformity ~ C(fcategory, Sum)*C(partner_status, Sum)',
     ...                 data=data).fit()
     >>> table = sm.stats.anova_lm(moore_lm, typ=2) # Type 2 Anova DataFrame
@@ -348,7 +355,7 @@ def anova_lm(*args, **kwargs):
     model_formula = []
     pr_test = "Pr(>%s)" % test
     names = ['df_resid', 'ssr', 'df_diff', 'ss_diff', test, pr_test]
-    table = DataFrame(np.zeros((n_models, 6)), columns = names)
+    table = DataFrame(np.zeros((n_models, 6)), columns=names)
 
     if not scale: # assume biggest model is last
         scale = args[-1].scale