Update the Wald-Wolfowitz runs test and its tests

aschleg · Aug 8, 2020 · 98c0f9e
1 parent 38e5f76
commit 98c0f9e
Show file tree

Hide file tree

Showing 2 changed files with 37 additions and 25 deletions.
diff --git a/hypothetical/nonparametric.py b/hypothetical/nonparametric.py
@@ -1168,7 +1168,7 @@ def __init__(self, x, continuity=False):
         self.continuity = continuity
         self.test_summary = self._runs_test()
 
-    def _runs_test(self, n1=None, n2=None):
+    def _runs_test(self):
         r"""
         Primary method for performing the one-sample runs test.
 
@@ -1775,8 +1775,7 @@ def __init__(self, x, y, continuity=True):
         self.n1, self.n2 = len(x), len(y)
         self.a = np.sort(np.array(self.x + self.y))
         self.continuity = continuity
-        self.r = count_runs(self.a)[1]
-        self.test_summary = self._test()
+        self.r, self.test_summary = self._test()
         self.p_value = self.test_summary['p-value']
         self.probability = self.test_summary['probability']
         self.description = 'Wald-Wolfowitz Runs Test for Two Independent Samples'
@@ -1787,7 +1786,13 @@ def __init__(self, x, y, continuity=True):
             pass
 
     def _test(self):
-        r_range = np.arange(2, self.r + 1)
+        a = pd.DataFrame({'a': list(np.repeat('A', len(self.x))), 'b': self.x})
+        b = pd.DataFrame({'a': list(np.repeat('B', len(self.y))), 'b': self.y})
+        c = a.append(b)
+        d = c.sort_values('b')['a']
+
+        r = count_runs(d)[1]
+        r_range = np.arange(2, r + 1)
         evens = r_range[r_range % 2 == 0]
         odds = r_range[r_range % 2 != 0]
 
@@ -1807,28 +1812,28 @@ def _test(self):
 
             test_summary = {
                 'probability': p,
+                'runs': r,
                 'r critical value 1': r_crit_1,
                 'r critical value 2': r_crit_2
             }
-            return test_summary
-
         else:
             mean = (2 * self.n1 * self.n2) / (self.n1 + self.n2) + 1
             sd = np.sqrt((2 * self.n1 * self.n2 * (2 * self.n1 * self.n2 - self.n1 - self.n2)) /
                          ((self.n1 + self.n2) ** 2 * (self.n1 + self.n2 - 1)))
-            z = (np.abs(self.r - mean) - self.continuity * 0.5) / sd
-            p_val = norm.sf(z) * 2
+            z = (np.abs(r - mean) - self.continuity * 0.5) / sd
+            p_val = norm.sf(z)
 
             test_summary = {
                 'probability': p,
+                'runs': r,
                 'mean of runs': mean,
                 'standard deviation of runs': sd,
                 'z-value': z,
                 'p-value': p_val,
                 'continuity': self.continuity
             }
 
-            return test_summary
+        return r, test_summary
 
 
 class WilcoxonTest(object):
@@ -2201,6 +2206,6 @@ def count_runs(x, index=1):
     """
     runs = np.array([sum(1 for _ in r) for _, r in groupby(np.array(x))])
 
-    run_count = np.sum(runs > 1)
+    run_count = len(runs)
 
     return runs, run_count
diff --git a/tests/test_nonparametric.py b/tests/test_nonparametric.py
@@ -309,23 +309,23 @@ class TestRunsTest(object):
     def test_runs_test_small_sample(self):
         r = RunsTest(self.o)
 
-        assert r.r == 2
-        # assert_almost_equal(r.test_summary['probability'], 0.7672105672105671)
-        # assert_almost_equal(r.test_summary['r critical value 1'], 4)
-        # assert_almost_equal(r.test_summary['r critical value 2'], 13)
-        # assert_array_equal(r.runs, [1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1])
+        assert r.r == 12
+        assert_almost_equal(r.test_summary['probability'], 0.7672105672105671)
+        assert_almost_equal(r.test_summary['r critical value 1'], 4)
+        assert_almost_equal(r.test_summary['r critical value 2'], 13)
+        assert_array_equal(r.runs, [1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1])
 
     def test_runs_test_large_sample(self):
         r = RunsTest(self.o2)
 
-        assert r.r == 9
-        # assert_almost_equal(r.test_summary['probability'], 0.7444926712311586)
-        # assert_almost_equal(r.test_summary['mean of runs'], 25.0)
-        # assert_almost_equal(r.test_summary['standard deviation of runs'], 3.356382892705923)
-        # assert_almost_equal(r.test_summary['z-value'], 2.9793978576556204)
-        # assert_almost_equal(r.test_summary['p-value'], 0.0028881550292776965)
-        # assert_array_equal(r.runs, [1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 2, 3, 1, 1,
-        #                             1, 1, 1, 1, 2, 1, 2, 1, 4, 1, 1, 1, 2])
+        assert r.r == 35
+        assert_almost_equal(r.test_summary['probability'], 0.7444926712311586)
+        assert_almost_equal(r.test_summary['mean of runs'], 25.0)
+        assert_almost_equal(r.test_summary['standard deviation of runs'], 3.356382892705923)
+        assert_almost_equal(r.test_summary['z-value'], 2.9793978576556204)
+        assert_almost_equal(r.test_summary['p-value'], 0.0028881550292776965)
+        assert_array_equal(r.runs, [1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 2, 3, 1, 1,
+                                    1, 1, 1, 1, 2, 1, 2, 1, 4, 1, 1, 1, 2])
 
 
 class TestVanDerWaerden(object):
@@ -344,8 +344,15 @@ class TestWaldWolfowitz(object):
     c = [23, 8, 24, 15, 8, 6, 15, 15, 21, 23, 16, 15, 24, 15, 21, 15, 18, 14, 22, 15, 14]
 
     def test_wald_wolfowitz(self):
-        #w = WaldWolfowitz()
-        pass
+        w = WaldWolfowitz(x=self.e, y=self.c)
+
+        assert w.r == 6
+        assert_almost_equal(w.z, 2.907936367882308)
+        assert_almost_equal(w.p_value, 0.00181911179630756)
+
+        w2 = WaldWolfowitz(x=self.e, y=self.c, continuity=False)
+        assert_almost_equal(w2.z, 3.146831990172923)
+        assert_almost_equal(w2.p_value, 0.0008252488525844856)
 
 
 def test_tie_correction():