Skip to content

Commit

Permalink
BUG: fix errors in mstats.ttest_rel and mstats.ttest_ind.
Browse files Browse the repository at this point in the history
Change default of axis kw to 0 in ttest_rel.  This is OK without warning
because the function didn't work before anyway.

Also provide basic test coverage (comparison against nonmasked version).
Testing with various masked array inputs to be done.

Closes scipygh-3047.
  • Loading branch information
rgommers authored and André Gaul committed Dec 4, 2013
1 parent 91eaa06 commit 21ca1ac
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 5 deletions.
15 changes: 11 additions & 4 deletions scipy/stats/mstats_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,22 +778,29 @@ def ttest_onesamp(a, popmean):

def ttest_ind(a, b, axis=0):
a, b, axis = _chk2_asarray(a, b, axis)
if a.size == 0 or b.size == 0:
return (np.nan, np.nan)

(x1, x2) = (a.mean(axis), b.mean(axis))
(v1, v2) = (a.var(axis=axis, ddof=1), b.var(axis=axis, ddof=1))
(n1, n2) = (a.count(axis), b.count(axis))
df = n1+n2-2
svar = ((n1-1)*v1+(n2-1)*v2) / float(df)
df = n1 + n2 - 2.
svar = ((n1-1)*v1+(n2-1)*v2) / df
t = (x1-x2)/ma.sqrt(svar*(1.0/n1 + 1.0/n2)) # N-D COMPUTATION HERE!!!!!!
t = ma.filled(t, 1) # replace NaN t-values with 1.0
probs = betai(0.5*df,0.5,float(df)/(df+t*t)).reshape(t.shape)
probs = betai(0.5 * df, 0.5, df/(df + t*t)).reshape(t.shape)
return t, probs.squeeze()
ttest_ind.__doc__ = stats.ttest_ind.__doc__


def ttest_rel(a,b,axis=None):
def ttest_rel(a, b, axis=0):
a, b, axis = _chk2_asarray(a, b, axis)
if len(a) != len(b):
raise ValueError('unequal length arrays')

if a.size == 0 or b.size == 0:
return (np.nan, np.nan)

(x1, x2) = (a.mean(axis), b.mean(axis))
(v1, v2) = (a.var(axis=axis, ddof=1), b.var(axis=axis, ddof=1))
n = a.count(axis)
Expand Down
6 changes: 6 additions & 0 deletions scipy/stats/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3251,6 +3251,9 @@ def ttest_ind(a, b, axis=0, equal_var=True):
"""
a, b, axis = _chk2_asarray(a, b, axis)
if a.size == 0 or b.size == 0:
return (np.nan, np.nan)

v1 = np.var(a, axis, ddof=1)
v2 = np.var(b, axis, ddof=1)
n1 = a.shape[axis]
Expand Down Expand Up @@ -3335,6 +3338,9 @@ def ttest_rel(a, b, axis=0):
if a.shape[axis] != b.shape[axis]:
raise ValueError('unequal length arrays')

if a.size == 0 or b.size == 0:
return (np.nan, np.nan)

n = a.shape[axis]
df = float(n - 1)

Expand Down
67 changes: 66 additions & 1 deletion scipy/stats/tests/test_mstats_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from numpy.testing import TestCase, run_module_suite
from numpy.ma.testutils import (assert_equal, assert_almost_equal,
assert_array_almost_equal, assert_array_almost_equal_nulp, assert_,
assert_allclose)
assert_allclose, assert_raises)


class TestMquantiles(TestCase):
Expand Down Expand Up @@ -549,5 +549,70 @@ def test_nd_input(self):
assert_allclose(res_2d[1], [res_1d[1]] * 2)


class TestTtest_rel():

def test_vs_nonmasked(self):
np.random.seed(1234567)
outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

# 1-D inputs
res1 = stats.ttest_rel(outcome[:, 0], outcome[:, 1])
res2 = mstats.ttest_rel(outcome[:, 0], outcome[:, 1])
assert_allclose(res1, res2)

# 2-D inputs
res1 = stats.ttest_rel(outcome[:, 0], outcome[:, 1], axis=None)
res2 = mstats.ttest_rel(outcome[:, 0], outcome[:, 1], axis=None)
assert_allclose(res1, res2)
res1 = stats.ttest_rel(outcome[:, :2], outcome[:, 2:], axis=0)
res2 = mstats.ttest_rel(outcome[:, :2], outcome[:, 2:], axis=0)
assert_allclose(res1, res2)

# Check default is axis=0
res3 = mstats.ttest_rel(outcome[:, :2], outcome[:, 2:])
assert_allclose(res2, res3)

def test_invalid_input_size(self):
assert_raises(ValueError, mstats.ttest_rel,
np.arange(10), np.arange(11))
x = np.arange(24)
assert_raises(ValueError, mstats.ttest_rel,
x.reshape(2, 3, 4), x.reshape(2, 4, 3), axis=1)
assert_raises(ValueError, mstats.ttest_rel,
x.reshape(2, 3, 4), x.reshape(2, 4, 3), axis=2)

def test_empty(self):
res1 = mstats.ttest_rel([], [])
assert_(np.all(np.isnan(res1)))


class TestTtest_ind():

def test_vs_nonmasked(self):
np.random.seed(1234567)
outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

# 1-D inputs
res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1])
res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
assert_allclose(res1, res2)

# 2-D inputs
res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
assert_allclose(res1, res2)
res1 = stats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
res2 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
assert_allclose(res1, res2)

# Check default is axis=0
res3 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:])
assert_allclose(res2, res3)

def test_empty(self):
res1 = mstats.ttest_ind([], [])
assert_(np.all(np.isnan(res1)))


if __name__ == "__main__":
run_module_suite()

0 comments on commit 21ca1ac

Please sign in to comment.