Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correct Mann-Whitney U Test #184

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ Alternatively you can pull it from the central Maven repositories:
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
<version>3.5</version>
<version>3.6.1</version>
</dependency>
```

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,66 @@ private double[] concatenateSamples(final double[] x, final double[] y) {
return z;
}

/**
 * Computes the smaller of the two <a
 * href="http://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U"> Mann-Whitney
 * U statistics</a> for two independent samples, which may be of different
 * length.
 * <p>
 * The returned value can be used to perform a Mann-Whitney U test
 * evaluating the null hypothesis that the two independent samples have
 * equal mean.
 * </p>
 * <p>
 * Let X<sub>i</sub> denote the i'th individual of the first sample and
 * Y<sub>j</sub> the j'th individual in the second sample. The two samples
 * need not have the same number of individuals.
 * </p>
 * <p>
 * <strong>Preconditions</strong>:
 * <ul>
 * <li>All observations in the two samples are independent.</li>
 * <li>The observations are at least ordinal (continuous are also ordinal).</li>
 * </ul>
 *
 * @param x the first sample
 * @param y the second sample
 * @return Mann-Whitney U statistic (minimum of U<sup>x</sup> and U<sup>y</sup>)
 * @throws NullArgumentException if {@code x} or {@code y} are {@code null}.
 * @throws NoDataException if {@code x} or {@code y} are zero-length.
 */
public double mannWhitneyUMin(final double[] x, final double[] y)
    throws NullArgumentException, NoDataException {

    ensureDataConformance(x, y);

    /*
     * Rank the pooled observations. x fills the leading x.length slots of
     * the pooled array, so its ranks sit in the same slots of the result.
     */
    final double[] pooledRanks = naturalRanking.rank(concatenateSamples(x, y));

    double rankSumX = 0;
    int idx = 0;
    while (idx < x.length) {
        rankSumX += pooledRanks[idx];
        idx++;
    }

    /*
     * Ux = Rx - nx * (nx + 1) / 2, with Rx the rank sum of x and nx its
     * size. The product is evaluated as a long so it cannot wrap around
     * the int range.
     */
    final long rankSumFloor = ((long) x.length * (x.length + 1)) / 2;
    final double uX = rankSumX - rankSumFloor;

    /*
     * Ux + Uy = nx * ny, which yields the statistic of the second sample
     * without a second pass over the ranks.
     */
    final long pairCount = (long) x.length * y.length;
    final double uY = pairCount - uX;

    return FastMath.min(uX, uY);
}


/**
* Computes the <a
* href="http://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U"> Mann-Whitney
Expand Down Expand Up @@ -121,7 +181,7 @@ private double[] concatenateSamples(final double[] x, final double[] y) {
* @throws NullArgumentException if {@code x} or {@code y} are {@code null}.
* @throws NoDataException if {@code x} or {@code y} are zero-length.
*/
public double mannWhitneyU(final double[] x, final double[] y)
public double mannWhitneyUMax(final double[] x, final double[] y)
throws NullArgumentException, NoDataException {

ensureDataConformance(x, y);
Expand Down Expand Up @@ -223,7 +283,7 @@ public double mannWhitneyUTest(final double[] x, final double[] y)

ensureDataConformance(x, y);

final double Umax = mannWhitneyU(x, y);
final double Umax = mannWhitneyUMax(x, y);

/*
* It can be shown that U1 + U2 = n1 * n2
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math4.stat.inference;

import org.apache.commons.math4.exception.NoDataException;
import org.apache.commons.math4.exception.NullArgumentException;
import org.junit.Assert;
import org.junit.Test;


/**
 * Test cases for {@link MannWhitneyUTest}, focused on the minimum U statistic
 * returned by {@code mannWhitneyUMin}.
 */
public class MannWhitneyUTestMinTest {

    protected MannWhitneyUTest testStatistic = new MannWhitneyUTest();

    @Test
    public void testMannWhitneyUSimple() {
        /* Target values computed using R version 2.11.1
         * x <- c(19, 22, 16, 29, 24)
         * y <- c(20, 11, 17, 12)
         * wilcox.test(x, y, alternative = "two.sided", mu = 0, paired = FALSE, exact = FALSE, correct = FALSE)
         * W = 17, p-value = 0.08641
         *
         * W is the U statistic of the first sample: the ranks of x in the
         * pooled data are 5, 7, 3, 9, 8 (sum 32), so U1 = 32 - 5 * 6 / 2 = 17.
         * Since U1 + U2 = n1 * n2 = 20, U2 = 3 and the minimum is 3.
         */
        final double[] x = {19, 22, 16, 29, 24};
        final double[] y = {20, 11, 17, 12};

        Assert.assertEquals(3, testStatistic.mannWhitneyUMin(x, y), 1e-10);
        // The two-sided p-value reported by R does not depend on which of
        // the two U values is taken as the statistic.
        Assert.assertEquals(0.08641, testStatistic.mannWhitneyUTest(x, y), 1e-5);
    }


    @Test
    public void testMannWhitneyUInputValidation() {
        /* Samples must be present, i.e. length > 0
         */
        try {
            testStatistic.mannWhitneyUTest(new double[] { }, new double[] { 1.0 });
            Assert.fail("x does not contain samples (exact), NoDataException expected");
        } catch (NoDataException ex) {
            // expected
        }

        try {
            testStatistic.mannWhitneyUTest(new double[] { 1.0 }, new double[] { });
            Assert.fail("y does not contain samples (exact), NoDataException expected");
        } catch (NoDataException ex) {
            // expected
        }

        /*
         * x and y are null
         */
        try {
            testStatistic.mannWhitneyUTest(null, null);
            Assert.fail("x and y is null (exact), NullArgumentException expected");
        } catch (NullArgumentException ex) {
            // expected
        }

        /*
         * x or y is null
         */
        try {
            testStatistic.mannWhitneyUTest(null, new double[] { 1.0 });
            Assert.fail("x is null (exact), NullArgumentException expected");
        } catch (NullArgumentException ex) {
            // expected
        }

        try {
            testStatistic.mannWhitneyUTest(new double[] { 1.0 }, null);
            Assert.fail("y is null (exact), NullArgumentException expected");
        } catch (NullArgumentException ex) {
            // expected
        }
    }

    @Test
    public void testBigDataSet() {
        // Interleaved samples: d1 holds the even values, d2 the odd ones.
        double[] d1 = new double[1500];
        double[] d2 = new double[1500];
        for (int i = 0; i < 1500; i++) {
            d1[i] = 2 * i;
            d2[i] = 2 * i + 1;
        }
        double result = testStatistic.mannWhitneyUTest(d1, d2);
        Assert.assertTrue(result > 0.1);
    }

    @Test
    public void testBigDataSetOverflow() {
        // MATH-1145
        // Identical samples large enough that n1 * n2 exceeds the int range.
        double[] d1 = new double[110000];
        double[] d2 = new double[110000];
        for (int i = 0; i < 110000; i++) {
            d1[i] = i;
            d2[i] = i;
        }
        double result = testStatistic.mannWhitneyUTest(d1, d2);
        // Zero delta keeps the exact-equality check while reporting the
        // actual value on failure.
        Assert.assertEquals(1.0, result, 0.0);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ public void testMannWhitneyUSimple() {
final double x[] = {19, 22, 16, 29, 24};
final double y[] = {20, 11, 17, 12};

Assert.assertEquals(17, testStatistic.mannWhitneyU(x, y), 1e-10);
Assert.assertEquals(0.08641, testStatistic.mannWhitneyUTest(x, y), 1e-5);
Assert.assertEquals(17, testStatistic.mannWhitneyUMax(x, y), 1e-10);
Assert.assertEquals(0.08641, testStatistic.mannWhitneyUTestMax(x, y), 1e-5);
}


Expand All @@ -52,14 +52,14 @@ public void testMannWhitneyUInputValidation() {
/* Samples must be present, i.e. length > 0
*/
try {
testStatistic.mannWhitneyUTest(new double[] { }, new double[] { 1.0 });
testStatistic.mannWhiteneyUTestMax(new double[] { }, new double[] { 1.0 });
Assert.fail("x does not contain samples (exact), NoDataException expected");
} catch (NoDataException ex) {
// expected
}

try {
testStatistic.mannWhitneyUTest(new double[] { 1.0 }, new double[] { });
testStatistic.mannWhiteneyUTestMax(new double[] { 1.0 }, new double[] { });
Assert.fail("y does not contain samples (exact), NoDataException expected");
} catch (NoDataException ex) {
// expected
Expand All @@ -69,14 +69,14 @@ public void testMannWhitneyUInputValidation() {
* x and y is null
*/
try {
testStatistic.mannWhitneyUTest(null, null);
testStatistic.mannWhiteneyUTestMax(null, null);
Assert.fail("x and y is null (exact), NullArgumentException expected");
} catch (NullArgumentException ex) {
// expected
}

try {
testStatistic.mannWhitneyUTest(null, null);
testStatistic.mannWhiteneyUTestMax(null, null);
Assert.fail("x and y is null (asymptotic), NullArgumentException expected");
} catch (NullArgumentException ex) {
// expected
Expand All @@ -86,14 +86,14 @@ public void testMannWhitneyUInputValidation() {
* x or y is null
*/
try {
testStatistic.mannWhitneyUTest(null, new double[] { 1.0 });
testStatistic.mannWhiteneyUTestMax(null, new double[] { 1.0 });
Assert.fail("x is null (exact), NullArgumentException expected");
} catch (NullArgumentException ex) {
// expected
}

try {
testStatistic.mannWhitneyUTest(new double[] { 1.0 }, null);
testStatistic.mannWhiteneyUTestMax(new double[] { 1.0 }, null);
Assert.fail("y is null (exact), NullArgumentException expected");
} catch (NullArgumentException ex) {
// expected
Expand All @@ -108,7 +108,7 @@ public void testBigDataSet() {
d1[i] = 2 * i;
d2[i] = 2 * i + 1;
}
double result = testStatistic.mannWhitneyUTest(d1, d2);
double result = testStatistic.mannWhiteneyUTestMax(d1, d2);
Assert.assertTrue(result > 0.1);
}

Expand All @@ -121,7 +121,7 @@ public void testBigDataSetOverflow() {
d1[i] = i;
d2[i] = i;
}
double result = testStatistic.mannWhitneyUTest(d1, d2);
double result = testStatistic.mannWhiteneyUTestMax(d1, d2);
Assert.assertTrue(result == 1.0);
}
}