forked from CPJKU/madmom
-
Notifications
You must be signed in to change notification settings - Fork 0
/
stats.py
73 lines (63 loc) · 2.24 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# encoding: utf-8
# pylint: disable=no-member
# pylint: disable=invalid-name
# pylint: disable=too-many-arguments
"""
This module contains some statistical functionality.
"""
from __future__ import absolute_import, division, print_function
import numpy as np
def mcnemar_test(test_1, test_2, significance=0.01):
    """
    Compare two paired sets of test outcomes with McNemar's test.

    Only the discordant samples (those on which the two tests disagree) enter
    the statistic, which is evaluated against a chi-square distribution with
    one degree of freedom.

    Parameters
    ----------
    test_1 : numpy array
        Outcomes of test 1; converted with `np.asarray`.
    test_2 : numpy array
        Outcomes of test 2; must have the same size and shape as `test_1`.
    significance : float, optional
        Significance level the p-value is compared against.

    Returns
    -------
    significance : int
        0 if the p-value is not below the significance level; otherwise +1
        if there are more samples with `test_1 > test_2` than with
        `test_1 < test_2`, and -1 if not.
    p_value : float
        P-value of the test statistic.

    Raises
    ------
    ValueError
        If the two tests differ in size or shape.
    NotImplementedError
        If there are fewer than 25 discordant samples, since the chi-square
        approximation is not used below that count. Note that the message
        text mentions `b + c > 25` although exactly 25 is accepted.

    Notes
    -----
    Please see: http://en.wikipedia.org/wiki/McNemar%27s_test

    +-----------------+-----------------+-----------------+-----------+
    |                 | Test 2 positive | Test 2 negative | Row total |
    +-----------------+-----------------+-----------------+-----------+
    | Test 1 positive |        a        |        b        |   a + b   |
    | Test 1 negative |        c        |        d        |   c + d   |
    +-----------------+-----------------+-----------------+-----------+
    | Column total    |      a + c      |      b + d      |     n     |
    +-----------------+-----------------+-----------------+-----------+

    Only the cells b and c are computed here.

    """
    # imported lazily, so scipy is needed only when the test is actually run
    from scipy.stats import chi2

    # accept anything array-like
    test_1 = np.asarray(test_1)
    test_2 = np.asarray(test_2)

    # the samples are paired, hence both tests must line up exactly
    if test_1.size != test_2.size or test_1.shape != test_2.shape:
        raise ValueError("Both tests must have the same size and shape.")

    # discordant counts: cell b (test 1 above test 2) and cell c (vice versa)
    num_b = np.sum(test_1 > test_2)
    num_c = np.sum(test_1 < test_2)
    num_discordant = num_b + num_c

    # refuse to use the approximation with too few discordant samples
    if num_discordant < 25:
        raise NotImplementedError("implement correct binomial distribution or "
                                  "use bigger sample sizes (b + c > 25)")

    # McNemar statistic and its survival probability under chi-square (df=1)
    statistic = (num_b - num_c) ** 2 / float(num_discordant)
    p_value = chi2.sf(statistic, 1)

    # report a direction only if the result is significant
    if p_value < significance:
        direction = 1 if num_b > num_c else -1
        return direction, p_value
    return 0, p_value