/
Hypergeometric.hs
125 lines (106 loc) · 3.9 KB
/
Hypergeometric.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module : Statistics.Distribution.Hypergeometric
-- Copyright : (c) 2009 Bryan O'Sullivan
-- License : BSD3
--
-- Maintainer : bos@serpentine.com
-- Stability : experimental
-- Portability : portable
--
-- The Hypergeometric distribution. This is the discrete probability
-- distribution that measures the probability of /k/ successes in /l/
-- trials, without replacement, from a finite population.
--
-- The parameters of the distribution describe /k/ elements chosen
-- from a population of /l/, with /m/ elements of one type, and
-- /l/-/m/ of the other (all are positive integers).
module Statistics.Distribution.Hypergeometric
(
HypergeometricDistribution
-- * Constructors
, hypergeometric
-- ** Accessors
, hdM
, hdL
, hdK
) where
import Data.Binary (Binary)
import Data.Data (Data, Typeable)
import GHC.Generics (Generic)
import Numeric.MathFunctions.Constants (m_epsilon)
import Numeric.SpecFunctions (choose)
import qualified Statistics.Distribution as D
import Data.Binary (put, get)
import Control.Applicative ((<$>), (<*>))
data HypergeometricDistribution = HD {
hdM :: {-# UNPACK #-} !Int
, hdL :: {-# UNPACK #-} !Int
, hdK :: {-# UNPACK #-} !Int
} deriving (Eq, Read, Show, Typeable, Data, Generic)
instance Binary HypergeometricDistribution where
get = HD <$> get <*> get <*> get
put (HD x y z) = put x >> put y >> put z
instance D.Distribution HypergeometricDistribution where
cumulative = cumulative
instance D.DiscreteDistr HypergeometricDistribution where
probability = probability
instance D.Mean HypergeometricDistribution where
mean = mean
instance D.Variance HypergeometricDistribution where
variance = variance
instance D.MaybeMean HypergeometricDistribution where
maybeMean = Just . D.mean
instance D.MaybeVariance HypergeometricDistribution where
maybeStdDev = Just . D.stdDev
maybeVariance = Just . D.variance
instance D.Entropy HypergeometricDistribution where
entropy = directEntropy
instance D.MaybeEntropy HypergeometricDistribution where
maybeEntropy = Just . D.entropy
variance :: HypergeometricDistribution -> Double
variance (HD m l k) = (k' * ml) * (1 - ml) * (l' - k') / (l' - 1)
where m' = fromIntegral m
l' = fromIntegral l
k' = fromIntegral k
ml = m' / l'
{-# INLINE variance #-}
mean :: HypergeometricDistribution -> Double
mean (HD m l k) = fromIntegral k * fromIntegral m / fromIntegral l
{-# INLINE mean #-}
directEntropy :: HypergeometricDistribution -> Double
directEntropy d@(HD m _ _) =
negate . sum $
takeWhile (< negate m_epsilon) $
dropWhile (not . (< negate m_epsilon)) $
[ let x = probability d n in x * log x | n <- [0..m]]
hypergeometric :: Int -- ^ /m/
-> Int -- ^ /l/
-> Int -- ^ /k/
-> HypergeometricDistribution
hypergeometric m l k
| not (l > 0) = error $ msg ++ "l must be positive"
| not (m >= 0 && m <= l) = error $ msg ++ "m must lie in [0,l] range"
| not (k > 0 && k <= l) = error $ msg ++ "k must lie in (0,l] range"
| otherwise = HD m l k
where
msg = "Statistics.Distribution.Hypergeometric.hypergeometric: "
{-# INLINE hypergeometric #-}
-- Naive implementation
probability :: HypergeometricDistribution -> Int -> Double
probability (HD mi li ki) n
| n < max 0 (mi+ki-li) || n > min mi ki = 0
| otherwise =
choose mi n * choose (li - mi) (ki - n) / choose li ki
{-# INLINE probability #-}
cumulative :: HypergeometricDistribution -> Double -> Double
cumulative d@(HD mi li ki) x
| isNaN x = error "Statistics.Distribution.Hypergeometric.cumulative: NaN argument"
| isInfinite x = if x > 0 then 1 else 0
| n < minN = 0
| n >= maxN = 1
| otherwise = D.sumProbabilities d minN n
where
n = floor x
minN = max 0 (mi+ki-li)
maxN = min mi ki