/
Distribution.hs
105 lines (89 loc) · 3.49 KB
/
Distribution.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
-- |
-- Module : Statistics.Distribution
-- Copyright : (c) 2009 Bryan O'Sullivan
-- License : BSD3
--
-- Maintainer : bos@serpentine.com
-- Stability : experimental
-- Portability : portable
--
-- Types classes for probability distrubutions
module Statistics.Distribution
(
-- * Type classes
Distribution(..)
, DiscreteDistr(..)
, ContDistr(..)
, Mean(..)
, Variance(..)
-- * Helper functions
, findRoot
, sumProbabilities
) where
import qualified Data.Vector.Unboxed as U
-- | Type class common to all distributions. Only c.d.f. could be
-- defined for both discrete and continous distributions.
class Distribution d where
-- | Cumulative distribution function. The probability that a
-- random variable /X/ is less or equal than /x/,
-- i.e. P(/X/≤/x/).
cumulative :: d -> Double -> Double
-- | Discrete probability distribution.
class Distribution d => DiscreteDistr d where
-- | Probability of n-th outcome.
probability :: d -> Int -> Double
-- | Continuous probability distributuion
class Distribution d => ContDistr d where
-- | Probability density function. Probability that random
-- variable /X/ lies in the infinitesimal interval
-- [/x/,/x+/δ/x/) equal to /density(x)/⋅δ/x/
density :: d -> Double -> Double
-- | Inverse of the cumulative distribution function. The value
-- /x/ for which P(/X/≤/x/) = /p/.
quantile :: d -> Double -> Double
-- | Type class for distributions with mean.
class Distribution d => Mean d where
mean :: d -> Double
-- | Type class for distributions with variance.
class Mean d => Variance d where
variance :: d -> Double
data P = P {-# UNPACK #-} !Double {-# UNPACK #-} !Double
-- | Approximate the value of /X/ for which P(/x/>/X/)=/p/.
--
-- This method uses a combination of Newton-Raphson iteration and
-- bisection with the given guess as a starting point. The upper and
-- lower bounds specify the interval in which the probability
-- distribution reaches the value /p/.
findRoot :: ContDistr d =>
d -- ^ Distribution
-> Double -- ^ Probability /p/
-> Double -- ^ Initial guess
-> Double -- ^ Lower bound on interval
-> Double -- ^ Upper bound on interval
-> Double
findRoot d prob = loop 0 1
where
loop !(i::Int) !dx !x !lo !hi
| abs dx <= accuracy || i >= maxIters = x
| otherwise = loop (i+1) dx'' x'' lo' hi'
where
err = cumulative d x - prob
P lo' hi' | err < 0 = P x hi
| otherwise = P lo x
pdf = density d x
P dx' x' | pdf /= 0 = P (err / pdf) (x - dx)
| otherwise = P dx x
P dx'' x''
| x' < lo' || x' > hi' || pdf == 0 = let y = (lo' + hi') / 2
in P (y-x) y
| otherwise = P dx' x'
accuracy = 1e-15
maxIters = 150
-- | Sum probabilities in inclusive interval.
sumProbabilities :: DiscreteDistr d => d -> Int -> Int -> Double
sumProbabilities d low hi =
-- Return value is forced to be less than 1 to guard againist roundoff errors.
-- ATTENTION! this check should be removed for testing or it could mask bugs.
min 1 . U.sum . U.map (probability d) $ U.enumFromTo low hi
{-# INLINE sumProbabilities #-}