Skip to content
Browse files

Lots of doco tweaking.

  • Loading branch information...
1 parent 06f2dba commit 22f9781bf146971b95eceb079b3a3df024d113b3 @bos committed Sep 13, 2009
View
46 Statistics/Autocorrelation.hs
@@ -0,0 +1,46 @@
+-- |
+-- Module : Statistics.Autocorrelation
+-- Copyright : (c) 2009 Bryan O'Sullivan
+-- License : BSD3
+--
+-- Maintainer : bos@serpentine.com
+-- Stability : experimental
+-- Portability : portable
+--
+-- Functions for computing autocovariance and autocorrelation of a
+-- sample.
+
+module Statistics.Autocorrelation
+ (
+ autocovariance
+ , autocorrelation
+ ) where
+
+import Data.Array.Vector
+import Statistics.Sample (Sample, mean)
+
+-- | Compute the autocovariance of a sample, i.e. the covariance of
+-- the sample against a shifted version of itself.
+autocovariance :: Sample -> UArr Double
+autocovariance a = mapU f . enumFromToU 0 $ l-2
+ where
+ f k = sumU (zipWithU (*) (takeU (l-k) c) (sliceU c k (l-k)))
+ / fromIntegral l
+ c = mapU (subtract (mean a)) a
+ l = lengthU a
+
+-- | Compute the autocorrelation function of a sample, and the upper
+-- and lower bounds of confidence intervals for each element.
+--
+-- /Note/: The calculation of the 95% confidence interval assumes a
+-- stationary Gaussian process.
+autocorrelation :: Sample -> (UArr Double, UArr Double, UArr Double)
+autocorrelation a = (r, ci (-), ci (+))
+ where
+ r = mapU (/ headU c) c
+ where c = autocovariance a
+ dllse = mapU f . scanl1U (+) . mapU square $ r
+ where f v = 1.96 * sqrt ((v * 2 + 1) / l)
+ l = fromIntegral (lengthU a)
+ ci f = consU 1 . tailU . mapU (f (-1/l)) $ dllse
+ square x = x * x
View
2 Statistics/Constants.hs
@@ -25,7 +25,7 @@ m_huge :: Double
m_huge = 1.797693e308
{-# INLINE m_huge #-}
--- | The largest 'Int' /x/ such that @2**(x-1)@ is approximately
+-- | The largest 'Int' /x/ such that 2**(/x/-1) is approximately
-- representable as a 'Double'.
m_max_exp :: Int
m_max_exp = 1024
View
12 Statistics/Distribution.hs
@@ -21,15 +21,15 @@ module Statistics.Distribution
-- | The interface shared by all probability distributions.
class Distribution d where
-- | Probability density function. The probability that a
- -- stochastic variable @x@ has the value @X@, i.e. @P(x=X)@.
+ -- stochastic variable /x/ has the value /X/, i.e. P(/x/=/X/).
probability :: d -> Double -> Double
-- | Cumulative distribution function. The probability that a
- -- stochastic variable @x@ is less than @X@, i.e. @P(x<X)@.
+ -- stochastic variable /x/ is less than /X/, i.e. P(/x/</X/).
cumulative :: d -> Double -> Double
-- | Inverse of the cumulative distribution function. The value
- -- @X@ for which @P(x<X)@.
+ -- /X/ for which P(/x/</X/).
inverse :: d -> Double -> Double
class Distribution d => Mean d where
@@ -38,14 +38,14 @@ class Distribution d => Mean d where
class Mean d => Variance d where
variance :: d -> Double
--- | Approximate the value of @X@ for which @P(x>X) == p@.
+-- | Approximate the value of /X/ for which P(/x/>/X/)=/p/.
--
-- This method uses a combination of Newton-Raphson iteration and
-- bisection with the given guess as a starting point. The upper and
-- lower bounds specify the interval in which the probability
--- distribution reaches the value @p@.
+-- distribution reaches the value /p/.
findRoot :: Distribution d => d
- -> Double -- ^ Probability @p@
+ -> Double -- ^ Probability /p/
-> Double -- ^ Initial guess
-> Double -- ^ Lower bound on interval
-> Double -- ^ Upper bound on interval
View
6 Statistics/Function.hs
@@ -21,15 +21,15 @@ import Data.Array.Vector.Algorithms.Combinators (apply)
import Data.Array.Vector ((:*:)(..), UA, UArr, foldlU)
import qualified Data.Array.Vector.Algorithms.Intro as I
--- | Sort.
+-- | Sort an array.
sort :: (UA e, Ord e) => UArr e -> UArr e
sort = apply I.sort
{-# INLINE sort #-}
--- | Partially sort, such that the least @k@ elements will be
+-- | Partially sort an array, such that the least /k/ elements will be
-- at the front.
partialSort :: (UA e, Ord e) =>
- Int -- ^ The number @k@ of least elements
+ Int -- ^ The number /k/ of least elements.
-> UArr e
-> UArr e
partialSort k = apply (\a -> I.partialSort a k)
View
9 Statistics/Math.hs
@@ -62,8 +62,9 @@ n `choose` k
data F = F {-# UNPACK #-} !Word64 {-# UNPACK #-} !Word64
--- | Compute the factorial function. Returns &#8734; if the input is
--- above 170.
+-- | Compute the factorial function /n/!. Returns &#8734; if the
+-- input is above 170 (above which the result cannot be represented by
+-- a 64-bit 'Double').
factorial :: Int -> Double
factorial n
| n < 0 = error "Statistics.Math.factorial: negative input"
@@ -89,7 +90,7 @@ logFactorial n
2.7777777777778e-3) * y + 8.3333333333333e-2
{-# INLINE logFactorial #-}
--- | Compute the incomplete gamma integral function, &#947;(/s/,/x/).
+-- | Compute the incomplete gamma integral function &#947;(/s/,/x/).
-- Uses Algorithm AS 239 by Shea.
incompleteGamma :: Double -- ^ /s/
-> Double -- ^ /x/
@@ -135,7 +136,7 @@ incompleteGamma x p
-- Adapted from http://people.sc.fsu.edu/~burkardt/f_src/asa245/asa245.html
--- | Compute the logarithm of the gamma function, &#915;(/x/). Uses
+-- | Compute the logarithm of the gamma function &#915;(/x/). Uses
-- Algorithm AS 245 by Macleod.
--
-- Gives an accuracy of 10&#8211;12 significant decimal digits, except
View
73 Statistics/Quantile.hs
@@ -8,15 +8,20 @@
-- Stability : experimental
-- Portability : portable
--
--- Functions for approximating quantiles.
+-- Functions for approximating quantiles, i.e. points taken at regular
+-- intervals from the cumulative distribution function of a random
+-- variable.
+--
+-- The number of quantiles is described below by the variable /q/, so
+-- with /q/=4, a 4-quantile (also known as a /quartile/) has 4
+-- intervals, and contains 5 points. The parameter /k/ describes the
+-- desired point, where 0 &#8804; /k/ &#8804; /q/.
module Statistics.Quantile
(
- -- * Types
- ContParam(..)
-
-- * Quantile estimation functions
- , weightedAvg
+ weightedAvg
+ , ContParam(..)
, continuousBy
-- * Parameters for the continuous sample method
@@ -33,14 +38,15 @@ module Statistics.Quantile
import Control.Exception (assert)
import Data.Array.Vector (allU, indexU, lengthU)
+import Statistics.Constants (m_epsilon)
import Statistics.Function (partialSort)
import Statistics.Types (Sample)
--- | Use the weighted average method to estimate the @k@th
--- @q@-quantile of a sample.
-weightedAvg :: Int -- ^ @k@, the desired quantile
- -> Int -- ^ @q@, the number of quantiles
- -> Sample -- ^ @x@, the sample data
+-- | Estimate the /k/th /q/-quantile of a sample, using the weighted
+-- average method.
+weightedAvg :: Int -- ^ /k/, the desired quantile.
+ -> Int -- ^ /q/, the number of quantiles.
+ -> Sample -- ^ /x/, the sample data.
-> Double
weightedAvg k q x =
assert (q >= 2) .
@@ -57,15 +63,17 @@ weightedAvg k q x =
sx = partialSort (j+2) x
{-# INLINE weightedAvg #-}
--- | Parameters @a@ and @b@ to the 'quantileBy' function.
+-- | Parameters /a/ and /b/ to the 'continuousBy' function.
data ContParam = ContParam {-# UNPACK #-} !Double {-# UNPACK #-} !Double
--- | Using the continuous sample method with the given parameters,
--- estimate the @k@th @q@-quantile of a sample @x@.
-continuousBy :: ContParam -- ^ Parameters @a@ and @b@
- -> Int -- ^ @k@, the desired quantile
- -> Int -- ^ @q@, the number of quantiles
- -> Sample -- ^ @x@, the sample data
+-- | Estimate the /k/th /q/-quantile of a sample /x/, using the
+-- continuous sample method with the given parameters. This is the
+-- method used by most statistical software, such as R, Mathematica,
+-- SPSS, and S.
+continuousBy :: ContParam -- ^ Parameters /a/ and /b/.
+ -> Int -- ^ /k/, the desired quantile.
+ -> Int -- ^ /q/, the number of quantiles.
+ -> Sample -- ^ /x/, the sample data.
-> Double
continuousBy (ContParam a b) k q x =
assert (q >= 2) .
@@ -80,50 +88,51 @@ continuousBy (ContParam a b) k q x =
h | abs r < eps = 0
| otherwise = r
where r = t - fromIntegral j
- eps = 8.881784e-16
+ eps = m_epsilon * 4
n = lengthU x
item = indexU sx . bracket
sx = partialSort (bracket j + 1) x
bracket m = min (max m 0) (n - 1)
{-# INLINE continuousBy #-}
--- | California Department of Public Works definition, @a=0,b=1@.
--- Gives a linear interpolation of the empirical CDF.
--- This corresponds to method 4 in R and Mathematica.
+-- | California Department of Public Works definition, /a/=0, /b/=1.
+-- Gives a linear interpolation of the empirical CDF. This
+-- corresponds to method 4 in R and Mathematica.
cadpw :: ContParam
cadpw = ContParam 0 1
{-# INLINE cadpw #-}
--- | Hazen's definition, @a=0.5,b=0.5@. This is claimed to be popular
--- among hydrologists. This corresponds to method 5 in R and
+-- | Hazen's definition, /a/=0.5, /b/=0.5. This is claimed to be
+-- popular among hydrologists. This corresponds to method 5 in R and
-- Mathematica.
hazen :: ContParam
hazen = ContParam 0.5 0.5
{-# INLINE hazen #-}
--- | SPSS definition, @a=0,b=0@, also known as Weibull's definition.
--- This corresponds to method 6 in R and Mathematica.
+-- | Definition used by the SPSS statistics application, with /a/=0,
+-- /b/=0 (also known as Weibull's definition). This corresponds to
+-- method 6 in R and Mathematica.
spss :: ContParam
spss = ContParam 0 0
{-# INLINE spss #-}
--- | S definition, @a=1,b=1@. The interpolation points divide the
--- sample range into @n-1@ intervals. This corresponds to method 7 in
--- R and Mathematica.
+-- | Definition used by the S statistics application, with /a/=1,
+-- /b/=1. The interpolation points divide the sample range into @n-1@
+-- intervals. This corresponds to method 7 in R and Mathematica.
s :: ContParam
s = ContParam 1 1
{-# INLINE s #-}
--- | Median unbiased definition, @a=1/3,b=1/3@. The resulting quantile
--- estimates are approximately median unbiased regardless of the
--- distribution of @x@. This corresponds to method 8 in R and
+-- | Median unbiased definition, /a/=1\/3, /b/=1\/3. The resulting
+-- quantile estimates are approximately median unbiased regardless of
+-- the distribution of /x/. This corresponds to method 8 in R and
-- Mathematica.
medianUnbiased :: ContParam
medianUnbiased = ContParam third third
where third = 1/3
{-# INLINE medianUnbiased #-}
--- | Normal unbiased definition, @a=3/8,b=3/8@. An approximately
+-- | Normal unbiased definition, /a/=3\/8, /b/=3\/8. An approximately
-- unbiased estimate if the empirical distribution approximates the
-- normal distribution. This corresponds to method 9 in R and
-- Mathematica.
View
3 Statistics/Resampling.hs
@@ -25,7 +25,8 @@ import Statistics.Types (Estimator, Sample)
import System.Random.Mersenne (MTGen, random)
-- | A resample drawn randomly, with replacement, from a set of data
--- points.
+-- points. Distinct from a normal array to make it harder for your
+-- humble author's brain to go wrong.
newtype Resample = Resample {
fromResample :: UArr Double
} deriving (Eq, Show)
View
4 Statistics/Sample.hs
@@ -12,8 +12,10 @@
module Statistics.Sample
(
+ -- * Types
+ Sample
-- * Statistics of location
- mean
+ , mean
, harmonicMean
, geometricMean
View
5 Statistics/Types.hs
@@ -18,9 +18,12 @@ module Statistics.Types
import Data.Array.Vector (UArr)
+-- | Sample data.
type Sample = UArr Double
--- | A function that estimates a property of a sample, such as 'mean'.
+-- | A function that estimates a property of a sample, such as its
+-- 'mean'.
type Estimator = Sample -> Double
+-- | Weights for affecting the importance of elements of a sample.
type Weights = UArr Double
View
20 statistics.cabal
@@ -1,7 +1,20 @@
name: statistics
-version: 0.1
-synopsis: A library of statistical types, data, and functions.
-description: A library of statistical types, data, and functions.
+version: 0.2
+synopsis: A library of statistical types, data, and functions
+description:
+ This library provides a number of common functions and types useful
+ in statistics. Our focus is on high performance, numerical
+ robustness, and use of good algorithms. Where possible, we provide
+ references to the statistical literature.
+ .
+ The library's facilities can be divided into three broad categories:
+ .
+ Working with widely used discrete and continuous probability
+ distributions. (There are dozens of exotic distributions in use; we
+ focus on the most common.)
+ .
+ Computing with sample data: quantile estimation, kernel density
+ estimation, bootstrap methods, and autocorrelation analysis.
license: BSD3
license-file: LICENSE
homepage: http://darcs.serpentine.com/statistics
@@ -15,6 +28,7 @@ extra-source-files: README
library
exposed-modules:
+ Statistics.Autocorrelation
Statistics.Constants
Statistics.Distribution
Statistics.Distribution.Binomial

0 comments on commit 22f9781

Please sign in to comment.
Something went wrong with that request. Please try again.