bos/statistics

Add support for more fast jackknife stats

1 parent 1709e02 commit 1d4e231f5708613418506a67e1931b4361370571 committed Jan 30, 2014
Showing with 34 additions and 11 deletions.
1. +3 −2 ChangeLog
2. +29 −9 Statistics/Resampling.hs
3. +2 −0 Statistics/Types.hs
5 ChangeLog
 @@ -6,8 +6,9 @@ Changes in 0.10.6.0 the jackknife function to potentially use more efficient jackknife implementations. - * jackknifeMean, jackknifeVariance: new functions. These have O(n) cost - instead of the O(n^2) cost of the standard jackknife. + * jackknifeMean, jackknifeStdDev, jackknifeVariance, + jackknifeVarianceUnb: new functions. These have O(n) cost instead + of the O(n^2) cost of the standard jackknife. * The mean function has been renamed to welfordMean; a new implementation of mean has better numerical accuracy in almost all
38 Statistics/Resampling.hs
 @@ -17,6 +17,8 @@ module Statistics.Resampling , jackknife , jackknifeMean , jackknifeVariance + , jackknifeVarianceUnb + , jackknifeStdDev , resample , estimate ) where @@ -34,7 +36,7 @@ import GHC.Conc (numCapabilities) import GHC.Generics (Generic) import Numeric.Sum (Summation(..), kbn) import Statistics.Function (indices) -import Statistics.Sample (mean, variance) +import Statistics.Sample (mean, stdDev, variance, varianceUnbiased) import Statistics.Types (Estimator(..), Sample) import System.Random.MWC (Gen, initialize, uniform, uniformVector) import qualified Data.Vector.Generic as G @@ -95,15 +97,19 @@ resample gen ests numResamples samples = do -- | Run an 'Estimator' over a sample. estimate :: Estimator -> Sample -> Double -estimate Mean = mean -estimate Variance = variance +estimate Mean = mean +estimate Variance = variance +estimate VarianceUnbiased = varianceUnbiased +estimate StdDev = stdDev estimate (Function est) = est -- | /O(n) or O(n^2)/ Compute a statistical estimate repeatedly over a -- sample, each time omitting a successive element. jackknife :: Estimator -> Sample -> U.Vector Double -jackknife Mean sample = jackknifeMean sample -jackknife Variance sample = jackknifeVariance sample +jackknife Mean sample = jackknifeMean sample +jackknife Variance sample = jackknifeVariance sample +jackknife VarianceUnbiased sample = jackknifeVarianceUnb sample +jackknife StdDev sample = jackknifeStdDev sample jackknife (Function est) sample | G.length sample == 1 = singletonErr "jackknife" | otherwise = U.map f . indices \$ sample @@ -118,9 +124,11 @@ jackknifeMean samp l = fromIntegral (len - 1) len = G.length samp --- | /O(n)/ Compute the jackknife variance of a sample. -jackknifeVariance :: Sample -> U.Vector Double -jackknifeVariance samp +-- | /O(n)/ Compute the jackknife variance of a sample with a +-- correction factor @c@, so we can get either the regular or +-- \"unbiased\" variance. +jackknifeVariance_ :: Double -> Sample -> U.Vector Double +jackknifeVariance_ c samp | len == 1 = singletonErr "jackknifeVariance" | otherwise = G.zipWith4 go als ars bls brs where @@ -133,9 +141,21 @@ jackknifeVariance samp n = fromIntegral len go al ar bl br = (al + ar - (b * b) / q) / q where b = bl + br - q = n - 1 + q = n - (1 + c) len = G.length samp +-- | /O(n)/ Compute the unbiased jackknife variance of a sample. +jackknifeVarianceUnb :: Sample -> U.Vector Double +jackknifeVarianceUnb = jackknifeVariance_ 1 + +-- | /O(n)/ Compute the jackknife variance of a sample. +jackknifeVariance :: Sample -> U.Vector Double +jackknifeVariance = jackknifeVariance_ 0 + +-- | /O(n)/ Compute the jackknife standard deviation of a sample. +jackknifeStdDev :: Sample -> U.Vector Double +jackknifeStdDev = G.map sqrt . jackknifeVarianceUnb + pfxSumL :: U.Vector Double -> U.Vector Double pfxSumL = G.map kbn . G.scanl add zero
2 Statistics/Types.hs
 @@ -32,6 +32,8 @@ type WeightedSample = U.Vector (Double,Double) -- when possible. data Estimator = Mean | Variance + | VarianceUnbiased + | StdDev | Function (Sample -> Double) -- | Weights for affecting the importance of elements of a sample.