# bos/statistics

Merge branch 'master' of github.com:Shimuuar/statistics

• Loading branch information...
2 parents 509d605 + 1e4dd69 commit 01fbdb01ec863517a59c5fe53dce0a57ab3a6020 Shimuuar committed Jan 13, 2013
5 Statistics/Distribution/Normal.hs
 @@ -91,7 +91,10 @@ normalDistr m sd -- sample. Variance is estimated using maximum likelihood method -- (biased estimation). normalFromSample :: S.Sample -> NormalDistribution -normalFromSample a = normalDistr (S.mean a) (S.stdDev a) +normalFromSample xs + = normalDistr m (sqrt v) + where + (m,v) = S.meanVariance xs density :: NormalDistribution -> Double -> Double density d x = exp (-xm * xm / (2 * sd * sd)) / ndPdfDenom d
33 Statistics/Function.hs
 @@ -77,14 +77,25 @@ minMax = fini . G.foldl' go (MM (1/0) (-1/0)) -- non-negative integer. If the given value is already a power of -- two, it is returned unchanged. If negative, zero is returned. nextHighestPowerOfTwo :: Int -> Int -nextHighestPowerOfTwo n = o + 1 - where m = n - 1 - o = m - .|. (m `shiftR` 1) - .|. (m `shiftR` 2) - .|. (m `shiftR` 4) - .|. (m `shiftR` 8) - .|. (m `shiftR` 16) -#if WORD_SIZE_IN_BITS == 64 - .|. (m `shiftR` 32) -#endif +nextHighestPowerOfTwo n +#if WORD_SIZE_IN_BITS == 64 + = 1 + _i32 +#else + = 1 + i16 +#endif + where + i0 = n - 1 + i1 = i0 .|. i0 `shiftR` 1 + i2 = i1 .|. i1 `shiftR` 2 + i4 = i2 .|. i2 `shiftR` 4 + i8 = i4 .|. i4 `shiftR` 8 + i16 = i8 .|. i8 `shiftR` 16 + _i32 = i16 .|. i16 `shiftR` 32 +-- It could be implemented as +-- +-- > nextHighestPowerOfTwo n = 1 + foldl' go (n-1) [1, 2, 4, 8, 16, 32] +-- where go m i = m .|. m `shiftR` i +-- +-- But GHC do not inline foldl (probably because it's recursive) and +-- as result function walks list of boxed ints. Hand rolled version +-- uses unboxed arithmetic.
6 Statistics/Resampling/Bootstrap.hs
 @@ -22,7 +22,11 @@ module Statistics.Resampling.Bootstrap import Control.DeepSeq (NFData) import Control.Exception (assert) -import Control.Monad.Par (runPar, parMap) +-- Workaround for the bug https://github.com/simonmar/monad-par/issues/23 +-- As suggested by Simon Marlow direct scheduler is used. +-- Issue #45 tracks this workaround. +import Control.Monad.Par (parMap) +import Control.Monad.Par.Scheds.Direct (runPar) import Data.Data (Data) import Data.Typeable (Typeable) import Data.Vector.Unboxed ((!))
22 tests/Tests/Distribution.hs
 @@ -88,10 +88,10 @@ discreteDistrTests t = testGroup ("Tests for: " ++ typeName t) \$ cdfTests :: (Param d, Distribution d, QC.Arbitrary d, Show d) => T d -> [Test] cdfTests t = [ testProperty "C.D.F. sanity" \$ cdfSanityCheck t - , testProperty "CDF limit at +∞" \$ cdfLimitAtPosInfinity t - , testProperty "CDF limit at -∞" \$ cdfLimitAtNegInfinity t - , testProperty "CDF at +∞ = 1" \$ cdfAtPosInfinity t - , testProperty "CDF at -∞ = 1" \$ cdfAtNegInfinity t + , testProperty "CDF limit at +inf" \$ cdfLimitAtPosInfinity t + , testProperty "CDF limit at -inf" \$ cdfLimitAtNegInfinity t + , testProperty "CDF at +inf = 1" \$ cdfAtPosInfinity t + , testProperty "CDF at -inf = 1" \$ cdfAtNegInfinity t , testProperty "CDF is nondecreasing" \$ cdfIsNondecreasing t , testProperty "1-CDF is correct" \$ cdfComplementIsCorrect t ] @@ -174,9 +174,9 @@ probSanityCheck _ d x = p >= 0 && p <= 1 -- Check that discrete CDF is correct discreteCDFcorrect :: (DiscreteDistr d) => T d -> d -> Int -> Int -> Property discreteCDFcorrect _ d a b - = printTestCase (printf "CDF = %g" p1) - \$ printTestCase (printf "Sum = %g" p2) - \$ printTestCase (printf "Δ = %g" (abs (p1 - p2))) + = printTestCase (printf "CDF = %g" p1) + \$ printTestCase (printf "Sum = %g" p2) + \$ printTestCase (printf "Delta = %g" (abs (p1 - p2))) \$ abs (p1 - p2) < 3e-10 -- Avoid too large differeneces. Otherwise there is to much to sum -- @@ -280,17 +280,17 @@ unitTests = testGroup "Unit tests" where -- Student-T testStudentPDF ndf x exact - = testAssertion (printf "density (studentT %f) %f ≈ %f" ndf x exact) + = testAssertion (printf "density (studentT %f) %f ~ %f" ndf x exact) \$ eq 1e-5 exact (density (studentT ndf) x) testStudentCDF ndf x exact - = testAssertion (printf "cumulative (studentT %f) %f ≈ %f" ndf x exact) + = testAssertion (printf "cumulative (studentT %f) %f ~ %f" ndf x exact) \$ eq 1e-5 exact (cumulative (studentT ndf) x) -- F-distribution testFdistrPDF n m x exact - = testAssertion (printf "density (fDistribution %i %i) %f ≈ %f [got %f]" n m x exact d) + = testAssertion (printf "density (fDistribution %i %i) %f ~ %f [got %f]" n m x exact d) \$ eq 1e-5 exact d where d = density (fDistribution n m) x testFdistrCDF n m x exact - = testAssertion (printf "cumulative (fDistribution %i %i) %f ≈ %f [got %f]" n m x exact d) + = testAssertion (printf "cumulative (fDistribution %i %i) %f ~ %f [got %f]" n m x exact d) \$ eq 1e-5 exact d where d = cumulative (fDistribution n m) x
11 tests/Tests/Function.hs
 @@ -7,13 +7,15 @@ import Test.QuickCheck import Test.Framework import Test.Framework.Providers.QuickCheck2 +import Tests.Helpers import Statistics.Function tests :: Test tests = testGroup "S.Function" - [ testProperty "Sort is sort" p_sort + [ testProperty "Sort is sort" p_sort + , testAssertion "nextHighestPowerOfTwo is OK" p_nextHighestPowerOfTwo ] @@ -23,4 +25,9 @@ p_sort xs = where v = sort \$ U.fromList xs - +p_nextHighestPowerOfTwo :: Bool +p_nextHighestPowerOfTwo + = all (\(good, is) -> all ((==good) . nextHighestPowerOfTwo) is) lists + where + pows = [1 .. 17] + lists = [ (2^m, [2^n+1 .. 2^m]) | (n,m) <- pows `zip` tail pows ]

#### 0 comments on commit `01fbdb0`

Please sign in to comment.