Skip to content
Browse files

Initial revision

  • Loading branch information...
0 parents commit 4b3c1f8f7e3d55297754404f7180656b793ea49b @b4winckler committed May 7, 2012
Showing with 262 additions and 0 deletions.
  1. +1 −0 .gitignore
  2. +30 −0 LICENSE
  3. +2 −0 Setup.hs
  4. +40 −0 sigscore.cabal
  5. +70 −0 src/Main.hs
  6. +39 −0 src/Parsers.hs
  7. +80 −0 src/Utility.hs
1 .gitignore
@@ -0,0 +1 @@
+dist/
30 LICENSE
@@ -0,0 +1,30 @@
+Copyright (c)2012, Björn Winckler <bjorn.winckler@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of Björn Winckler <bjorn.winckler@gmail.com> nor the names of other
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2 Setup.hs
@@ -0,0 +1,2 @@
+import Distribution.Simple
+main = defaultMain
40 sigscore.cabal
@@ -0,0 +1,40 @@
+name: sigscore
+version: 0.1
+synopsis: Signature score tool
+homepage: https://github.com/b4winckler/up-down-signature
+license: BSD3
+license-file: LICENSE
+author: Björn Winckler <bjorn.winckler@gmail.com>
+maintainer: Björn Winckler <bjorn.winckler@gmail.com>
+copyright: 2012 Björn Winckler
+category: Math
+build-type: Simple
+cabal-version: >= 1.2
+extra-source-files: README.markdown
+
+description:
+ Calculate signature scores for point configurations.
+
+executable sigscore
+ main-is: Main.hs
+ hs-source-dirs: src
+ build-depends: base >= 4 && < 5,
+ bytestring,
+ bytestring-lexing,
+ mwc-random >= 0.12,
+ up-down-signature >= 0.1
+
+-- test-suite tests
+-- type: exitcode-stdio-1.0
+-- hs-source-dirs: tests
+-- main-is: Tests.hs
+-- ghc-options: -Wall
+-- build-depends: base
+-- -- QuickCheck >= 2
+-- -- test-framework,
+-- -- test-framework-quickcheck2,
+-- -- test-framework-hunit,
+
+source-repository head
+ type: git
+ location: https://github.com/b4winckler/sigscore
70 src/Main.hs
@@ -0,0 +1,70 @@
+{-# LANGUAGE BangPatterns #-}
+module Main where
+
+import Control.Applicative ((<$>))
+import Control.Monad (forM_, when)
+import Data.List (nub, sort)
+import Data.Maybe (catMaybes)
+import System.Environment (getArgs)
+import System.Exit (exitSuccess)
+import System.Random.MWC (withSystemRandom)
+
+import Parsers (parseDoubles)
+import Utility (logStr, logStrLn, exitBecause, bin, pad)
+
+import qualified Data.ByteString.Lazy.Char8 as B
+import qualified Data.UpDownSignature as Sig
+
+
+main = withSystemRandom $ \gen -> do
+ as <- getArgs
+ ls <- B.lines <$> if null as then B.getContents else B.readFile (head as)
+
+ when (null ls) exitSuccess -- Nothing to do!
+
+ -- The first line is a list of integers representing categories
+ let cats = map truncate $ catMaybes $ parseDoubles $ head ls
+ ncats = length cats
+
+ -- Sanity check and log info about categories
+ logStrLn $ foundCategories cats
+ when (ncats < 3) $ exitBecause tooFewCategories
+ when (length (nub cats) > 20) $ logStrLn tooManyCategories
+
+ -- Each line is a list of doubles, with as many elements as there were in the
+ -- categories line. Missing values and NAs are allowed. Go through each and
+ -- compute the signature score.
+ forM_ (zip [1..] $ tail ls) $ \(lnum,line) -> do
+ let elems = parseDoubles line
+ nelems = length elems
+ if ncats == nelems
+ then do
+ let bins = map catMaybes $ bin cats elems
+ s <- Sig.approxScore gen numPaths $ bins
+ -- let s = Sig.score $ trimBins binSize $ map catMaybes $ bin cats elems
+ putStrLn $ show s
+ when (lnum `mod` 100 == 0) $ logStr $ " " ++ pad 4 (show lnum)
+ when (lnum `mod` 1000 == 0) $ logStrLn ""
+ else
+ logStrLn $ skippingLine lnum nelems ncats
+
+
+numPaths = 10000
+binSize = 4
+
+tooManyCategories = "\
+\WARNING: More than 20 categories. Break up the data into fewer categories\n\
+\ to cut down on processing times."
+
+tooFewCategories = "\
+\ERROR: First line must be a list of at least three integer categories."
+
+foundCategories xs = "Found " ++ show n ++ " "
+ ++ if n == 1 then "category" else "categories" ++ ": " ++ show (sort cs)
+ where
+ cs = nub xs
+ n = length cs
+
+skippingLine lnum actual expected =
+ "WARNING: Skipping line " ++ show lnum ++
+ " (expected " ++ show expected ++ " elements, got " ++ show actual ++ ")"
39 src/Parsers.hs
@@ -0,0 +1,39 @@
+module Parsers (parseDoubles) where
+
+import Data.List (unfoldr)
+import qualified Data.ByteString.Lazy.Char8 as B
+import qualified Data.ByteString.Lex.Lazy.Double as B
+
+-- | Parse doubles separated by space or separator character (as determined by
+-- 'isSeparator'). Anything else is translated into a 'Nothing'.
+--
+-- Examples:
+--
+-- > parseDoubles "1 2.3" = [Just 1.0,Just 2.3]
+-- > parseDoubles "1,,2.3," = [Just 1.0,Nothing,Just 2.3,Nothing]
+-- > parseDoubles "1 NA 3" = [Just 1.0,Nothing,Just 3.0]
+--
+parseDoubles :: B.ByteString -> [Maybe Double]
+parseDoubles bs | B.null bs = []
+ | isSeparator (B.last bs) = ds ++ [Nothing]
+ | otherwise = ds
+ where
+ ds = unfoldr go (skipWhitespace bs)
+ go xs | B.null xs = Nothing
+ | otherwise = case B.readDouble xs of
+ Just (d, ys) -> Just (Just d, skipSep ys)
+ Nothing -> Just (Nothing, skipSep $ skipNA xs)
+
+isWhitespace c = c == ' ' || c == '\t'
+
+isSeparator c = c `elem` ",;"
+
+skipWhitespace = B.dropWhile isWhitespace
+
+skipNA = B.dropWhile (not . \c -> isWhitespace c || isSeparator c)
+
+skipSep = skipWhitespace . skip . skipWhitespace
+ where
+ skip bs = case B.uncons bs of
+ Just (x, bs') -> if isSeparator x then bs' else B.cons x bs'
+ Nothing -> B.empty
80 src/Utility.hs
@@ -0,0 +1,80 @@
+-- |
+-- Utility functions.
+--
+module Utility (
+ alternate
+ , bin
+ , equating
+ , exitBecause
+ , logStr
+ , logStrLn
+ , pad
+ , pleat
+ , trimBins
+ ) where
+
+import Data.Ord (comparing)
+import Data.List (sort, sortBy, groupBy)
+import System.Exit (exitFailure)
+import System.IO (hPutStr, hPutStrLn, stderr)
+
+
+-- | Put data in bins defined by the given categories.
+bin :: Ord a => [a] -> [b] -> [[b]]
+bin cats = map (snd . unzip)
+ . groupBy (equating fst)
+ . sortBy (comparing fst)
+ . zip cats
+
+-- | Trim bins so that each bin contains at most the given number of elements.
+trimBins :: Ord a => Int -> [[a]] -> [[a]]
+trimBins n = map (take n . pleat)
+
+-- | Take a list and reorder it so that the median element comes first and the
+-- extremal elements come last. Example:
+--
+-- > pleat [1..9] = [5,4,6,3,7,2,8,1]
+-- > pleat [0..9] = [5,4,6,3,7,2,8,1,9,0]
+--
+-- Note that the input need not be sorted.
+pleat :: Ord a => [a] -> [a]
+pleat xs = alternate bs (reverse as)
+ where
+ (as,bs) = splitAt (length xs `div` 2) $ sort xs
+
+-- | Combine two lists into one by alternately taking an element from the
+-- first, then from the second. For example:
+--
+-- > alternate [1..3] [5..9] = [1,5,2,6,3,7,8,9]
+--
+-- Note that no elements from either list are lost.
+alternate :: [a] -> [a] -> [a]
+alternate (x:xs) (y:ys) = x : y : alternate xs ys
+alternate xs [] = xs
+alternate [] ys = ys
+
+-- | Use e.g. with sorting routines as in
+--
+-- > sortBy (equating fst)
+--
+equating :: Eq a => (t -> a) -> t -> t -> Bool
+equating f x y = f x == f y
+
+-- | Exit program with failure and log reason to stderr.
+exitBecause :: String -> IO b
+exitBecause reason = logStrLn reason >> exitFailure
+
+-- | Log string with newline to stderr
+logStrLn :: String -> IO ()
+logStrLn = hPutStrLn stderr
+
+-- | Log string to stderr
+logStr :: String -> IO ()
+logStr = hPutStr stderr
+
+-- | Pad string with spaces so that it is at least 'n' chars wide
+pad :: Int -> String -> String
+pad n s | n > k = replicate (n-k) ' ' ++ s
+ | otherwise = s
+ where
+ k = length s

0 comments on commit 4b3c1f8

Please sign in to comment.
Something went wrong with that request. Please try again.