Skip to content

Commit

Permalink
Initial revision
Browse files Browse the repository at this point in the history
  • Loading branch information
b4winckler committed May 7, 2012
0 parents commit 4b3c1f8
Show file tree
Hide file tree
Showing 7 changed files with 262 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
dist/
30 changes: 30 additions & 0 deletions LICENSE
@@ -0,0 +1,30 @@
Copyright (c)2012, Björn Winckler <bjorn.winckler@gmail.com>

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.

* Neither the name of Björn Winckler <bjorn.winckler@gmail.com> nor the names of other
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2 changes: 2 additions & 0 deletions Setup.hs
@@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain
40 changes: 40 additions & 0 deletions sigscore.cabal
@@ -0,0 +1,40 @@
name: sigscore
version: 0.1
synopsis: Signature score tool
homepage: https://github.com/b4winckler/up-down-signature
license: BSD3
license-file: LICENSE
author: Björn Winckler <bjorn.winckler@gmail.com>
maintainer: Björn Winckler <bjorn.winckler@gmail.com>
copyright: 2012 Björn Winckler
category: Math
build-type: Simple
cabal-version: >= 1.2
extra-source-files: README.markdown

description:
Calculate signature scores for point configurations.

executable sigscore
main-is: Main.hs
hs-source-dirs: src
build-depends: base >= 4 && < 5,
bytestring,
bytestring-lexing,
mwc-random >= 0.12,
up-down-signature >= 0.1

-- test-suite tests
-- type: exitcode-stdio-1.0
-- hs-source-dirs: tests
-- main-is: Tests.hs
-- ghc-options: -Wall
-- build-depends: base
-- -- QuickCheck >= 2
-- -- test-framework,
-- -- test-framework-quickcheck2,
-- -- test-framework-hunit,

source-repository head
type: git
location: https://github.com/b4winckler/sigscore
70 changes: 70 additions & 0 deletions src/Main.hs
@@ -0,0 +1,70 @@
{-# LANGUAGE BangPatterns #-}
module Main where

import Control.Applicative ((<$>))
import Control.Monad (forM_, when)
import Data.List (nub, sort)
import Data.Maybe (catMaybes)
import System.Environment (getArgs)
import System.Exit (exitSuccess)
import System.Random.MWC (withSystemRandom)

import Parsers (parseDoubles)
import Utility (logStr, logStrLn, exitBecause, bin, pad)

import qualified Data.ByteString.Lazy.Char8 as B
import qualified Data.UpDownSignature as Sig


main = withSystemRandom $ \gen -> do
as <- getArgs
ls <- B.lines <$> if null as then B.getContents else B.readFile (head as)

when (null ls) exitSuccess -- Nothing to do!

-- The first line is a list of integers representing categories
let cats = map truncate $ catMaybes $ parseDoubles $ head ls
ncats = length cats

-- Sanity check and log info about categories
logStrLn $ foundCategories cats
when (ncats < 3) $ exitBecause tooFewCategories
when (length (nub cats) > 20) $ logStrLn tooManyCategories

-- Each line is a list of doubles, with as many elements as there were in the
-- categories line. Missing values and NAs are allowed. Go through each and
-- compute the signature score.
forM_ (zip [1..] $ tail ls) $ \(lnum,line) -> do
let elems = parseDoubles line
nelems = length elems
if ncats == nelems
then do
let bins = map catMaybes $ bin cats elems
s <- Sig.approxScore gen numPaths $ bins
-- let s = Sig.score $ trimBins binSize $ map catMaybes $ bin cats elems
putStrLn $ show s
when (lnum `mod` 100 == 0) $ logStr $ " " ++ pad 4 (show lnum)
when (lnum `mod` 1000 == 0) $ logStrLn ""
else
logStrLn $ skippingLine lnum nelems ncats


numPaths = 10000
binSize = 4

tooManyCategories = "\
\WARNING: More than 20 categories. Break up the data into fewer categories\n\
\ to cut down on processing times."

tooFewCategories = "\
\ERROR: First line must be a list of at least three integer categories."

foundCategories xs = "Found " ++ show n ++ " "
++ if n == 1 then "category" else "categories" ++ ": " ++ show (sort cs)
where
cs = nub xs
n = length cs

skippingLine lnum actual expected =
"WARNING: Skipping line " ++ show lnum ++
" (expected " ++ show expected ++ " elements, got " ++ show actual ++ ")"
39 changes: 39 additions & 0 deletions src/Parsers.hs
@@ -0,0 +1,39 @@
module Parsers (parseDoubles) where

import Data.List (unfoldr)
import qualified Data.ByteString.Lazy.Char8 as B
import qualified Data.ByteString.Lex.Lazy.Double as B

-- | Parse doubles separated by space or separator character (as determined by
-- 'isSeparator'). Anything else is translated into a 'Nothing'.
--
-- Examples:
--
-- > parseDoubles "1 2.3" = [Just 1.0,Just 2.3]
-- > parseDoubles "1,,2.3," = [Just 1.0,Nothing,Just 2.3,Nothing]
-- > parseDoubles "1 NA 3" = [Just 1.0,Nothing,Just 3.0]
--
parseDoubles :: B.ByteString -> [Maybe Double]
parseDoubles bs | B.null bs = []
| isSeparator (B.last bs) = ds ++ [Nothing]
| otherwise = ds
where
ds = unfoldr go (skipWhitespace bs)
go xs | B.null xs = Nothing
| otherwise = case B.readDouble xs of
Just (d, ys) -> Just (Just d, skipSep ys)
Nothing -> Just (Nothing, skipSep $ skipNA xs)

isWhitespace c = c == ' ' || c == '\t'

isSeparator c = c `elem` ",;"

skipWhitespace = B.dropWhile isWhitespace

skipNA = B.dropWhile (not . \c -> isWhitespace c || isSeparator c)

skipSep = skipWhitespace . skip . skipWhitespace
where
skip bs = case B.uncons bs of
Just (x, bs') -> if isSeparator x then bs' else B.cons x bs'
Nothing -> B.empty
80 changes: 80 additions & 0 deletions src/Utility.hs
@@ -0,0 +1,80 @@
-- |
-- Utility functions.
--
module Utility (
alternate
, bin
, equating
, exitBecause
, logStr
, logStrLn
, pad
, pleat
, trimBins
) where

import Data.Ord (comparing)
import Data.List (sort, sortBy, groupBy)
import System.Exit (exitFailure)
import System.IO (hPutStr, hPutStrLn, stderr)


-- | Put data in bins defined by the given categories.
bin :: Ord a => [a] -> [b] -> [[b]]
bin cats = map (snd . unzip)
. groupBy (equating fst)
. sortBy (comparing fst)
. zip cats

-- | Trim bins so that each bin contains at most the given number of elements.
trimBins :: Ord a => Int -> [[a]] -> [[a]]
trimBins n = map (take n . pleat)

-- | Take a list and reorder it so that the median element comes first and the
-- extremal elements come last. Example:
--
-- > pleat [1..9] = [5,4,6,3,7,2,8,1]
-- > pleat [0..9] = [5,4,6,3,7,2,8,1,9,0]
--
-- Note that the input need not be sorted.
pleat :: Ord a => [a] -> [a]
pleat xs = alternate bs (reverse as)
where
(as,bs) = splitAt (length xs `div` 2) $ sort xs

-- | Combine two lists into one by alternately taking an element from the
-- first, then from the second. For example:
--
-- > alternate [1..3] [5..9] = [1,5,2,6,3,7,8,9]
--
-- Note that no elements from either list are lost.
alternate :: [a] -> [a] -> [a]
alternate (x:xs) (y:ys) = x : y : alternate xs ys
alternate xs [] = xs
alternate [] ys = ys

-- | Use e.g. with sorting routines as in
--
-- > sortBy (equating fst)
--
equating :: Eq a => (t -> a) -> t -> t -> Bool
equating f x y = f x == f y

-- | Exit program with failure and log reason to stderr.
exitBecause :: String -> IO b
exitBecause reason = logStrLn reason >> exitFailure

-- | Log string with newline to stderr
logStrLn :: String -> IO ()
logStrLn = hPutStrLn stderr

-- | Log string to stderr
logStr :: String -> IO ()
logStr = hPutStr stderr

-- | Pad string with spaces so that it is at least 'n' chars wide
pad :: Int -> String -> String
pad n s | n > k = replicate (n-k) ' ' ++ s
| otherwise = s
where
k = length s

0 comments on commit 4b3c1f8

Please sign in to comment.