Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 4b3c1f8
Showing
7 changed files
with
262 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
dist/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
Copyright (c)2012, Björn Winckler <bjorn.winckler@gmail.com> | ||
|
||
All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions are met: | ||
|
||
* Redistributions of source code must retain the above copyright | ||
notice, this list of conditions and the following disclaimer. | ||
|
||
* Redistributions in binary form must reproduce the above | ||
copyright notice, this list of conditions and the following | ||
disclaimer in the documentation and/or other materials provided | ||
with the distribution. | ||
|
||
* Neither the name of Björn Winckler <bjorn.winckler@gmail.com> nor the names of other | ||
contributors may be used to endorse or promote products derived | ||
from this software without specific prior written permission. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
import Distribution.Simple | ||
main = defaultMain |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
name: sigscore | ||
version: 0.1 | ||
synopsis: Signature score tool | ||
homepage: https://github.com/b4winckler/up-down-signature | ||
license: BSD3 | ||
license-file: LICENSE | ||
author: Björn Winckler <bjorn.winckler@gmail.com> | ||
maintainer: Björn Winckler <bjorn.winckler@gmail.com> | ||
copyright: 2012 Björn Winckler | ||
category: Math | ||
build-type: Simple | ||
cabal-version: >= 1.2 | ||
extra-source-files: README.markdown | ||
|
||
description: | ||
Calculate signature scores for point configurations. | ||
|
||
executable sigscore | ||
main-is: Main.hs | ||
hs-source-dirs: src | ||
build-depends: base >= 4 && < 5, | ||
bytestring, | ||
bytestring-lexing, | ||
mwc-random >= 0.12, | ||
up-down-signature >= 0.1 | ||
|
||
-- test-suite tests | ||
-- type: exitcode-stdio-1.0 | ||
-- hs-source-dirs: tests | ||
-- main-is: Tests.hs | ||
-- ghc-options: -Wall | ||
-- build-depends: base | ||
-- -- QuickCheck >= 2 | ||
-- -- test-framework, | ||
-- -- test-framework-quickcheck2, | ||
-- -- test-framework-hunit, | ||
|
||
source-repository head | ||
type: git | ||
location: https://github.com/b4winckler/sigscore |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
{-# LANGUAGE BangPatterns #-} | ||
module Main where | ||
|
||
import Control.Applicative ((<$>)) | ||
import Control.Monad (forM_, when) | ||
import Data.List (nub, sort) | ||
import Data.Maybe (catMaybes) | ||
import System.Environment (getArgs) | ||
import System.Exit (exitSuccess) | ||
import System.Random.MWC (withSystemRandom) | ||
|
||
import Parsers (parseDoubles) | ||
import Utility (logStr, logStrLn, exitBecause, bin, pad) | ||
|
||
import qualified Data.ByteString.Lazy.Char8 as B | ||
import qualified Data.UpDownSignature as Sig | ||
|
||
|
||
main = withSystemRandom $ \gen -> do | ||
as <- getArgs | ||
ls <- B.lines <$> if null as then B.getContents else B.readFile (head as) | ||
|
||
when (null ls) exitSuccess -- Nothing to do! | ||
|
||
-- The first line is a list of integers representing categories | ||
let cats = map truncate $ catMaybes $ parseDoubles $ head ls | ||
ncats = length cats | ||
|
||
-- Sanity check and log info about categories | ||
logStrLn $ foundCategories cats | ||
when (ncats < 3) $ exitBecause tooFewCategories | ||
when (length (nub cats) > 20) $ logStrLn tooManyCategories | ||
|
||
-- Each line is a list of doubles, with as many elements as there were in the | ||
-- categories line. Missing values and NAs are allowed. Go through each and | ||
-- compute the signature score. | ||
forM_ (zip [1..] $ tail ls) $ \(lnum,line) -> do | ||
let elems = parseDoubles line | ||
nelems = length elems | ||
if ncats == nelems | ||
then do | ||
let bins = map catMaybes $ bin cats elems | ||
s <- Sig.approxScore gen numPaths $ bins | ||
-- let s = Sig.score $ trimBins binSize $ map catMaybes $ bin cats elems | ||
putStrLn $ show s | ||
when (lnum `mod` 100 == 0) $ logStr $ " " ++ pad 4 (show lnum) | ||
when (lnum `mod` 1000 == 0) $ logStrLn "" | ||
else | ||
logStrLn $ skippingLine lnum nelems ncats | ||
|
||
|
||
numPaths = 10000 | ||
binSize = 4 | ||
|
||
tooManyCategories = "\ | ||
\WARNING: More than 20 categories. Break up the data into fewer categories\n\ | ||
\ to cut down on processing times." | ||
|
||
tooFewCategories = "\ | ||
\ERROR: First line must be a list of at least three integer categories." | ||
|
||
foundCategories xs = "Found " ++ show n ++ " " | ||
++ if n == 1 then "category" else "categories" ++ ": " ++ show (sort cs) | ||
where | ||
cs = nub xs | ||
n = length cs | ||
|
||
skippingLine lnum actual expected = | ||
"WARNING: Skipping line " ++ show lnum ++ | ||
" (expected " ++ show expected ++ " elements, got " ++ show actual ++ ")" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
module Parsers (parseDoubles) where | ||
|
||
import Data.List (unfoldr) | ||
import qualified Data.ByteString.Lazy.Char8 as B | ||
import qualified Data.ByteString.Lex.Lazy.Double as B | ||
|
||
-- | Parse doubles separated by space or separator character (as determined by | ||
-- 'isSeparator'). Anything else is translated into a 'Nothing'. | ||
-- | ||
-- Examples: | ||
-- | ||
-- > parseDoubles "1 2.3" = [Just 1.0,Just 2.3] | ||
-- > parseDoubles "1,,2.3," = [Just 1.0,Nothing,Just 2.3,Nothing] | ||
-- > parseDoubles "1 NA 3" = [Just 1.0,Nothing,Just 3.0] | ||
-- | ||
parseDoubles :: B.ByteString -> [Maybe Double] | ||
parseDoubles bs | B.null bs = [] | ||
| isSeparator (B.last bs) = ds ++ [Nothing] | ||
| otherwise = ds | ||
where | ||
ds = unfoldr go (skipWhitespace bs) | ||
go xs | B.null xs = Nothing | ||
| otherwise = case B.readDouble xs of | ||
Just (d, ys) -> Just (Just d, skipSep ys) | ||
Nothing -> Just (Nothing, skipSep $ skipNA xs) | ||
|
||
isWhitespace c = c == ' ' || c == '\t' | ||
|
||
isSeparator c = c `elem` ",;" | ||
|
||
skipWhitespace = B.dropWhile isWhitespace | ||
|
||
skipNA = B.dropWhile (not . \c -> isWhitespace c || isSeparator c) | ||
|
||
skipSep = skipWhitespace . skip . skipWhitespace | ||
where | ||
skip bs = case B.uncons bs of | ||
Just (x, bs') -> if isSeparator x then bs' else B.cons x bs' | ||
Nothing -> B.empty |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
-- | | ||
-- Utility functions. | ||
-- | ||
module Utility ( | ||
alternate | ||
, bin | ||
, equating | ||
, exitBecause | ||
, logStr | ||
, logStrLn | ||
, pad | ||
, pleat | ||
, trimBins | ||
) where | ||
|
||
import Data.Ord (comparing) | ||
import Data.List (sort, sortBy, groupBy) | ||
import System.Exit (exitFailure) | ||
import System.IO (hPutStr, hPutStrLn, stderr) | ||
|
||
|
||
-- | Put data in bins defined by the given categories. | ||
bin :: Ord a => [a] -> [b] -> [[b]] | ||
bin cats = map (snd . unzip) | ||
. groupBy (equating fst) | ||
. sortBy (comparing fst) | ||
. zip cats | ||
|
||
-- | Trim bins so that each bin contains at most the given number of elements. | ||
trimBins :: Ord a => Int -> [[a]] -> [[a]] | ||
trimBins n = map (take n . pleat) | ||
|
||
-- | Take a list and reorder it so that the median element comes first and the | ||
-- extremal elements come last. Example: | ||
-- | ||
-- > pleat [1..9] = [5,4,6,3,7,2,8,1] | ||
-- > pleat [0..9] = [5,4,6,3,7,2,8,1,9,0] | ||
-- | ||
-- Note that the input need not be sorted. | ||
pleat :: Ord a => [a] -> [a] | ||
pleat xs = alternate bs (reverse as) | ||
where | ||
(as,bs) = splitAt (length xs `div` 2) $ sort xs | ||
|
||
-- | Combine two lists into one by alternately taking an element from the | ||
-- first, then from the second. For example: | ||
-- | ||
-- > alternate [1..3] [5..9] = [1,5,2,6,3,7,8,9] | ||
-- | ||
-- Note that no elements from either list are lost. | ||
alternate :: [a] -> [a] -> [a] | ||
alternate (x:xs) (y:ys) = x : y : alternate xs ys | ||
alternate xs [] = xs | ||
alternate [] ys = ys | ||
|
||
-- | Use e.g. with sorting routines as in | ||
-- | ||
-- > sortBy (equating fst) | ||
-- | ||
equating :: Eq a => (t -> a) -> t -> t -> Bool | ||
equating f x y = f x == f y | ||
|
||
-- | Exit program with failure and log reason to stderr. | ||
exitBecause :: String -> IO b | ||
exitBecause reason = logStrLn reason >> exitFailure | ||
|
||
-- | Log string with newline to stderr | ||
logStrLn :: String -> IO () | ||
logStrLn = hPutStrLn stderr | ||
|
||
-- | Log string to stderr | ||
logStr :: String -> IO () | ||
logStr = hPutStr stderr | ||
|
||
-- | Pad string with spaces so that it is at least 'n' chars wide | ||
pad :: Int -> String -> String | ||
pad n s | n > k = replicate (n-k) ' ' ++ s | ||
| otherwise = s | ||
where | ||
k = length s |