/
fasta.ghc-2.ghc
106 lines (89 loc) · 3.34 KB
/
fasta.ghc-2.ghc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
{-# OPTIONS -fvia-C -O2 -optc-O2 -optc-ffast-math -fbang-patterns -fexcess-precision #-}
--
-- The Computer Language Benchmarks Game
-- http://shootout.alioth.debian.org/
--
-- Contributed by Don Stewart
-- A lazy bytestring solution.
-- Unnecessary strictness annotations removed by Sterling Clover 2/08
--
-- Add:
-- -optc-mfpmath=sse -optc-msse2
--
import System
import Data.Word
import Control.Arrow
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Char8 as C (pack,unfoldr)
import qualified Data.ByteString as S
import Data.ByteString.Internal
import Data.ByteString.Unsafe
main = do
n <- getArgs >>= readIO . head
writeFasta "ONE" "Homo sapiens alu" (n*2) (L.cycle alu)
g <- unfold "TWO" "IUB ambiguity codes" (n*3) (look iubs) 42
unfold "THREE" "Homo sapiens frequency" (n*5) (look homs) g
------------------------------------------------------------------------
--
-- lazily unfold the randomised dna sequences
--
unfold l t n f g = putStrLn (">" ++ l ++ " " ++ t) >> unroll f g n
unroll :: (Int -> (Word8, Int)) -> Int -> Int -> IO Int
unroll f = loop
where
loop r 0 = return r
loop !r i = case S.unfoldrN m (Just . f) r of
(!s, Just r') -> do
S.putStrLn s
loop r' (i-m)
where m = min i 60
look ds k = (choose ds d, j)
where (d,j) = rand k
choose :: [(Word8,Float)] -> Float -> Word8
choose [(b,_)] _ = b
choose ((b,f):xs) p = if p < f then b else choose xs (p-f)
------------------------------------------------------------------------
--
-- only demand as much of the infinite sequence as we require
writeFasta label title n s = do
putStrLn $ ">" ++ label ++ " " ++ title
let (t:ts) = L.toChunks s
go ts t n
where
go ss s n
| l60 && n60 = S.putStrLn l >> go ss r (n-60)
| n60 = S.putStr s >> S.putStrLn a >> go (tail ss) b (n-60)
| n <= ln = S.putStrLn (S.take n s)
| otherwise = S.putStr s >> S.putStrLn (S.take (n-ln) (head ss))
where
ln = S.length s
l60 = ln >= 60
n60 = n >= 60
(l,r) = S.splitAt 60 s
(a,b) = S.splitAt (60-ln) (head ss)
------------------------------------------------------------------------
im = 139968
ia = 3877
ic = 29573
rand :: Int -> (Float, Int)
rand seed = (newran,newseed)
where
newseed = (seed * ia + ic) `rem` im
newran = 1.0 * fromIntegral newseed / imd
imd = fromIntegral im
------------------------------------------------------------------------
alu = C.pack
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
\GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
\CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
\ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
\GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
\AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
\AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
iubs = map (c2w *** id)
[('a',0.27),('c',0.12),('g',0.12),('t',0.27),('B',0.02)
,('D',0.02),('H',0.02),('K',0.02),('M',0.02),('N',0.02)
,('R',0.02),('S',0.02),('V',0.02),('W',0.02),('Y',0.02)]
homs = map (c2w *** id)
[('a',0.3029549426680),('c',0.1979883004921)
,('g',0.1975473066391),('t',0.3015094502008)]