From 567340ee7020185c179e13205b1ae4f8022fbc57 Mon Sep 17 00:00:00 2001 From: Bjorn Winckler Date: Sun, 9 Mar 2014 21:47:17 +0100 Subject: [PATCH] Start work on primer scanning --- src/Main.hs | 32 ++++++++++++++++---------------- src/Options.hs | 27 +++++++++++++++++++++++++++ src/Scan.hs | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 16 deletions(-) create mode 100644 src/Options.hs create mode 100644 src/Scan.hs diff --git a/src/Main.hs b/src/Main.hs index 020a4ef..80caedb 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -4,24 +4,12 @@ import Control.Applicative ((<$>), (<*>), pure) import Options.Applicative ((<>), optional) import Options.Applicative (Parser, command, info, execParser, strOption, help, switch, help, helper, - fullDesc, progDesc, long, header, - subparser, flag) + fullDesc, progDesc, long, header, str, + subparser, flag, value, argument, metavar) -data IseqOptions = IseqOptions { - optVerbose :: Bool - , optOutput :: Maybe String - , optCommand :: Command - } deriving (Show) +import Options +import Scan (scan) -data Command = - CmdAlign { optAlignment :: Alignment } - | CmdMerge - deriving (Show) - -data Alignment = - GlobalAlignment - | LocalAlignment - deriving (Show, Eq) main :: IO () @@ -30,6 +18,11 @@ main = do (fullDesc <> header "Tools for processing Illumina ?iSeq data")) print o + case optCommand o of + CmdScan input primer -> scan o input primer + _ -> return () + + parser :: Parser IseqOptions parser = IseqOptions <$> switch (long "verbose" <> help "Be verbose") @@ -38,6 +31,7 @@ parser = IseqOptions command "align" (info alignOptParser $ progDesc "Align against database") <> command "merge" (info mergeOptParser $ progDesc "Merge paired reads") + <> command "scan" (info scanOptParser $ progDesc "Scan for primer") ) alignOptParser :: Parser Command @@ -47,3 +41,9 @@ alignOptParser = CmdAlign mergeOptParser :: Parser Command mergeOptParser = pure CmdMerge + +scanOptParser :: Parser Command +scanOptParser = CmdScan + <$> strOption (long "input" <> value "/dev/stdin" + <> help "Fasta file to scan") + <*> argument str (metavar "PRIMER" <> help "Primer sequence to scan for") diff --git a/src/Options.hs b/src/Options.hs new file mode 100644 index 0000000..a10b172 --- /dev/null +++ b/src/Options.hs @@ -0,0 +1,27 @@ +module Options ( + IseqOptions(..) + , Command(..) + , Alignment(..) + ) where + + +data IseqOptions = IseqOptions { + optVerbose :: Bool + , optOutput :: Maybe FilePath + , optCommand :: Command + } deriving (Show) + +data Command = + CmdAlign { optAlignment :: Alignment } + | CmdMerge + | CmdScan { + optInput :: FilePath + , optPrimer :: String + } + deriving (Show) + +data Alignment = + GlobalAlignment + | LocalAlignment + deriving (Show, Eq) + diff --git a/src/Scan.hs b/src/Scan.hs new file mode 100644 index 0000000..3b5b47c --- /dev/null +++ b/src/Scan.hs @@ -0,0 +1,32 @@ +{-# LANGUAGE BangPatterns #-} + +module Scan (bestMatchWithin, editDist, scan) where + +import Data.List (tails, minimumBy) +import Data.Ord (comparing) + +import Options + + +-- Find position within n characters from start of y that minimizes edit +-- distance to x. +bestMatchWithin :: Eq a => Int -> [a] -> [a] -> (Int, Int) +bestMatchWithin n x y = minimumBy (comparing snd) $ zip [1..] distances + where + distances = map (editDist x) (take (max 1 $ n+1) $ tails y) + + +-- Edit distance between two lists +editDist :: Eq a => [a] -> [a] -> Int +editDist = go 0 + where + go !n [] _ = n + go !n xs [] = n + length xs + go !n xa@(x:xs) ya@(y:ys) + | x == y = go n xs ys + | otherwise = minimum [ go (n+1) xa ys -- deletion + , go (n+1) xs ya -- insertion + , go (n+1) xs ys ] -- substitution + + +scan = undefined