Skip to content

Commit

Permalink
Added tokensEither
Browse files Browse the repository at this point in the history
  • Loading branch information
mitchellwrosen authored and mitchellwrosen committed May 28, 2015
1 parent e14f53a commit 37b30dc
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 18 deletions.
1 change: 1 addition & 0 deletions lexer-applicative.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ library
-- other-extensions:
build-depends:
base >=4.5 && < 5,
deepseq >= 1.2,
srcloc >= 0.5,
regex-applicative >= 0.3.1
hs-source-dirs: src
Expand Down
42 changes: 35 additions & 7 deletions src/Language/Lexer/Applicative.hs
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
{-# LANGUAGE ScopedTypeVariables, DeriveDataTypeable #-}

-- | For an example, see
-- <https://ro-che.info/articles/2015-01-02-lexical-analysis>
module Language.Lexer.Applicative (tokens, LexicalError(..)) where
module Language.Lexer.Applicative (tokens, tokensEither, LexicalError(..)) where

import Text.Regex.Applicative
import Control.Exception
import Data.Loc
import Data.List
import Data.Maybe
import Data.Typeable (Typeable)
import Control.Exception
import System.IO.Unsafe (unsafePerformIO)
import Text.Regex.Applicative

annotate
:: String -- ^ source file name
Expand All @@ -21,10 +24,11 @@ annotate src s = snd $ mapAccumL f (startPos src, startPos src) s

-- | The lexical error exception
data LexicalError = LexicalError !Pos
deriving Typeable
deriving (Eq, Typeable)

instance Show LexicalError where
show (LexicalError pos) = "Lexical error at " ++ displayPos pos

instance Exception LexicalError

-- | The lexer.
Expand All @@ -39,12 +43,13 @@ instance Exception LexicalError
tokens
:: forall token.
RE Char token -- ^ regular expression for tokens
-> RE Char () -- ^ regular expression for whitespace and comments
-> String -- ^ source file name (used in locations)
-> String -- ^ source text
-> RE Char () -- ^ regular expression for whitespace and comments
-> String -- ^ source file name (used in locations)
-> String -- ^ source text
-> [L token]
tokens pToken pJunk src = go . annotate src
where
go :: [(Char, Pos, Pos)] -> [L token]
go l = case l of
[] -> []
s@((_, pos1, _):_) ->
Expand All @@ -68,3 +73,26 @@ tokens pToken pJunk src = go . annotate src

re :: RE (Char, Pos, Pos) (Maybe token)
re = comap (\(c, _, _) -> c) $ (Just <$> pToken) <|> (Nothing <$ pJunk)

-- | Like `tokens`, but pure, and strict in the spine of the list.
tokensEither
:: forall token.
=> RE Char token -- ^ regular expression for tokens
-> RE Char () -- ^ regular expression for whitespace and comments
-> String -- ^ source file name (used in locations)
-> String -- ^ source text
-> Either LexicalError [L token]
tokensEither pToken pJunk src =
lmap (fromJust . fromException)
. unsafePerformIO
. try
. evaluate
. forceSpine
. tokens pToken pJunk src
where
forceSpine :: [a] -> [a]
forceSpine xs = foldr (const id) () xs `seq` xs

lmap :: (a -> b) -> Either a c -> Either b c
lmap f (Left a) = Left (f a)
lmap _ (Right b) = Right b
42 changes: 31 additions & 11 deletions tests/test.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,32 +10,52 @@ import Data.Loc (L(..), Loc(..), Pos(..))
import Control.Exception
import Control.DeepSeq

whitespace :: RE Char ()
whitespace = () <$ some (psym isSpace)

unloc :: L a -> (a, Loc)
unloc (L l a) = (a, l)

-- this is bad, because it accepts an empty string
badWhitespace :: RE Char ()
badWhitespace = () <$ many (psym isSpace)

main = defaultMain $ testGroup "Tests"
[ testCase "Empty string" $
tokens (empty :: RE Char Int) empty "-" "" @=? []
, testCase "Space- and newline-separated numbers" $
unloc <$> tokens decimal whitespace "-" "1\n 23 456" @?=
[ (1, Loc (Pos "-" 1 1 0) (Pos "-" 1 1 0))
, (23, Loc (Pos "-" 2 2 3) (Pos "-" 2 3 4))
, (456,Loc (Pos "-" 2 6 7) (Pos "-" 2 8 9))
]
[ testCase "Empty string" $ do
tokens (empty :: RE Char Int) empty "-" "" @=? []
tokensEither (empty :: RE Char Int) empty "-" "" @=? Right []
, testCase "Space- and newline-separated numbers" $ do
let input = "1\n 23 456"
output = [ (1::Int, Loc (Pos "-" 1 1 0) (Pos "-" 1 1 0))
, (23, Loc (Pos "-" 2 2 3) (Pos "-" 2 3 4))
, (456,Loc (Pos "-" 2 6 7) (Pos "-" 2 8 9))
]
unloc <$> tokens decimal whitespace "-" input @?= output
fmap unloc <$> tokensEither decimal whitespace "-" input @?= Right output
, testCase "Nullable parser, no error" $ do
r <- try . evaluate $ tokens decimal badWhitespace "-" "31 45"
let input = "31 45"

r <- try . evaluate $ tokens decimal badWhitespace "-" input
case r of
Right (_ :: [L Int]) -> return ()
Left (e :: SomeException) -> assertFailure $ show e

case tokensEither decimal badWhitespace "-" input of
Right (_ :: [L Int]) -> return ()
Left e -> assertFailure $ show e
, testCase "Nullable parser, error" $ do
r <- try . evaluate . force $ tokens decimal badWhitespace "-" "31? 45"
let filename = "-"
input = "31? 45"
expectedPos = Pos filename 1 3 2

r <- try . evaluate . force $ tokens decimal badWhitespace filename input
case r of
Right (_ :: [L Int]) -> assertFailure "No error?"
Left (LexicalError p) -> p @?= Pos "-" 1 3 2
Left (LexicalError p) -> p @?= expectedPos

case tokensEither decimal whitespace "-" input of
Right (_ :: [L Int]) -> assertFailure "No error?"
Left (LexicalError p) -> p @?= expectedPos
]

-- orphan
Expand Down

0 comments on commit 37b30dc

Please sign in to comment.