Skip to content
Andrew O'Brien edited this page Aug 7, 2013 · 5 revisions

## Simple URL parsing

Originally published on StackOverflow.

    import Text.Regex.Applicative

    -- | The URL schemes this example knows about.
    data Protocol
        = HTTP
        | FTP
        deriving (Show)

    -- | Regex for the scheme part of a URL: the literal @http@ yields 'HTTP',
    -- the literal @ftp@ yields 'FTP'.
    protocol :: RE Char Protocol
    protocol =
            (HTTP <$ string "http")
        <|> (FTP  <$ string "ftp")

    -- | Host part of a URL, kept as the raw matched text.
    type Host = String

    -- | Everything after the slash that follows the host, kept as raw text.
    type Location = String

    -- | A parsed URL: its protocol, host and location, in that order.
    data URL = URL Protocol Host Location
        deriving (Show)

    -- | Regex for the host: a (possibly empty) run of characters, none of
    -- which is a @/@.
    host :: RE Char Host
    host = many (psym (/= '/'))

    -- | Regex for a whole URL: a 'protocol', the literal @://@, a 'host', and
    -- an optional location.
    --
    -- The location is everything after the first @/@ that follows the host.
    -- When the input ends right after the host (for example
    -- @http://example.com@) the match still succeeds, with an empty location;
    -- previously the slash was mandatory and such URLs were rejected.
    url :: RE Char URL
    url = URL <$> protocol <* string "://" <*> host <*> location
      where
        -- Either a slash followed by the rest of the input, or nothing at all.
        -- 'host' never consumes a '/', so the two alternatives cannot overlap.
        location = sym '/' *> many anySym <|> pure ""

    -- | Match a sample URL against 'url' and print the result.
    main :: IO ()
    main = print ("http://stackoverflow.com/questions" =~ url)

## Simple expression language

    import Text.Regex.Applicative
    import Data.Char hiding (Space)
    import Data.Maybe

    -- | A single token of the expression language.
    data Lexeme
        -- | An integer literal.
        = Number Int
        -- | An operator, identified by its character.
        | Op Char
        -- | A name.
        | Identifier String
        -- | An opening parenthesis.
        | LParen
        -- | A closing parenthesis.
        | RParen
        deriving (Show)

    -- | Regex for a decimal integer literal: one or more digits, converted to
    -- an 'Int'.
    --
    -- 'some' (not 'many') is deliberate: with 'many' the regex also matched
    -- the empty string, and forcing that result evaluated @read ""@, which
    -- throws at runtime. Requiring at least one digit guarantees 'read' only
    -- ever sees a non-empty string of digits.
    num :: RE Char Int
    num = read <$> some (psym isDigit)

    -- | Regex for a single operator character: one of @+@, @-@, @/@ or @*@.
    -- Yields the character that was matched.
    op :: RE Char Char
    op = psym (`elem` "+-/*")

    -- | Regex for an identifier: one alphabetic character followed by any
    -- number of alphanumeric characters. Yields the matched text.
    identifier :: RE Char String
    identifier = (:) <$> leading <*> trailing
      where
        leading  = psym isAlpha
        trailing = many (psym isAlphaNum)

    -- | Regex for a (possibly empty) run of whitespace characters, as
    -- classified by 'isSpace'. Yields the matched characters.
    space :: RE Char String
    space = many (psym isSpace)

    -- | Regex for one token: a number, an operator, an identifier, or a
    -- parenthesis, tried in that order.
    lexeme :: RE Char Lexeme
    lexeme = number <|> operator <|> name <|> open <|> close
      where
        number   = fmap Number num
        operator = fmap Op op
        name     = fmap Identifier identifier
        open     = LParen <$ sym '('
        close    = RParen <$ sym ')'

    -- | Regex for a whole input: any sequence of tokens and whitespace runs,
    -- yielding just the tokens in order.
    lexemes :: RE Char [Lexeme]
    lexemes = catMaybes <$> many item
      where
        -- A token is wrapped in 'Just'; whitespace matched by 'space' becomes
        -- 'Nothing', which 'catMaybes' then discards.
        item = fmap Just lexeme <|> Nothing <$ space

    -- | Match a sample expression against 'lexemes' and print the result.
    main :: IO ()
    main = print ("a + 2*b - 3/c" =~ lexemes)