Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
tree: d5b92c4266
Fetching contributors…

Cannot retrieve contributors at this time

file 173 lines (148 sloc) 3.973 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
-----------------------------------------------------------------------------
-- |
-- Module : Ministg.Lexer
-- Copyright : (c) 2009-2012 Bernie Pope
-- License : BSD-style
-- Maintainer : florbitous@gmail.com
-- Stability : experimental
-- Portability : ghc
--
-- Lexical analysis for ministg programs.
-----------------------------------------------------------------------------

module Ministg.Lexer
   ( Token (..)
   , Symbol (..)
   , Ident
   , lexer
   )
where

import Data.Char
   ( isDigit
   , isAlpha
   , isPrint
   , isLower
   )

import Text.ParserCombinators.Parsec hiding (token)
import Control.Applicative hiding ((<|>), many)

newtype Token = Token (SourcePos, Symbol)

instance Show Token where
   show (Token (pos, symbol)) = show symbol

type Ident = String

data Symbol
   = Variable Ident
   | Constructor Ident
   | Natural Integer
   | QuotedString String
   | Equals
   | BackSlash
   | RightArrow
   | Let
   | In
   | Case
   | Of
   | LeftParen
   | RightParen
   | LeftBrace
   | RightBrace
   | SemiColon
   | Fun
   | Con
   | Pap
   | Thunk
   | Plus
   | Minus
   | Times
   | Equality
   | GreaterThan
   | LessThan
   | GreaterThanEquals
   | LessThanEquals
   | IntToBool
   | Stack
   | Error
   deriving (Eq, Show)

lexer :: String -> String -> Either ParseError [Token]
lexer = parse tokenise

tokenise :: Parser [Token]
tokenise = skip *> sepEndBy token skip <* eof

skip :: Parser ()
skip = skipMany (comment <|> whiteSpace)

whiteSpace :: Parser ()
whiteSpace = space >> return ()

comment :: Parser ()
comment = singleLineComment

singleLineComment :: Parser ()
singleLineComment = string "#" >> manyTill anyChar eol >> return ()

eol :: Parser ()
eol = newline >> return ()

-- XXX Perhaps it is not sensible to divide the tokens based on their
-- syntactic role. Sometimes tokens from different syntactic classes can have
-- the same prefix.
token :: Parser Token
token =
   punctuation <|>
   keyword <|>
   variable <|>
   constructor <|>
   parenthesis <|>
   number <|>
   quotedString

number :: Parser Token
number = tokenPos parseNum Natural
   where
   parseNum :: Parser Integer
   parseNum = read <$> many1 digit

quotedString :: Parser Token
quotedString = tokenPos parseString QuotedString
   where
   parseString :: Parser String
   parseString = char '"' *> manyTill anyChar (char '"')

variable :: Parser Token
variable = tokenPos (parseIdent lower) Variable

constructor :: Parser Token
constructor = tokenPos (parseIdent upper) Constructor

parseIdent :: Parser Char -> Parser String
parseIdent firstChar = (:) <$> firstChar <*> many (char '_' <|> alphaNum)

keyword :: Parser Token
keyword =
   key "let" Let <|>
   key "in" In <|>
   key "case" Case <|>
   key "of" Of <|>
   key "FUN" Fun <|>
   key "CON" Con <|>
   key "PAP" Pap <|>
   key "THUNK" Thunk <|>
   key "plus#" Plus <|>
   key "sub#" Minus <|>
   key "mult#" Times <|>
   key "eq#" Equality <|>
   key "lt#" LessThan <|>
   key "lte#" LessThanEquals <|>
   key "gt#" GreaterThan <|>
   key "gte#" GreaterThanEquals <|>
   key "intToBool#" IntToBool <|>
   key "ERROR" Error <|>
   key "stack" Stack
   where
   key :: String -> Symbol -> Parser Token
   key str keyWord = tokenPos (try kwParser) (const keyWord)
      where
      kwParser = string str >> notFollowedBy alphaNum

parenthesis :: Parser Token
parenthesis =
   simpleTok "(" LeftParen <|>
   simpleTok ")" RightParen <|>
   simpleTok "{" LeftBrace <|>
   simpleTok "}" RightBrace

punctuation :: Parser Token
punctuation =
   simpleTok "=" Equals
   <|> simpleTok ";" SemiColon
   <|> simpleTok "\\" BackSlash
   <|> simpleTok "->" RightArrow

simpleTok :: String -> Symbol -> Parser Token
simpleTok str token = tokenPos (string str) (const token)

tokenPos :: Parser a -> (a -> Symbol) -> Parser Token
tokenPos parser mkToken =
  Token <$> ((,) <$> getPosition <*> (mkToken <$> parser))
Something went wrong with that request. Please try again.