/
Lexer.x
330 lines (285 loc) · 13.1 KB
/
Lexer.x
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
{
-- Everything between this pair of braces is copied verbatim by Alex into
-- the generated Haskell module, so plain Haskell syntax applies here.
#if __GLASGOW_HASKELL__ > 800
-- Keep the silenced warnings below out of -Werror builds on newer GHCs
-- (the generated lexer code trips these warnings).
{-# OPTIONS_GHC -Wno-error=deprecated-flags #-}
{-# OPTIONS_GHC -Wno-error=missing-signatures #-}
{-# OPTIONS_GHC -Wno-error=tabs #-}
{-# OPTIONS_GHC -Wno-error=unused-imports #-}
#endif
-- Older-style -fno-warn flags for the same warnings, kept for older GHCs.
{-# OPTIONS_GHC -fno-warn-deprecated-flags #-}
{-# OPTIONS_GHC -fno-warn-missing-signatures #-}
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
{-| The lexer is generated by Alex (<http://www.haskell.org/alex>) and is an
adaptation of GHC's lexer. The main lexing function 'lexer' is called by
the "Agda.Syntax.Parser.Parser" to get the next token from the input.
-}
module Agda.Syntax.Parser.Lexer
( -- * The main function
lexer
-- * Lex states
, normal, code
, layout, empty_layout, bol, imp_dir
-- * Alex generated functions
, AlexReturn(..), alexScanUser
) where
import Agda.Syntax.Parser.Alex
import Agda.Syntax.Parser.Comments
-- The {-# SOURCE #-} imports go through hs-boot files to break the import
-- cycle with the layout and lex-action modules; they are hidden from
-- Haddock builds (which define __HADDOCK__).
#ifndef __HADDOCK__
import {-# SOURCE #-} Agda.Syntax.Parser.Layout
import {-# SOURCE #-} Agda.Syntax.Parser.LexActions
#endif
import Agda.Syntax.Parser.Monad
import Agda.Syntax.Parser.StringLiterals
import Agda.Syntax.Parser.Tokens
import Agda.Syntax.Literal
}
-- Note that the regular expressions should not use non-ASCII
-- characters, see Agda.Syntax.Parser.Alex.alexGetByte.
-- Character-set macros ($name). `#` is Alex set difference.
$digit = 0-9
$hexdigit = [ $digit a-f A-F ]
$binarydigit = 0-1
-- "Alphabetic" identifier characters: ASCII letters plus underscore.
$alpha = [ A-Z a-z _ ]
-- Operator-like characters that are also legal inside Agda names.
$op = [ \- \! \# \$ \% \& \* \+ \/ \< \= \> \^ \| \~ \? \` \[ \] \, \: ]
$idstart = [ $digit $alpha $op ]
-- Identifier continuation characters additionally allow prime and backslash.
$idchar = [ $idstart ' \\ ]
$nonalpha = $idchar # $alpha
$white_notab = $white # \t
$white_nonl = $white_notab # \n
-- Regex macros (@name).
-- Numeric literals may use underscores as digit-group separators
-- (e.g. 1_000_000), in decimal, hexadecimal (0x) or binary (0b) form.
@prettynumber = $digit+ ([_] $digit+)*
| "0x" $hexdigit+ ([_] $hexdigit+)*
| "0b" $binarydigit+ ([_] $binarydigit+)*
@integer = [\-]? @prettynumber
@decimal = $digit+
@exponent = [eE] [\-\+]? @decimal
@float = [\-]? @decimal \. @decimal @exponent?
| [\-]? @decimal @exponent
-- A name can't start with \x (to allow \x -> x).
-- Bug in alex: [ _ op ]+ doesn't seem to work!
@start = ($idstart # [_]) | \\ [ $nonalpha ]
@ident = @start $idchar* | [_] $idchar+
-- Qualified names: zero or more dot-terminated module components.
@namespace = (@ident \.)*
@q_ident = @namespace @ident
tokens :-
-- White space
-- (the trailing `;` action means: skip the matched text)
<0,code,bol_,layout_,empty_layout_,imp_dir_>
$white_nonl+ ;
<pragma_,fpragma_> $white_notab ;
-- Pragmas
<0,code,pragma_> "{-#" { beginWith pragma $ symbol SymOpenPragma }
<fpragma_> "{-#" { beginWith fpragma $ symbol SymOpenPragma }
<pragma_,fpragma_> "#-}" { endWith $ symbol SymClosePragma }
-- Pragma keywords, recognised only inside a {-# ... #-} in state pragma_.
<pragma_> "BUILTIN" { keyword KwBUILTIN }
<pragma_> "CATCHALL" { keyword KwCATCHALL }
-- COMPILE and FOREIGN switch to the fpragma_ state so the remainder of
-- the pragma is lexed as raw strings rather than pragma keywords.
<pragma_> "COMPILE" { endWith $ beginWith fpragma $ keyword KwCOMPILE }
<pragma_> "FOREIGN" { endWith $ beginWith fpragma $ keyword KwFOREIGN }
<pragma_> "DISPLAY" { keyword KwDISPLAY }
<pragma_> "ETA" { keyword KwETA }
<pragma_> "IMPOSSIBLE" { keyword KwIMPOSSIBLE }
<pragma_> "INJECTIVE" { keyword KwINJECTIVE }
<pragma_> "INJECTIVE_FOR_INFERENCE" { keyword KwINJECTIVE_FOR_INFERENCE }
<pragma_> "INLINE" { keyword KwINLINE }
<pragma_> "INCOHERENT" { keyword KwINCOHERENT }
<pragma_> "NOINLINE" { keyword KwNOINLINE }
<pragma_> "NOT_PROJECTION_LIKE" { keyword KwNOT_PROJECTION_LIKE }
<pragma_> "LINE" { keyword KwLINE }
<pragma_> "MEASURE" { keyword KwMEASURE }
<pragma_> "NO_POSITIVITY_CHECK" { keyword KwNO_POSITIVITY_CHECK }
<pragma_> "NO_TERMINATION_CHECK" { keyword KwNO_TERMINATION_CHECK }
<pragma_> "NO_UNIVERSE_CHECK" { keyword KwNO_UNIVERSE_CHECK }
<pragma_> "NON_COVERING" { keyword KwNON_COVERING }
<pragma_> "NON_TERMINATING" { keyword KwNON_TERMINATING }
<pragma_> "OPTIONS" { keyword KwOPTIONS }
<pragma_> "POLARITY" { keyword KwPOLARITY }
<pragma_> "OVERLAPPABLE" { keyword KwOVERLAPPABLE }
<pragma_> "OVERLAPPING" { keyword KwOVERLAPPING }
<pragma_> "OVERLAPS" { keyword KwOVERLAPS }
<pragma_> "REWRITE" { keyword KwREWRITE }
<pragma_> "STATIC" { keyword KwSTATIC }
<pragma_> "TERMINATING" { keyword KwTERMINATING }
<pragma_> "WARNING_ON_USAGE" { keyword KwWARNING_ON_USAGE }
<pragma_> "WARNING_ON_IMPORT" { keyword KwWARNING_ON_IMPORT }
-- Anything else inside a pragma becomes a TokString chunk.
<pragma_> . # [ $white \" ] + { withInterval $ TokString } -- we recognise string literals in pragmas
<fpragma_> . # [ $white ] + { withInterval $ TokString }
-- Comments
-- We need to rule out pragmas here. Usually longest match would take
-- precedence, but in some states pragmas aren't valid but comments are.
<0,code,bol_,layout_,empty_layout_,imp_dir_>
"{-" / { not' (followedBy '#') } { nestedComment }
-- A misplaced end-comment, like in @f {x-} = x-@ gives a parse error.
"-}" { symbol SymEndComment }
@ident "-}" { symbol SymEndComment }
-- Dashes followed by a name symbol should be parsed as a name.
-- Line comments: the first rule keeps the comment as a token when the
-- keepComments flag is set, the second rule discards it.
<0,code,bol_,layout_,empty_layout_,imp_dir_>
"--" .* / { keepComments .&&. (followedBy '\n' .||. eof) }
{ confirmLayout `andThen` withInterval TokComment }
<0,code,bol_,layout_,empty_layout_,imp_dir_>
"--" .* / { followedBy '\n' .||. eof }
{ confirmLayout `andThen` skip }
-- Note: we need to confirm tentative layout columns whenever we meet
-- a newline character ('\n').
-- The exception is the newline after a layout keyword.
-- We need to check the offside rule for the first token on each line. We
-- should not check the offside rule for the end of file token or an
-- '\end{code}'
<0,code,imp_dir_> \n { begin bol_ } -- Note that @begin@ revisits '\n' in the new state!
<bol_>
{
\n { confirmLayout `andThen` skip }
-- ^ \\ "end{code}" { end }
() / { not' eof } { offsideRule }
}
-- After a layout keyword the
-- indentation of the first token decides the column of the layout block.
<layout_>
{ \n { confirmLayout `andThen` skip}
() { endWith newLayoutBlock }
}
-- The only rule for the empty_layout state. Generates a close brace.
<empty_layout_> () { emptyLayout }
-- Keywords
<0,code> abstract { keyword KwAbstract }
<0,code> codata { keyword KwCoData }
<0,code> coinductive { keyword KwCoInductive }
<0,code> constructor { keyword KwConstructor }
<0,code> data { keyword KwData }
<0,code> do { keyword KwDo }
<0,code> "eta-equality" { keyword KwEta }
<0,code> field { keyword KwField }
<0,code> forall { keyword KwForall }
<0,code> import { keyword KwImport }
<0,code> in { keyword KwIn }
<0,code> inductive { keyword KwInductive }
<0,code> infix { keyword KwInfix }
<0,code> infixl { keyword KwInfixL }
<0,code> infixr { keyword KwInfixR }
<0,code> instance { keyword KwInstance }
<0,code> interleaved { keyword KwInterleaved }
<0,code> let { keyword KwLet }
<0,code> macro { keyword KwMacro }
<0,code> module { keyword KwModule }
<0,code> mutual { keyword KwMutual }
<0,code> "no-eta-equality" { keyword KwNoEta }
<0,code> open { keyword KwOpen }
<0,code> overlap { keyword KwOverlap }
<0,code> pattern { keyword KwPatternSyn }
<0,code> postulate { keyword KwPostulate }
<0,code> primitive { keyword KwPrimitive }
<0,code> private { keyword KwPrivate }
<0,code> quote { keyword KwQuote }
<0,code> quoteTerm { keyword KwQuoteTerm }
<0,code> record { keyword KwRecord }
<0,code> rewrite { keyword KwRewrite }
<0,code> syntax { keyword KwSyntax }
<0,code> tactic { keyword KwTactic }
<0,code> unquote { keyword KwUnquote }
<0,code> unquoteDecl { keyword KwUnquoteDecl }
<0,code> unquoteDef { keyword KwUnquoteDef }
<0,code> variable { keyword KwVariable }
<0,code> where { keyword KwWhere }
<0,code> with { keyword KwWith }
<0,code> opaque { keyword KwOpaque }
<0,code> unfolding { keyword KwUnfolding }
-- The parser is responsible to put the lexer in the imp_dir_ state when it
-- expects an import directive keyword. This means that if you run the
-- tokensParser you will never see these keywords.
<0,code> using { keyword KwUsing }
<0,code> hiding { keyword KwHiding }
<0,code> renaming { keyword KwRenaming }
<imp_dir_> to { endWith $ keyword KwTo }
<0,code> public { keyword KwPublic }
-- Holes
<0,code> "{!" { hole }
-- Special symbols
-- (ordered longest-first where prefixes overlap, e.g. "..." before "..")
<0,code> "..." { symbol SymEllipsis }
<0,code> ".." { symbol SymDotDot }
<0,code> "." { symbol SymDot }
<0,code> ";" { symbol SymSemi }
<0,code> ":" { symbol SymColon }
<0,code> "=" { symbol SymEqual }
<0,code> "_" { symbol SymUnderscore }
<0,code> "?" { symbol SymQuestionMark }
<0,code> "|" { symbol SymBar }
<0,code> "(|" /[$white] { symbol SymOpenIdiomBracket }
<0,code> "|)" { symbol SymCloseIdiomBracket }
<0,code> "(|)" { symbol SymEmptyIdiomBracket }
<0,code> "(" { symbol SymOpenParen }
<0,code> ")" { symbol SymCloseParen }
<0,code> "->" { symbol SymArrow }
<0,code> "\" { symbol SymLambda } -- "
<0,code> "@" { symbol SymAs }
<0,code> "{{" /[^[!\-]] { symbol SymDoubleOpenBrace }
-- Andreas, 2019-08-08, issue #3962, don't lex '{{' if followed by '-'
-- since this will be confused with '{-' (start of comment) by Emacs.
-- We don't lex '}}' into a SymDoubleCloseBrace. Instead, we lex it as
-- two SymCloseBrace's. When the parser is looking for a double
-- closing brace, it will also accept two SymCloseBrace's, after
-- verifying that they are immediately next to each other.
-- This trick allows us to keep "record { a = record {}}" working
-- properly.
-- <0,code> "}}" { symbol SymDoubleCloseBrace }
<0,code> "{" { symbol SymOpenBrace } -- you can't use braces for layout
<0,code> "}" { symbol SymCloseBrace }
-- Literals
<0,code> \' { litChar }
<0,code,pragma_> \" { litString }
<0,code> @integer { literal' integer LitNat }
<0,code> @float { literal LitFloat }
-- Identifiers
<0,code,imp_dir_> @q_ident { identifier }
-- Andreas, 2013-02-21, added identifiers to the 'imp_dir_' state.
-- This is to fix issue 782: 'toz' should not be lexed as 'to'
-- (followed by 'z' after leaving imp_dir_).
-- With identifiers in state imp_dir_, 'toz' should be lexed as
-- identifier 'toz' in imp_dir_ state, leading to a parse error later.
{
-- Literal Haskell footer, copied verbatim into the generated module.
-- The names ending in an underscore (bol_, layout_, pragma_, ...) are the
-- start-code constants Alex generates from the rules above; the bindings
-- below re-export them under nicer names for the parser.
-- | This is the initial state for parsing a regular, non-literate file.
normal :: LexState
normal = 0
{-| The layout state. Entered when we see a layout keyword ('withLayout') and
exited at the next token ('newLayoutBlock').
-}
layout :: LexState
layout = layout_
{-| The state inside a pragma.
-}
pragma :: LexState
pragma = pragma_
-- | The state inside a FOREIGN pragma. This needs to be different so that we don't
-- lex further strings as pragma keywords.
fpragma :: LexState
fpragma = fpragma_
{-| We enter this state from 'newLayoutBlock' when the token following a
layout keyword is to the left of (or at the same column as) the current
layout context. Example:
> data Empty : Set where
> foo : Empty -> Nat
Here the second line is not part of the @where@ clause since it is has the
same indentation as the @data@ definition. What we have to do is insert an
empty layout block @{}@ after the @where@. The only thing that can happen
in this state is that 'emptyLayout' is executed, generating the closing
brace. The open brace is generated when entering by 'newLayoutBlock'.
-}
empty_layout :: LexState
empty_layout = empty_layout_
-- | This state is entered at the beginning of each line. You can't lex
-- anything in this state, and to exit you have to check the layout rule.
-- Done with 'offsideRule'.
bol :: LexState
bol = bol_
-- | This state can only be entered by the parser. In this state you can only
-- lex the keywords @using@, @hiding@, @renaming@ and @to@. Moreover they are
-- only keywords in this particular state. The lexer will never enter this
-- state by itself, that has to be done in the parser.
imp_dir :: LexState
imp_dir = imp_dir_
-- | Return the next token. This is the function used by Happy in the parser.
--
-- @lexer k = 'lexToken' >>= k@
lexer :: (Token -> Parser a) -> Parser a
lexer k = lexToken >>= k
-- Standalone type signatures for the Alex-generated scanning functions;
-- their definitions are produced by Alex, not written here.
-- | Do not use this function; it sets the 'ParseFlags' to
-- 'undefined'.
alexScan :: AlexInput -> Int -> AlexReturn (LexAction Token)
-- | This is the main lexing function generated by Alex.
alexScanUser :: ([LexState], ParseFlags) -> AlexInput -> Int -> AlexReturn (LexAction Token)
}