Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
WIP integrate Lexer.x into new parser.
- Loading branch information
1 parent
afed095
commit 059a158
Showing
2 changed files
with
240 additions
and
0 deletions.
There are no files selected for viewing
96 changes: 96 additions & 0 deletions
96
plutus-core/untyped-plutus-core/src/UntypedPlutusCore/Parser/Lexer.hs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
{-# LANGUAGE OverloadedStrings #-} | ||
{-# LANGUAGE RecordWildCards #-} | ||
{-# LANGUAGE TypeFamilies #-} | ||
module UntypedPlutusCore.Lexer where | ||
|
||
import PlutusCore.Lexer | ||
import PlutusCore.Lexer.Type as L (Token) | ||
import PlutusPrelude (NonEmpty ((:|)), Pretty (pretty), Render (render)) | ||
|
||
import qualified Data.List as DL | ||
import qualified Data.List.NonEmpty as NE | ||
import Data.Proxy (Proxy (..)) | ||
import Text.Megaparsec | ||
|
||
data WithPos a = WithPos | ||
{ startPos :: SourcePos | ||
, endPos :: SourcePos | ||
, tokenLength :: Int | ||
, tokenVal :: a | ||
} deriving (Eq, Ord, Show) | ||
|
||
data MyStream = MyStream | ||
{ myStreamInput :: String -- for showing offending lines | ||
, unMyStream :: [WithPos (L.Token AlexPosn)] | ||
} | ||
instance Stream MyStream where | ||
type Token MyStream = WithPos (L.Token AlexPosn) | ||
type Tokens MyStream = [WithPos (L.Token AlexPosn)] | ||
|
||
tokenToChunk Proxy x = [x] | ||
tokensToChunk Proxy xs = xs | ||
chunkToTokens Proxy = id | ||
chunkLength Proxy = length | ||
chunkEmpty Proxy = null | ||
take1_ (MyStream _ []) = Nothing | ||
take1_ (MyStream str (t:ts)) = Just | ||
( t | ||
, MyStream (drop (tokensLength pxy (t:|[])) str) ts | ||
) | ||
takeN_ n (MyStream str s) | ||
| n <= 0 = Just ([], MyStream str s) | ||
| null s = Nothing | ||
| otherwise = | ||
let (x, s') = splitAt n s | ||
in case NE.nonEmpty x of | ||
Nothing -> Just (x, MyStream str s') | ||
Just nex -> Just (x, MyStream (drop (tokensLength pxy nex) str) s') | ||
takeWhile_ f (MyStream str s) = | ||
let (x, s') = DL.span f s | ||
in case NE.nonEmpty x of | ||
Nothing -> (x, MyStream str s') | ||
Just nex -> (x, MyStream (drop (tokensLength pxy nex) str) s') | ||
|
||
showToken :: L.Token AlexPosn -> String | ||
showToken = render . pretty | ||
|
||
instance VisualStream MyStream where | ||
showTokens Proxy = unwords | ||
. NE.toList | ||
. fmap (showToken . tokenVal) | ||
tokensLength Proxy xs = sum (tokenLength <$> xs) | ||
|
||
instance TraversableStream MyStream where | ||
reachOffset o PosState {..} = | ||
( Just (prefix ++ restOfLine) | ||
, PosState | ||
{ pstateInput = MyStream | ||
{ myStreamInput = postStr | ||
, unMyStream = post | ||
} | ||
, pstateOffset = max pstateOffset o | ||
, pstateSourcePos = newSourcePos | ||
, pstateTabWidth = pstateTabWidth | ||
, pstateLinePrefix = prefix | ||
} | ||
) | ||
where | ||
prefix = | ||
if sameLine | ||
then pstateLinePrefix ++ preStr | ||
else preStr | ||
sameLine = sourceLine newSourcePos == sourceLine pstateSourcePos | ||
newSourcePos = | ||
case post of | ||
[] -> pstateSourcePos | ||
(x:_) -> startPos x | ||
(pre, post) = splitAt (o - pstateOffset) (unMyStream pstateInput) | ||
(preStr, postStr) = splitAt tokensConsumed (myStreamInput pstateInput) | ||
tokensConsumed = | ||
case NE.nonEmpty pre of | ||
Nothing -> 0 | ||
Just nePre -> tokensLength pxy nePre | ||
restOfLine = takeWhile (/= '\n') postStr | ||
|
||
pxy :: Proxy MyStream | ||
pxy = Proxy |
144 changes: 144 additions & 0 deletions
144
plutus-core/untyped-plutus-core/src/UntypedPlutusCore/Parser/NewParser.hs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
{-# LANGUAGE AllowAmbiguousTypes #-} | ||
{-# LANGUAGE FlexibleContexts #-} | ||
{-# LANGUAGE OverloadedStrings #-} | ||
{-# LANGUAGE RankNTypes #-} | ||
{-# LANGUAGE TypeApplications #-} | ||
{-# LANGUAGE TypeOperators #-} | ||
|
||
{-# OPTIONS_GHC -fno-warn-orphans #-} | ||
|
||
-- | Parser for untyped Plutus Core using megaparsec, as in Plutus IR. | ||
-- Will replace UntypedPlutusCore.Parser.hs. | ||
-- Parser.y and Lexer.x, which currently generate Parser.hs, will be removed. | ||
|
||
module UntypedPlutusCore.NewParser | ||
( topSourcePos | ||
, parse | ||
, parseQuoted | ||
, term | ||
, program | ||
, parseTerm | ||
, parseProgram | ||
, parseScoped | ||
, Parser | ||
, SourcePos | ||
) where | ||
|
||
import Prelude hiding (fail) | ||
|
||
import Control.Monad.Except ((<=<)) | ||
import Control.Monad.State (StateT) | ||
|
||
import qualified NewUntypedPlutusCore as UPLC | ||
import qualified PlutusCore as PLC | ||
import qualified PlutusCore.Parsable as PLC | ||
import PlutusPrelude (Pretty, through) | ||
import Text.Megaparsec hiding (ParseError, State, parse) | ||
import UntypedPlutusCore.Check.Uniques (checkProgram) | ||
import UntypedPlutusCore.Rename (Rename (rename)) | ||
|
||
import Data.ByteString.Lazy (ByteString) | ||
import Data.ByteString.Lazy.Internal (unpackChars) | ||
import qualified Data.Text as T | ||
import PlutusCore.ParserCommon | ||
|
||
-- Parsers for UPLC terms | ||
|
||
conTerm | ||
:: (PLC.Closed uni, uni `PLC.Everywhere` PLC.Parsable, PLC.Parsable (PLC.SomeTypeIn (PLC.Kinded uni))) | ||
=> Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) | ||
conTerm = inParens $ UPLC.Constant <$> wordPos "con" <*> constant | ||
|
||
builtinTerm :: (Bounded fun, Enum fun, Pretty fun) | ||
=> Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) | ||
builtinTerm = inParens $ UPLC.Builtin <$> wordPos "builtin" <*> builtinFunction | ||
|
||
varTerm :: Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) | ||
varTerm = UPLC.Var <$> getSourcePos <*> name | ||
|
||
lamTerm :: ParsecT (PLC.ParseError SourcePos) | ||
T.Text | ||
(StateT ParserState PLC.Quote) | ||
(UPLC.Term PLC.Name uni fun SourcePos) | ||
-> Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) | ||
lamTerm tm = inParens $ UPLC.LamAbs <$> wordPos "lam" <*> name <*> tm | ||
|
||
appTerm :: ParsecT (PLC.ParseError SourcePos) | ||
T.Text | ||
(StateT ParserState PLC.Quote) | ||
(UPLC.Term PLC.Name uni fun SourcePos) | ||
-> Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) | ||
appTerm tm = inBrackets $ UPLC.Apply <$> getSourcePos <*> tm <*> tm | ||
|
||
delayTerm :: ParsecT (PLC.ParseError SourcePos) | ||
T.Text | ||
(StateT ParserState PLC.Quote) | ||
(UPLC.Term PLC.Name uni fun SourcePos) | ||
-> Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) | ||
delayTerm tm = inParens $ UPLC.Delay <$> wordPos "abs" <*> tm | ||
|
||
forceTerm :: ParsecT (PLC.ParseError SourcePos) | ||
T.Text | ||
(StateT ParserState PLC.Quote) | ||
(UPLC.Term PLC.Name uni fun SourcePos) | ||
-> Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) | ||
forceTerm tm = inBraces $ UPLC.Force <$> getSourcePos <*> tm | ||
|
||
errorTerm | ||
:: Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) | ||
errorTerm = inParens $ UPLC.Error <$> wordPos "error" | ||
|
||
-- | Parser for all UPLC terms. | ||
term | ||
:: ( PLC.Parsable (PLC.Some uni), PLC.Closed uni, uni `PLC.Everywhere` PLC.Parsable | ||
, Bounded fun, Enum fun, Pretty fun, PLC.Parsable (PLC.SomeTypeIn (PLC.Kinded uni))) | ||
=> Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) | ||
term = conTerm | ||
<|> builtinTerm | ||
<|> varTerm | ||
<|> lamTerm self | ||
<|> appTerm self | ||
<|> delayTerm self | ||
<|> forceTerm self | ||
<|> errorTerm | ||
where self = term | ||
|
||
-- | Parser for UPLC programs. | ||
program | ||
:: ( PLC.Parsable (PLC.Some uni), PLC.Closed uni, uni `PLC.Everywhere` PLC.Parsable | ||
, Bounded fun, Enum fun, Pretty fun, PLC.Parsable (PLC.SomeTypeIn (PLC.Kinded uni))) | ||
=> Parser SourcePos (UPLC.Program PLC.Name uni fun SourcePos) | ||
program = whitespace >> do | ||
prog <- inParens $ UPLC.Program <$> wordPos "program" <*> version <*> term | ||
notFollowedBy anySingle | ||
return prog | ||
|
||
-- | Generic parser function. | ||
parseGen :: Parser SourcePos a -> ByteString -> Either (ParseErrorBundle T.Text (PLC.ParseError SourcePos)) a | ||
parseGen stuff bs = parse stuff "test" $ (T.pack . unpackChars) bs | ||
|
||
-- | Parse a PLC term. The resulting program will have fresh names. The underlying monad must be capable | ||
-- of handling any parse errors. | ||
parseTerm :: | ||
(PLC.Parsable (PLC.Some uni), PLC.Closed uni, uni `PLC.Everywhere` PLC.Parsable | ||
, Bounded fun, Enum fun, Pretty fun, PLC.Parsable (PLC.SomeTypeIn (PLC.Kinded uni))) => | ||
ByteString -> | ||
Either (ParseErrorBundle T.Text (PLC.ParseError SourcePos)) (UPLC.Term PLC.Name uni fun SourcePos) | ||
parseTerm = parseGen term | ||
|
||
-- | Parse a PLC program. The resulting program will have fresh names. The underlying monad must be capable | ||
-- of handling any parse errors. | ||
parseProgram :: (PLC.Parsable (PLC.Some uni), PLC.Closed uni, uni `PLC.Everywhere` PLC.Parsable | ||
, Bounded fun, Enum fun, Pretty fun, PLC.Parsable (PLC.SomeTypeIn (PLC.Kinded uni))) => ByteString -> Either (ParseErrorBundle T.Text (PLC.ParseError SourcePos)) (UPLC.Program PLC.Name uni fun SourcePos) | ||
parseProgram = parseGen program | ||
|
||
-- | Parse and rewrite so that names are globally unique, not just unique within | ||
-- their scope. | ||
parseScoped | ||
:: (PLC.MonadQuote (Either (ParseErrorBundle T.Text (PLC.ParseError SourcePos))) | ||
, PLC.AsUniqueError (ParseErrorBundle T.Text (PLC.ParseError SourcePos)) SourcePos | ||
, PLC.Parsable (PLC.Some PLC.DefaultUni)) | ||
=> ByteString | ||
-> Either (ParseErrorBundle T.Text (PLC.ParseError SourcePos)) (UPLC.Program PLC.Name PLC.DefaultUni PLC.DefaultFun SourcePos) | ||
-- don't require there to be no free variables at this point, we might be parsing an open term | ||
parseScoped = through (checkProgram (const True)) <=< rename <=< parseProgram |