# Resources

- Getting Started: https://github.com/scala/scala-parser-combinators/blob/main/docs/Getting_Started.md
- 2.12 2.3.0 API: https://javadoc.io/static/org.scala-lang.modules/scala-parser-combinators_2.12/2.3.0/scala/util/parsing/combinator/index.html
- Building a lexer and parser with Scala's Parser Combinators: https://enear.github.io/2016/03/31/parser-combinators/

# Features

- `RegexParsers`
- `Parsers`
- `Positional`


In [1]:
import $ivy.`org.scala-lang.modules:scala-parser-combinators_2.12:2.3.0`

import scala.util.parsing.combinator._

[32mimport [39m[36m$ivy.$                                                           

[39m
[32mimport [39m[36mscala.util.parsing.combinator._[39m

# BNDF

```BNF
<block> ::= (<statement>)+

<statement> ::= "exit"
              | "read input" (<identifier> ",")* <identifier>
              | "call service" <stringLiteral>
              | "switch" ":" INDENT (<ifThen>)+ [otherwiseThen] DEDENT

<ifThen> ::= <condition> "->" INDENT <block> DEDENT

<otherwiseThen> ::= "otherwise" "->" INDENT <block> DEDENT

<condition> ::= <identifier> "==" <stringLiteral>
```

In [2]:
val input = """read input name, country
switch:
  country == "PT" ->
    call service "A"
    exit
  otherwise ->
    call service "B"
    switch:
      name == "unknown" ->
        exit
      otherwise ->
        call service "C"
        exit"""

[36minput[39m: [32mString[39m = [32m"""read input name, country
switch:
  country == "PT" ->
    call service "A"
    exit
  otherwise ->
    call service "B"
    switch:
      name == "unknown" ->
        exit
      otherwise ->
        call service "C"
        exit"""[39m

# Error

In [3]:
trait WorkflowCompilationError

case class WorkflowLexerError(msg: String) extends WorkflowCompilationError

case class WorkflowParserError(msg: String) extends WorkflowCompilationError

defined [32mtrait[39m [36mWorkflowCompilationError[39m
defined [32mclass[39m [36mWorkflowLexerError[39m
defined [32mclass[39m [36mWorkflowParserError[39m

# Lexer

In [4]:
import scala.util.parsing.input.Positional

// tokens
sealed trait WorkflowToken extends Positional

case class IDENTIFIER(str: String) extends WorkflowToken
case class LITERAL(str: String) extends WorkflowToken
case class INDENTATION(spaces: Int) extends WorkflowToken
case object EXIT extends WorkflowToken
case object READINPUT extends WorkflowToken
case object CALLSERVICE extends WorkflowToken
case object SWITCH extends WorkflowToken
case object OTHERWISE extends WorkflowToken
case object COLON extends WorkflowToken
case object ARROW extends WorkflowToken
case object EQUALS extends WorkflowToken
case object COMMA extends WorkflowToken
case object INDENT extends WorkflowToken
case object DEDENT extends WorkflowToken

[32mimport [39m[36mscala.util.parsing.input.Positional

// tokens
[39m
defined [32mtrait[39m [36mWorkflowToken[39m
defined [32mclass[39m [36mIDENTIFIER[39m
defined [32mclass[39m [36mLITERAL[39m
defined [32mclass[39m [36mINDENTATION[39m
defined [32mobject[39m [36mEXIT[39m
defined [32mobject[39m [36mREADINPUT[39m
defined [32mobject[39m [36mCALLSERVICE[39m
defined [32mobject[39m [36mSWITCH[39m
defined [32mobject[39m [36mOTHERWISE[39m
defined [32mobject[39m [36mCOLON[39m
defined [32mobject[39m [36mARROW[39m
defined [32mobject[39m [36mEQUALS[39m
defined [32mobject[39m [36mCOMMA[39m
defined [32mobject[39m [36mINDENT[39m
defined [32mobject[39m [36mDEDENT[39m

In [5]:
object WorkflowLexer extends RegexParsers {
    override def skipWhitespace = true
    override val whiteSpace = "[ \t\r\f]+".r
    
    def apply(input: String): Either[WorkflowLexerError, List[WorkflowToken]] = {
        parse(tokens, input) match {
            case NoSuccess(msg, next) => Left(WorkflowLexerError(msg))
            case Success(result, next) => Right(result)
        }
    }
    
    def identifier: Parser[IDENTIFIER] = positioned {
        "[a-zA-Z_][a-zA-Z0-9_]*".r ^^ { str => IDENTIFIER(str) }
    }

    def indentation: Parser[INDENTATION] = positioned {
        "\n[ ]*".r ^^ { whitespace =>
            val nSpaces = whitespace.length - 1
            INDENTATION(nSpaces)
        }
    }
    
    def exit          = positioned {"exit"          ^^ (_ => EXIT) }
    def readInput     = positioned {"read input"    ^^ (_ => READINPUT) }
    def callService   = positioned {"call service"  ^^ (_ => CALLSERVICE) }
    def switch        = positioned {"switch"        ^^ (_ => SWITCH) }
    def otherwise     = positioned {"otherwise"     ^^ (_ => OTHERWISE) }
    def colon         = positioned {":"             ^^ (_ => COLON) }
    def arrow         = positioned {"->"            ^^ (_ => ARROW) }
    def equals        = positioned {"=="            ^^ (_ => EQUALS) }
    def comma         = positioned {","             ^^ (_ => COMMA) }
    
    def tokens: Parser[List[WorkflowToken]] = {
        phrase(
            rep1(exit 
                 | readInput 
                 | callService 
                 | switch 
                 | otherwise 
                 | colon 
                 | arrow
                 | equals 
                 | comma 
                 | literal 
                 | identifier 
                 | indentation)) ^^ { rawTokens =>    
            processIndentations(rawTokens)
        }
    }
    
    private def processIndentations(tokens: List[WorkflowToken],
                                    indents: List[Int] = List(0)): List[WorkflowToken] = {
  
        tokens.headOption match {
            // if there is an increase in indentation level, we push this new level into the stack
            // and produce an INDENT
            case Some(INDENTATION(spaces)) if spaces > indents.head => 
                INDENT :: processIndentations(tokens.tail, spaces :: indents)

            // if there is a decrease, we pop from the stack until we have matched the new level,
            // producing a DEDENT for each pop
            case Some(INDENTATION(spaces)) if spaces < indents.head =>
                val (dropped, kept) = indents.partition(_ > spaces)
                (dropped map (_ => DEDENT)) ::: processIndentations(tokens.tail, kept)

            // if the indentation level stays unchanged, no tokens are produced
            case Some(INDENTATION(spaces)) if spaces == indents.head =>
                processIndentations(tokens.tail, indents)

            // other tokens are ignored
            case Some(token) =>
                token :: processIndentations(tokens.tail, indents)

            // the final step is to produce a DEDENT for each indentation level still remaining, thus
            // "closing" the remaining open INDENTS
            case None =>
                indents.filter(_ > 0).map(_ => DEDENT)
        }
    }
        
    def literal: Parser[LITERAL] = positioned {
        """"[^"]*"""".r ^^ { str =>
            val content = str.substring(1, str.length - 1)
            LITERAL(content)
        }
    }
}

defined [32mobject[39m [36mWorkflowLexer[39m

In [6]:
WorkflowLexer(input) match {
    case Left(e) => e
    case Right(tokens) => tokens foreach { t => 
        println(t)
        println(t.pos.longString) // postion of token
        println
    }
}
// TODO: keep pos of INDENT and DEDENT

READINPUT
read input name, country
^

IDENTIFIER(name)
read input name, country
           ^

COMMA
read input name, country
               ^

IDENTIFIER(country)
read input name, country
                 ^

SWITCH
switch:
^

COLON
switch:
      ^

INDENT
<undefined position>

IDENTIFIER(country)
  country == "PT" ->
  ^

EQUALS
  country == "PT" ->
          ^

LITERAL(PT)
  country == "PT" ->
             ^

ARROW
  country == "PT" ->
                  ^

INDENT
<undefined position>

CALLSERVICE
    call service "A"
    ^

LITERAL(A)
    call service "A"
                 ^

EXIT
    exit
    ^

DEDENT
<undefined position>

OTHERWISE
  otherwise ->
  ^

ARROW
  country == "PT" ->
                  ^

INDENT
<undefined position>

CALLSERVICE
    call service "A"
    ^

LITERAL(B)
    call service "B"
                 ^

SWITCH
switch:
^

COLON
switch:
      ^

INDENT
<undefined position>

IDENTIFIER(name)
      name == "unknown" ->
      ^

EQUALS
  country == "PT" ->
          ^

LITE

[36mres5[39m: [32mAny[39m = ()

# Parser

In [7]:
import scala.util.parsing.input.Positional

sealed trait WorkflowAST  extends Positional
case class AndThen(step1: WorkflowAST, step2: WorkflowAST) extends WorkflowAST
case class ReadInput(inputs: Seq[String]) extends WorkflowAST
case class CallService(serviceName: String) extends WorkflowAST
case class Choice(alternatives: Seq[ConditionThen]) extends WorkflowAST
case object Exit extends WorkflowAST

sealed trait ConditionThen  extends Positional { def thenBlock: WorkflowAST }
case class IfThen(predicate: Condition, thenBlock: WorkflowAST) extends ConditionThen
case class OtherwiseThen(thenBlock: WorkflowAST) extends ConditionThen

sealed trait Condition extends Positional
case class Equals(factName: String, factValue: String) extends Condition

[32mimport [39m[36mscala.util.parsing.input.Positional

[39m
defined [32mtrait[39m [36mWorkflowAST[39m
defined [32mclass[39m [36mAndThen[39m
defined [32mclass[39m [36mReadInput[39m
defined [32mclass[39m [36mCallService[39m
defined [32mclass[39m [36mChoice[39m
defined [32mobject[39m [36mExit[39m
defined [32mtrait[39m [36mConditionThen[39m
defined [32mclass[39m [36mIfThen[39m
defined [32mclass[39m [36mOtherwiseThen[39m
defined [32mtrait[39m [36mCondition[39m
defined [32mclass[39m [36mEquals[39m

In [8]:
import scala.util.parsing.input.{Reader, Position, NoPosition}

object WorkflowParser extends Parsers {
    override type Elem = WorkflowToken
    
    class WorkflowTokenReader(tokens: Seq[WorkflowToken]) extends Reader[WorkflowToken] {
        override def first: WorkflowToken = tokens.head
        override def atEnd: Boolean = tokens.isEmpty
        override def pos: Position = tokens.headOption.map(_.pos).getOrElse(NoPosition)
        override def rest: Reader[WorkflowToken] = new WorkflowTokenReader(tokens.tail)
    }
    
    def apply(tokens: Seq[WorkflowToken]): Either[WorkflowParserError, WorkflowAST] = {
        val reader = new WorkflowTokenReader(tokens)
        program(reader) match {
            case NoSuccess(msg, next) => Left(WorkflowParserError(msg))
            case Success(result, next) => Right(result)
        }
    }
    
    private def identifier: Parser[IDENTIFIER] = positioned {
        accept("identifier", { case id @ IDENTIFIER(name) => id })
    }

    private def literal: Parser[LITERAL] = positioned {
        accept("string literal", { case lit @ LITERAL(name) => lit })
    }
    
    def condition: Parser[Equals] = positioned {
        (identifier ~ EQUALS ~ literal) ^^ { case id ~ eq ~ lit => Equals(id.str, lit.str) }
    }
    
    def program: Parser[WorkflowAST] = positioned {
        phrase(block)
    }

    def block: Parser[WorkflowAST] = positioned {
        rep1(statement) ^^ { case stmtList => stmtList reduceRight AndThen }
    }

    def statement: Parser[WorkflowAST] = positioned {
      val exit = EXIT ^^ (_ => Exit)
      val readInput = READINPUT ~ rep(identifier ~ COMMA) ~ identifier ^^ {
        case read ~ inputs ~ IDENTIFIER(lastInput) => ReadInput(inputs.map(_._1.str) ++ List(lastInput))
      }
      val callService = CALLSERVICE ~ literal ^^ {
        case call ~ LITERAL(serviceName) => CallService(serviceName)
      }
      val switch = SWITCH ~ COLON ~ INDENT ~ rep1(ifThen) ~ opt(otherwiseThen) ~ DEDENT ^^ {
        case _ ~ _ ~ _ ~ ifs ~ otherwise ~ _ => Choice(ifs ++ otherwise)
      }
      exit | readInput | callService | switch
    }

    def ifThen: Parser[IfThen] = positioned {
        (condition ~ ARROW ~ INDENT ~ block ~ DEDENT) ^^ {
            case cond ~ _ ~ _ ~ block ~ _ => IfThen(cond, block)
        }
    }

    def otherwiseThen: Parser[OtherwiseThen] = positioned {
        (OTHERWISE ~ ARROW ~ INDENT ~ block ~ DEDENT) ^^ {
            case _ ~ _ ~ _ ~ block ~ _ => OtherwiseThen(block)
        }
    }
}

[32mimport [39m[36mscala.util.parsing.input.{Reader, Position, NoPosition}

[39m
defined [32mobject[39m [36mWorkflowParser[39m

In [9]:
for {
    tokens <- WorkflowLexer(input).right
    ast <- WorkflowParser(tokens).right
} yield ast

[36mres8[39m: [32mEither[39m[[32mProduct[39m with [32mSerializable[39m with [32mWorkflowCompilationError[39m, [32mWorkflowAST[39m] = [33mRight[39m(
  [33mAndThen[39m(
    [33mReadInput[39m([33mList[39m([32m"name"[39m, [32m"country"[39m)),
    [33mChoice[39m(
      [33mList[39m(
        [33mIfThen[39m([33mEquals[39m([32m"country"[39m, [32m"PT"[39m), [33mAndThen[39m([33mCallService[39m([32m"A"[39m), Exit)),
        [33mOtherwiseThen[39m(
          [33mAndThen[39m(
            [33mCallService[39m([32m"B"[39m),
            [33mChoice[39m(
              [33mList[39m(
                [33mIfThen[39m([33mEquals[39m([32m"name"[39m, [32m"unknown"[39m), Exit),
                [33mOtherwiseThen[39m([33mAndThen[39m([33mCallService[39m([32m"C"[39m), Exit))
              )
            )
          )
        )
      )
    )
  )
)

# Pipeline

In [10]:
object WorkflowCompiler {
  def apply(input: String): Either[WorkflowCompilationError, WorkflowAST] = {
    for {
      tokens <- WorkflowLexer(input).right
      ast <- WorkflowParser(tokens).right
    } yield ast
  }
}

defined [32mobject[39m [36mWorkflowCompiler[39m

In [11]:
WorkflowCompiler(input)

[36mres10[39m: [32mEither[39m[[32mWorkflowCompilationError[39m, [32mWorkflowAST[39m] = [33mRight[39m(
  [33mAndThen[39m(
    [33mReadInput[39m([33mList[39m([32m"name"[39m, [32m"country"[39m)),
    [33mChoice[39m(
      [33mList[39m(
        [33mIfThen[39m([33mEquals[39m([32m"country"[39m, [32m"PT"[39m), [33mAndThen[39m([33mCallService[39m([32m"A"[39m), Exit)),
        [33mOtherwiseThen[39m(
          [33mAndThen[39m(
            [33mCallService[39m([32m"B"[39m),
            [33mChoice[39m(
              [33mList[39m(
                [33mIfThen[39m([33mEquals[39m([32m"name"[39m, [32m"unknown"[39m), Exit),
                [33mOtherwiseThen[39m([33mAndThen[39m([33mCallService[39m([32m"C"[39m), Exit))
              )
            )
          )
        )
      )
    )
  )
)