# 1 EXAMPLE: ARITHMETIC EXPRESSIONS

```BNF
expr ::= term {"+" term | "-" term}.
term ::= factor {"*" factor | "/" factor}.
factor ::= floatingPointNumber | "(" expr ")".
```

In [1]:
import $ivy.`org.scala-lang.modules:scala-parser-combinators_2.12:2.3.0`

import scala.util.parsing.combinator._

import $ivy.$

import scala.util.parsing.combinator._

- Code: https://github.com/scala/scala-parser-combinators
- 2.3.0 API: https://javadoc.io/static/org.scala-lang.modules/scala-parser-combinators_2.12/2.3.0/scala/util/parsing/combinator/index.html

In [2]:
/** Recognizer for arithmetic expressions over floating-point numbers.
  *
  * Each method mirrors one production of the grammar:
  * {{{
  * expr   ::= term {"+" term | "-" term}.
  * term   ::= factor {"*" factor | "/" factor}.
  * factor ::= floatingPointNumber | "(" expr ")".
  * }}}
  * Results are left untyped (`Parser[Any]`); nothing is evaluated.
  */
class Arith extends JavaTokenParsers {

  /** A term followed by any number of `+ term` / `- term` pairs. */
  def expr: Parser[Any] = {
    val additiveTail = "+" ~ term | "-" ~ term
    term ~ rep(additiveTail)
  }

  /** A factor followed by any number of `* factor` / `/ factor` pairs. */
  def term: Parser[Any] = {
    val multiplicativeTail = "*" ~ factor | "/" ~ factor
    factor ~ rep(multiplicativeTail)
  }

  /** A number, or a complete expression wrapped in parentheses. */
  def factor: Parser[Any] =
    floatingPointNumber | "(" ~ expr ~ ")"
}

defined [32mclass[39m [36mArith[39m

# 2 RUNNING YOUR PARSER

In [3]:
// Instantiate the parser class; its `expr` production and `parseAll` are used in the next cells.
val arith = new Arith

[36marith[39m: [32mArith[39m = ammonite.$sess.cmd1$Helper$Arith@2719f805

In [4]:
// Well-formed input: run the `expr` production over the whole string.
val input = "2 * (3 + 7)"
arith.parseAll(arith.expr, input)

[36minput[39m: [32mString[39m = [32m"2 * (3 + 7)"[39m
[36mres3_1[39m: [32marith[39m.[32mParseResult[39m[[32mAny[39m] = [33mSuccess[39m(
  [32m"2"[39m ~ [33mList[39m([32m"*"[39m ~ [32m"("[39m ~ [32m"3"[39m ~ [33mList[39m() ~ [33mList[39m([32m"+"[39m ~ [32m"7"[39m ~ [33mList[39m()) ~ [32m")"[39m) ~ [33mList[39m(),
  CharSequenceReader()
)

In [5]:
// Same expression with one surplus ")": the parse fails and the message is printed.
val input = "2 * (3 + 7))"
println(arith.parseAll(arith.expr, input))

[1.12] failure: '/' expected but ')' found

2 * (3 + 7))
           ^


[36minput[39m: [32mString[39m = [32m"2 * (3 + 7))"[39m

# 3 BASIC REGULAR EXPRESSION PARSERS

In [6]:
/** Minimal RegexParsers instance that exposes a single identifier pattern. */
object MyParsers extends RegexParsers {

  /** A letter or underscore followed by any number of word characters. */
  def ident: scala.util.matching.Regex =
    """[a-zA-Z_]\w*""".r
}

// Apply the identifier pattern to a whole string; the result is a ParseResult[String].
MyParsers.parseAll(MyParsers.ident, "abd")

defined [32mobject[39m [36mMyParsers[39m
[36mres5_1[39m: [32mMyParsers[39m.[32mParseResult[39m[[32mString[39m] = [33mSuccess[39m([32m"abd"[39m, CharSequenceReader())

# 4 ANOTHER EXAMPLE: JSON

```BNF
value ::= obj | arr | stringLiteral |
    floatingPointNumber |
    "null" | "true" | "false".
obj ::= "{" [members] "}".
arr ::= "[" [values] "]".
members ::= member {"," member}.
member ::= stringLiteral ":" value.
values ::= value {"," value}.
```

In [7]:
// Sample document used by the JSON parsers below: nested objects, an array of strings,
// and one numeric value ("zip").
val jsonData = """{
     "address book": {
         "name": "John Smith",
         "address": {
             "street": "10 Market Street",
             "city" : "San Francisco, CA",
             "zip" : 94111
         },
         "phone numbers": [
             "408 338-4238",
             "408 111-6892"
         ]
     }
 }"""

[36mjsonData[39m: [32mString[39m = [32m"""{
     "address book": {
         "name": "John Smith",
         "address": {
             "street": "10 Market Street",
             "city" : "San Francisco, CA",
             "zip" : 94111
         },
         "phone numbers": [
             "408 338-4238",
             "408 111-6892"
         ]
     }
 }"""

In [8]:
import scala.util.parsing.combinator._

/** Recognizer for JSON text; one method per grammar production.
  *
  * {{{
  * value   ::= obj | arr | stringLiteral | floatingPointNumber | "null" | "true" | "false".
  * obj     ::= "{" [members] "}".
  * arr     ::= "[" [values] "]".
  * members ::= member {"," member}.
  * member  ::= stringLiteral ":" value.
  * values  ::= value {"," value}.
  * }}}
  * All productions return `Parser[Any]`: the raw `~`/`List` structure is left untouched.
  */
class JSON extends JavaTokenParsers {

  /** Any JSON value. */
  def value: Parser[Any] =
    obj | arr | stringLiteral | floatingPointNumber | "null" | "true" | "false"

  /** An object: possibly empty member list between braces. */
  def obj: Parser[Any] = "{" ~ members ~ "}"

  /** An array: possibly empty value list between brackets. */
  def arr: Parser[Any] = "[" ~ values ~ "]"

  /** Zero or more comma-separated members. */
  def members: Parser[Any] = repsep(member, ",")

  /** A single `"key": value` pair. */
  def member: Parser[Any] = stringLiteral ~ ":" ~ value

  /** Zero or more comma-separated values. */
  def values: Parser[Any] = repsep(value, ",")
}

import scala.util.parsing.combinator._

defined class JSON

In [9]:
// Parse the sample document starting from the `value` production.
val jsonParsers = new JSON
jsonParsers.parseAll(jsonParsers.value, jsonData)

[36mjsonParsers[39m: [32mJSON[39m = ammonite.$sess.cmd7$Helper$JSON@5e175161
[36mres8_1[39m: [32mjsonParsers[39m.[32mParseResult[39m[[32mAny[39m] = [33mSuccess[39m(
  [32m"{"[39m ~ [33mList[39m(
    [32m"\"address book\""[39m ~ [32m":"[39m ~ [32m"{"[39m ~ [33mList[39m(
      [32m"\"name\""[39m ~ [32m":"[39m ~ [32m"\"John Smith\""[39m,
      [32m"\"address\""[39m ~ [32m":"[39m ~ [32m"{"[39m ~ [33mList[39m(
        [32m"\"street\""[39m ~ [32m":"[39m ~ [32m"\"10 Market Street\""[39m,
        [32m"\"city\""[39m ~ [32m":"[39m ~ [32m"\"San Francisco, CA\""[39m,
        [32m"\"zip\""[39m ~ [32m":"[39m ~ [32m"94111"[39m
      ) ~ [32m"}"[39m,
      [32m"\"phone numbers\""[39m ~ [32m":"[39m ~ [32m"["[39m ~ [33mList[39m([32m"\"408 338-4238\""[39m, [32m"\"408 111-6892\""[39m) ~ [32m"]"[39m
    ) ~ [32m"}"[39m
  ) ~ [32m"}"[39m,
  CharSequenceReader()
)

# 5 PARSER OUTPUT

In [10]:
import scala.util.parsing.combinator._

/** JSON parser that converts the parse into Scala values.
  *
  * Objects become `Map[String, Any]`, arrays become `List[Any]`, numbers become `Double`,
  * and `true`/`false`/`null` become the corresponding Scala values. String literals are
  * returned exactly as matched by `stringLiteral`.
  */
class JSON extends JavaTokenParsers {

  /** "{" [members] "}": the braces are dropped; `members` already yields the map. */
  def obj: Parser[Map[String, Any]] = "{" ~> members <~ "}"

  /** "[" [values] "]": the brackets are dropped, leaving the list of values. */
  def arr: Parser[List[Any]] = "[" ~> values <~ "]"

  /** Comma-separated members collected into a map. */
  def members: Parser[Map[String, Any]] = repsep(member, ",") ^^ (_.toMap)

  /** `key ":" value` as a pair.
    * The ":" token is discarded with `<~`, so the result pattern only names the two
    * parts that are kept. The parentheses are required because `~` binds tighter than `<~`.
    */
  def member: Parser[(String, Any)] =
    (stringLiteral <~ ":") ~ value ^^ { case key ~ v => (key, v) }

  /** Comma-separated values. */
  def values: Parser[List[Any]] = repsep(value, ",")

  /** Any JSON value, converted to its Scala representation. */
  def value: Parser[Any] = (
    obj
      | arr
      | stringLiteral
      | floatingPointNumber ^^ (_.toDouble)
      | "null" ^^ (_ => null) // JSON null is deliberately represented as Scala null here
      | "true" ^^ (_ => true)
      | "false" ^^ (_ => false)
  )
}

// Re-run the sample document through the result-converting version of the parser.
val jsonParsers = new JSON
jsonParsers.parseAll(jsonParsers.value, jsonData)

import scala.util.parsing.combinator._

defined class JSON
jsonParsers: JSON = ammonite.$sess.cmd9$Helper$JSON@14ef7b11
res9_3: jsonParsers.ParseResult[Any] = Success(
  Map(
    "\"address book\"" -> Map(
      "\"name\"" -> "\"John Smith\"",
      "\"address\"" -> Map(
        "\"street\"" -> "\"10 Market Street\"",
        "\"city\"" -> "\"San Francisco, CA\"",
        "\"zip\"" -> 94111.0
      ),
      "\"phone numbers\"" -> List("\"408 338-4238\"", "\"408 111-6892\"")
    )
  ),
  CharSequenceReader()
)

Table 33.1 - Summary of parser combinators

- `"..."`: literal
- `"...".r`: regular expression
- `P~Q`: sequential composition
- `P <~ Q`, `P ~> Q`: sequential composition; keep left/right only
- `P | Q`: alternative
- `opt(P)`: option
- `rep(P)`: repetition
- `repsep(P, Q)`: interleaved repetition
- `P ^^ f`: result conversion


# 6 IMPLEMENTING COMBINATOR PARSERS

```scala
package scala.util.parsing.combinator

/** Simplified outline of the combinator framework's central trait (bodies omitted). */
trait Parsers {
    /** Kind of element the input is made of; fixed by concrete sub-traits. */
    type Elem
    /** What every parser reads from: a Reader over `Elem`s. */
    type Input = Reader[Elem]

    /** Outcome of running a parser: either a Success or a NoSuccess. */
    sealed abstract class ParseResult[+T]
    /** The parser matched: carries the computed `result` and an `Input` value. */
    case class Success[T](result: T, in: Input) extends ParseResult[T]
    /** Common parent of the two unsuccessful outcomes, each with a message and an `Input`. */
    sealed abstract class NoSuccess extends ParseResult[Nothing]
    case class Failure(msg: String, next: Input) extends NoSuccess
    case class Error(msg: String, next: Input) extends NoSuccess

    /** A parser is a function from input to a parse result.
      *
      * The function type has to be parenthesized to be legal as a parent type.
      * Operands declared as `=> Parser[U]` are by-name parameters.
      */
    abstract class Parser[+T] extends (Input => ParseResult[T]) {
        /** Sequential composition; both results are kept, paired in a `~`. */
        def ~[U](q: => Parser[U]): Parser[~[T, U]]
        /** Sequential composition; keeps only the left result. */
        def <~[U](q: => Parser[U]): Parser[T]
        /** Sequential composition; keeps only the right result. */
        def ~>[U](q: => Parser[U]): Parser[U]
        /** Alternative. */
        def |[U >: T](q: => Parser[U]): Parser[U]
        /** Result conversion: applies `f` to this parser's result. */
        def ^^[U](f: T => U): Parser[U]
    }

    trait OnceParser[+T] extends Parser[T]

    /** Pair type produced by the `~` combinator; written infix in patterns, e.g. `a ~ b`. */
    case class ~[+a, +b](_1: a, _2: b)

    def elem(e: Elem): Parser[Elem]
    def success[T](v: T): Parser[T]
    /** Parser that produces a failure carrying `msg`. */
    def failure(msg: String): Parser[Nothing]
    /** Option. */
    def opt[T](p: => Parser[T]): Parser[Option[T]]
    /** Repetition. */
    def rep[T](p: => Parser[T]): Parser[List[T]]
    /** Interleaved repetition: `p` items separated by `q`; only the `p` results are returned. */
    def repsep[T](p: => Parser[T], q: => Parser[Any]): Parser[List[T]]
}
```

```scala
package scala.util.parsing.input

// Input source that parsers read from (Parsers.Input is Reader[Elem]); members elided here.
abstract class Reader[+T]
```

Dealing with recursion

```scala
// The right-hand operands of | and ~ are by-name parameters, so the nested reference
// to `parens` is only evaluated when that part of the input is actually reached.
// A recursive method must declare its result type explicitly, hence `Parser[Any]`.
def parens: Parser[Any] = floatingPointNumber | "(" ~ parens ~ ")"
```

# 7 STRING LITERALS AND REGULAR EXPRESSIONS

```scala
package scala.util.parsing.combinator

// Excerpt of the library trait that specializes Parsers to character input.
trait RegexParsers extends Parsers {
    // Input elements are single characters.
    type Elem = Char
    
    // Implicit conversions: a plain String or a Regex can be written wherever a
    // Parser[String] is expected (this is what makes "(" ~ expr ~ ")" type-check).
    implicit def literal(s: String): Parser[String]
    implicit def regex(r: Regex): Parser[String]
    
    // Whitespace pattern: one or more whitespace characters.
    protected val whiteSpace = """\s+""".r
    // Body elided in this excerpt. NOTE(review): presumably returns the offset in `source`
    // reached after skipping whitespace from `offset`; confirm against the library source.
    protected def handleWhiteSpace(source: java.lang.CharSequence, offset: Int): Int = {...}
}
```

# 8 LEXING AND PARSING

```scala
// Two separate library packages are listed here (not one nested declaration), one per
// phase named in this section's title: `lexical` for lexing, `syntactical` for parsing.
package scala.util.parsing.combinator.lexical
package scala.util.parsing.combinator.syntactical
```

# 9 ERROR REPORTING

Scala's parsing library implements a simple heuristic: among all failures,
the one that occurred at the latest position in the input is chosen.

```scala
[1.13] failure: "false" expected but identifier John found
 
 { "name": John,
           ^
// Appending failure(...) as the last alternative replaces the unhelpful
// "\"false\" expected" message with a description of what was actually wrong.
// The string parser is named `stringLiteral`, as in the JSON classes earlier in this file.
def value: Parser[Any] =
  obj | arr | stringLiteral | floatingPointNumber | "null" |
  "true" | "false" | failure("illegal start of value")

  
[1.13] failure: illegal start of value
 
 { "name": John,
           ^  
```
  
```scala
/** Excerpt showing how `phrase` insists that a parser consumes all of its input. */
trait Parsers {

  /** Successful result; in this excerpt it reports no earlier failure. */
  case class Success[+T](result: T, override val next: Input) extends ParseResult[T] {
    def lastFailure: Option[Failure] = None
  }

  /** Wraps `p` so that a success only counts when the remaining input is at its end.
    *
    * If input is left over, the success's recorded `lastFailure` is returned when there
    * is one; otherwise a generic "end of input expected" failure is produced at the
    * position where `p` stopped. Unsuccessful results of `p` are passed through unchanged.
    */
  def phrase[T](p: Parser[T]) = new Parser[T] {
    def apply(in: Input) = p(in) match {
      case ok @ Success(_, rest) if rest.atEnd => ok
      case ok @ Success(_, rest) =>
        ok.lastFailure.getOrElse(Failure("end of input expected", rest))
      case other => other
    }
  }
}
```

# 10 BACKTRACKING VERSUS LL(1)

The parser combinators employ **backtracking** to choose between different parsers in an alternative.
In an expression `P | Q`, if `P` fails, then `Q` is run on the same input as `P`.
This happens even if `P` has parsed some tokens before failing.
In this case the same tokens will be parsed again by `Q`.

```BNF
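// left-recursive productions must be avoided: the parser for expr would call itself
// without consuming any input
expr ::= expr "+" term | term.
// backtracking is potentially costly: on input (1 + 2) * 3 the first alternative parses
// the whole term, fails at the missing "+", and the second alternative parses the term again
expr ::= term "+" expr | term.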

// modify the grammar so that backtracking can be avoided
expr ::= term ["+" expr].
expr ::= term {"+" term}.
```

Many languages admit so-called "LL(1)" grammars.
When a combinator parser is formed from such a grammar, it will never backtrack, i.e., 
the input position will never be reset to an earlier value.

The combinator parsing framework allows you to express the expectation that a grammar is LL(1) explicitly, 
using a new operator `~!`. 
This operator is like sequential composition `~` but it will never 
backtrack to "un-read" input elements that have already been parsed.

```scala
// LL(1) variant of the arithmetic grammar: every sequence uses ~! instead of ~,
// so input that has already been parsed is never "un-read" by backtracking.

// expr ::= term {"+" term | "-" term}
def expr: Parser[Any] = {
  val sumTail = "+" ~! term | "-" ~! term
  term ~! rep(sumTail)
}

// term ::= factor {"*" factor | "/" factor}
def term: Parser[Any] = {
  val productTail = "*" ~! factor | "/" ~! factor
  factor ~! rep(productTail)
}

// factor ::= "(" expr ")" | floatingPointNumber
def factor: Parser[Any] = "(" ~! expr ~! ")" | floatingPointNumber
```