In [None]:
import { display } from "tslab";
import { readFileSync } from "fs";

// Read the shared stylesheet from the parent directory and inject it into the
// notebook output so that subsequently rendered HTML picks up the styling.
const css = readFileSync("../style.css", { encoding: "utf8" });
display.html("<style>" + css + "</style>");

In [None]:
import { createToken, Lexer, CstParser, IToken, CstNode, TokenType, ICstVisitor } from "chevrotain";
import { instance } from "@viz-js/viz";

# Resolving Conflicts Using *Precedence Declarations*

In this notebook, we resolve the parsing ambiguities encountered previously. Instead of relying on precedence declarations that tell the parser generator how to resolve conflicts (as LR parser generators do), **LL(k) parsers** (like Chevrotain) express precedence and associativity directly through the **structure of the grammar rules**.

We will implement a grammar that respects:

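1. **Precedence**: Multiplication and division bind more tightly than addition and subtraction. Exponentiation binds most tightly.
2. **Associativity**: Addition, subtraction, multiplication, and division are **left-associative** (`1-2-3` -> `(1-2)-3`). Exponentiation is **right-associative** (`2^3^4` -> `2^(3^4)`).

For reference, this is the ambiguous grammar from before that we are going to restructure:
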
```
    expr : expr '+' expr
         | expr '-' expr
         | expr '*' expr
         | expr '/' expr
         | expr '^' expr
         | '(' expr ')'
         | NUMBER      
         ;
```

## Specification of the Scanner

We implement a minimal scanner for arithmetic expressions.

Token Definition:

In [None]:
// Blanks, tabs and line breaks separate tokens but carry no meaning of their
// own, so they are matched and then skipped instead of reaching the parser.
const WhiteSpace = createToken({ name: "WhiteSpace", pattern: /[ \t\n\r]+/, group: Lexer.SKIPPED });

// Operand: a single 0 or a digit sequence that does not start with 0.
const NumberTok = createToken({
  name: "NUMBER",
  pattern: /0|[1-9][0-9]*/,
});

// Additive operators.
const Plus = createToken({
  name: "Plus",
  pattern: /\+/,
});
const Minus = createToken({
  name: "Minus",
  pattern: /-/,
});

// Multiplicative operators.
const Mult = createToken({
  name: "Mult",
  pattern: /\*/,
});
const Div = createToken({
  name: "Div",
  pattern: /\//,
});

// Exponentiation operator.
const Pow = createToken({
  name: "Pow",
  pattern: /\^/,
});

// Grouping.
const LParen = createToken({
  name: "LParen",
  pattern: /\(/,
});
const RParen = createToken({
  name: "RParen",
  pattern: /\)/,
});

// Complete token vocabulary, shared by the lexer and the parser.
const allTokens = [WhiteSpace, NumberTok, Plus, Minus, Mult, Div, Pow, LParen, RParen];

const ArithmeticLexer = new Lexer(allTokens);

## Specification of the Parser
To enforce precedence in LL parsers, we layer the rules:

- `expression` handles the lowest precedence (Addition/Subtraction).
- `term` handles medium precedence (Multiplication/Division).
- `power` handles high precedence (Exponentiation).
- `atomic` handles the highest precedence (Numbers, Parentheses).

This layering forces the parser to process operations in the correct mathematical order.

In [None]:
/**
 * Parser for arithmetic expressions that produces a CST.
 *
 * Precedence is encoded by layering the rules: `expression` calls `term`,
 * `term` calls `power`, and `power` calls `atomic`. Operators handled in a
 * deeper rule therefore bind more tightly.
 */
class ArithmeticParser extends CstParser {
  constructor() {
    super(allTokens);
    // Must run after all rules have been registered via this.RULE.
    this.performSelfAnalysis();
  }

  /**
   * Lowest precedence: addition and subtraction.
   *
   *   expression : term ( addOp term )*
   */
  public expression = this.RULE("expression", () => {
    this.SUBRULE(this.term);
    this.MANY({
      DEF: () => {
        this.SUBRULE(this.addOp);
        this.SUBRULE2(this.term);
      },
    });
  });

  /**
   * Medium precedence: multiplication and division.
   *
   *   term : power ( mulOp power )*
   */
  public term = this.RULE("term", () => {
    this.SUBRULE(this.power);
    this.MANY({
      DEF: () => {
        this.SUBRULE(this.mulOp);
        this.SUBRULE2(this.power);
      },
    });
  });

  /**
   * High precedence: exponentiation.
   *
   *   power : atomic ( '^' power )?
   *
   * The rule recurses on the right-hand side of '^', which is what makes
   * exponentiation right-associative.
   */
  public power = this.RULE("power", () => {
    this.SUBRULE(this.atomic);
    this.OPTION({
      DEF: () => {
        this.CONSUME(Pow);
        this.SUBRULE2(this.power);
      },
    });
  });

  /**
   * Highest precedence: a number literal or a parenthesised expression.
   *
   *   atomic : NUMBER | '(' expression ')'
   */
  public atomic = this.RULE("atomic", () => {
    this.OR([
      {
        ALT: () => this.CONSUME(NumberTok),
      },
      {
        ALT: () => {
          this.CONSUME(LParen);
          this.SUBRULE(this.expression);
          this.CONSUME(RParen);
        },
      },
    ]);
  });

  /** Additive operator: '+' or '-'. */
  public addOp = this.RULE("addOp", () => {
    this.OR([
      { ALT: () => this.CONSUME(Plus) },
      { ALT: () => this.CONSUME(Minus) },
    ]);
  });

  /** Multiplicative operator: '*' or '/'. */
  public mulOp = this.RULE("mulOp", () => {
    this.OR([
      { ALT: () => this.CONSUME(Mult) },
      { ALT: () => this.CONSUME(Div) },
    ]);
  });
}

// Single parser instance; it is fed a new token vector for every input.
const parser = new ArithmeticParser();

## Strict CST to AST Conversion

Since Chevrotain produces a generic CST, we write a visitor to transform it into a clean AST.

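- For `expression` and `term`, the CST contains flat lists of operands and operators (one entry per `MANY` iteration). We fold these lists from left to right to obtain **left-associative** trees.
- For `power`, the right-recursive rule already yields a nested CST, so the resulting AST is **right-associative** without any extra work.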

We define `ASTNode` as a recursive type that can be a number or a tuple `[Operator, Left, Right]`.

In [None]:
/** Children of an `expression` node: one or more terms, separated by additive operators. */
interface ExpressionCtx {
  term: CstNode[];
  addOp?: CstNode[];
}

/** Children of a `term` node: one or more powers, separated by multiplicative operators. */
interface TermCtx {
  power: CstNode[];
  mulOp?: CstNode[];
}

/** Children of a `power` node: a base and, optionally, '^' followed by the exponent. */
interface PowerCtx {
  atomic: CstNode[];
  Pow?: IToken[];
  power?: CstNode[];
}

/** Children of an `atomic` node: either a number token or a parenthesised expression. */
interface AtomicCtx {
  NUMBER?: IToken[];
  LParen?: IToken[];
  expression?: CstNode[];
}

/** Children of an `addOp` or `mulOp` node: exactly one of the operator tokens. */
interface OpCtx {
  Plus?: IToken[];
  Minus?: IToken[];
  Mult?: IToken[];
  Div?: IToken[];
}

/**
 * Abstract syntax tree: a leaf is a plain number, an inner node is the tuple
 * [operator, left operand, right operand].
 */
type ASTNode = number | [string, ASTNode, ASTNode];

// Visitor base class generated by Chevrotain for the grammar of `parser`.
const BaseVisitor = parser.getBaseCstVisitorConstructor();

/**
 * Combines a flat operand/operator sequence into a left-associative AST.
 *
 * Given `first`, operators `[o1, o2]` and operands `[a, b]`, the result is
 * `[o2, [o1, first, a], b]`.
 *
 * Kept outside the visitor class so that the class only contains methods
 * named after grammar rules.
 *
 * @param first     AST of the leftmost operand.
 * @param operators Operator symbols in source order.
 * @param operands  ASTs of the operands following each operator, in source order.
 * @returns The folded AST; `first` itself if there are no operators.
 */
function foldLeftAssociative(first: ASTNode, operators: string[], operands: ASTNode[]): ASTNode {
  return operators.reduce<ASTNode>((left, op, i) => [op, left, operands[i]], first);
}

/**
 * CST visitor that converts the parse tree of `ArithmeticParser` into an
 * `ASTNode`. There is one method per grammar rule.
 */
class ToASTVisitor extends BaseVisitor {
  constructor() {
    super();
    this.validateVisitor();
  }

  /** expression : term ( addOp term )* — folded left-associatively. */
  public expression(ctx: ExpressionCtx): ASTNode {
    const first: ASTNode = this.visit(ctx.term[0]);
    const operators = (ctx.addOp ?? []).map((node): string => this.visit(node)); // "+" or "-"
    const operands = ctx.term.slice(1).map((node): ASTNode => this.visit(node));
    return foldLeftAssociative(first, operators, operands);
  }

  /** term : power ( mulOp power )* — folded left-associatively. */
  public term(ctx: TermCtx): ASTNode {
    const first: ASTNode = this.visit(ctx.power[0]);
    const operators = (ctx.mulOp ?? []).map((node): string => this.visit(node)); // "*" or "/"
    const operands = ctx.power.slice(1).map((node): ASTNode => this.visit(node));
    return foldLeftAssociative(first, operators, operands);
  }

  /**
   * power : atomic ( '^' power )?
   *
   * The exponent is itself a `power` node, so visiting it recursively yields
   * the right-associative nesting without any explicit folding.
   */
  public power(ctx: PowerCtx): ASTNode {
    const base: ASTNode = this.visit(ctx.atomic[0]);

    if (ctx.Pow && ctx.power) {
      const exponent: ASTNode = this.visit(ctx.power[0]);
      return ["^", base, exponent];
    }
    return base;
  }

  /**
   * atomic : NUMBER | '(' expression ')'
   *
   * @throws Error if the node contains neither alternative.
   */
  public atomic(ctx: AtomicCtx): ASTNode {
    if (ctx.NUMBER) {
      // Access is type-safe thanks to the AtomicCtx interface.
      return parseInt(ctx.NUMBER[0].image, 10);
    }

    if (ctx.LParen && ctx.expression) {
      return this.visit(ctx.expression[0]);
    }

    throw new Error("Invalid atomic: Expected NUMBER or expression in parentheses");
  }

  /**
   * Maps an `addOp` node to its operator symbol.
   *
   * @throws Error if the node contains neither Plus nor Minus, instead of
   *         silently emitting a placeholder operator into the AST.
   */
  public addOp(ctx: OpCtx): string {
    if (ctx.Plus) return "+";
    if (ctx.Minus) return "-";
    throw new Error("Invalid addOp: Expected '+' or '-'");
  }

  /**
   * Maps a `mulOp` node to its operator symbol.
   *
   * @throws Error if the node contains neither Mult nor Div, instead of
   *         silently emitting a placeholder operator into the AST.
   */
  public mulOp(ctx: OpCtx): string {
    if (ctx.Mult) return "*";
    if (ctx.Div) return "/";
    throw new Error("Invalid mulOp: Expected '*' or '/'");
  }
}

const toAST = new ToASTVisitor();

## Visualization & Testing

We reuse the DOT visualization logic. Now, the resulting trees should correctly reflect mathematical rules.

In [None]:
/**
 * Renders an AST as a Graphviz DOT digraph.
 *
 * Nodes are numbered n0, n1, ... in pre-order. An operator node is labelled
 * with its operator symbol, a leaf with its numeric value. Every node except
 * the root gets an edge from its parent.
 *
 * @param t Root of the AST.
 * @returns The DOT source text.
 */
function tuple2dot(t: ASTNode): string {
  const lines: string[] = ["digraph G {", " node [shape=circle];"];
  let nextId = 0;

  const emit = (node: ASTNode, parentId?: string): void => {
    const nodeId = `n${nextId++}`;
    const text = Array.isArray(node) ? node[0] : String(node);

    lines.push(` ${nodeId} [label="${text}"];`);
    if (parentId !== undefined) {
      lines.push(` ${parentId} -> ${nodeId};`);
    }

    if (Array.isArray(node)) {
      // Inner node [op, left, right]: descend into both operands.
      const [, left, right] = node;
      emit(left, nodeId);
      emit(right, nodeId);
    }
  };

  emit(t);
  lines.push("}");
  return lines.join("\n");
}

/**
 * Tokenizes and parses `s`, prints the resulting AST as JSON and renders it
 * as an SVG tree in the notebook output.
 *
 * Lexical and syntax errors are reported on the console; in either case no
 * AST is produced.
 *
 * @param s The arithmetic expression to process.
 */
async function test(s: string): Promise<void> {
  const lexResult = ArithmeticLexer.tokenize(s);

  // The lexer reports unmatched characters in `errors` and continues with the
  // remaining input. Without this check, input such as "1 + $ 2" would be
  // parsed as if it were "1 + 2".
  if (lexResult.errors.length > 0) {
    console.error("Lexical error:", lexResult.errors);
    return;
  }

  parser.input = lexResult.tokens;

  const cst = parser.expression();

  if (parser.errors.length > 0) {
    console.error("Syntax error:", parser.errors);
    return;
  }

  const ast = toAST.visit(cst) as ASTNode;
  console.log(JSON.stringify(ast));

  const dot = tuple2dot(ast);
  const viz = await instance();
  const svg = await viz.renderString(dot, { format: "svg" });

  display.html(svg);
}

Parsing Rules:

In [None]:
/**
 * Prints the grammar of a Chevrotain parser to the console in an EBNF-like
 * notation, one numbered line per rule.
 *
 * Notation: `( x )?` optional, `( x )*` zero or more, `( x )+` one or more,
 * `( a | b )` alternatives.
 *
 * @param p The parser whose rules are printed.
 */
function dumpChevrotainGrammar(p: CstParser) {
    // Version header (optional).
    // NOTE(review): Chevrotain appears to expose its version as a top-level
    // `VERSION` export rather than on `Lexer`, so this lookup probably always
    // falls back to "Unknown" — confirm and import `VERSION` if so.
    const chevrotainVersion = (Lexer as any).VERSION ?? "Unknown";
    console.log(`Created by Chevrotain v${chevrotainVersion} (https://chevrotain.io)\n`);
    console.log("Grammar\n");

    // Grammar AST of every rule, keyed by rule name.
    const productions: Record<string, { definition: any[] }> = p.getGAstProductions();

    // Recursively flattens a sequence of grammar AST nodes into one string.
    function printRule(definition: any[]): string {
        return definition.map((d: any): string => {
            switch (d.constructor.name) {
                // Simple terminals and non-terminals.
                case "NonTerminal":
                    return d.nonTerminalName;
                case "Terminal":
                    return d.terminalType.name; // token name

                // Composite structures.
                case "Option":
                    return `(${printRule(d.definition)})?`;
                case "Repetition":
                    return `(${printRule(d.definition)})*`;
                case "RepetitionMandatory":
                    // At least one iteration, hence '+' rather than '*'.
                    return `(${printRule(d.definition)})+`;
                case "RepetitionWithSeparator": {
                    const item = printRule(d.definition);
                    return `(${item} (${d.separator.name} ${item})*)?`;
                }
                case "RepetitionMandatoryWithSeparator": {
                    const item = printRule(d.definition);
                    return `${item} (${d.separator.name} ${item})*`;
                }
                case "Alternation": {
                    // Each entry of `definition` is one alternative with its own `definition`.
                    const alts = d.definition.map((alt: any) => printRule(alt.definition)).join(" | ");
                    return `( ${alts} )`;
                }

                default:
                    return ""; // unknown node type: print nothing
            }
        }).join(" ");
    }

    let ruleId = 0;
    for (const [name, prod] of Object.entries(productions)) {
        const rhs = printRule(prod.definition);
        // Pad the rule number so that the rule names line up.
        console.log(`Rule ${String(ruleId++).padEnd(5)} ${name} -> ${rhs}`);
    }

    console.log("\nParsing method: LL(k) recursive-descent\n");
}

// Print the grammar of the parser defined above.
dumpChevrotainGrammar(parser);

In [None]:
// All five operators at once; expected grouping: ((2^(3^4))*5 + 6) - (7/(8^9)).
await test("2^3^4*5+6-7/8^9")

In [None]:
// Precedence chain; expected grouping: 1 + (2 * (3^4)).
await test('1+2*3^4')

In [None]:
// Whitespace is skipped and '^' is right-associative; expected grouping: (1*2) + (3^(4^5)).
await test('1 * 2 + 3^4^5')