In [1]:
import { display } from "tslab";
import { readFileSync } from "fs";

// Read the stylesheet at "../style.css" as UTF-8 text and emit it, wrapped in a
// <style> element, through tslab's HTML display.
const css = readFileSync("../style.css", "utf8");
display.html(`<style>${css}</style>`);

## 1. Abstract Syntax Tree (AST) Definitions

Before we can transform the parser's output, we must define the target structure.
We define the set of supported operators $\mathcal{O}$ and the set of possible AST nodes $\mathcal{N}$.

Let $\mathcal{O} = \{ +, -, *, /, \%, ==, !=, <=, >=, <, > \}$.

The Abstract Syntax Tree is a recursive data structure where a node $n \in \mathcal{N}$ is either:
1.  **A Primitive**: A number or a string (representing variables).
2.  **A Structured Node**: An object containing a discriminator field `kind` and specific properties depending on that kind.

We define the following node interfaces using TypeScript's discriminated unions to ensure type safety during traversal.

In [2]:
// Define the allowed operators
// Closed set of operator spellings accepted in `BinaryNode.op`: five arithmetic
// operators followed by six comparison operators. Any other string is a type error.
type Operator =
    | "+"
    | "-"
    | "*"
    | "/"
    | "%"
    | "=="
    | "!="
    | "<="
    | ">="
    | "<"
    | ">";

// Base Interface for all complex nodes
// Only carries the `kind` tag; each concrete interface narrows `kind` to one or
// two string literals so the tag can act as the discriminator of the `AST` union.
interface BaseNode {
    kind: string;
}

// Specific Node Types

// A sequence of statements. The same shape is used for both the "Program" and
// the "Block" kind; only the tag differs.
interface ProgramNode extends BaseNode {
    kind: "Program" | "Block";
    statements: AST[];
}

// Assignment node: `id` is the target name stored as a plain string (not a
// nested node); `expr` is the subtree of the assigned expression.
interface AssignNode extends BaseNode {
    kind: "Assignment";
    id: string;
    expr: AST;
}

// Shared shape of the "If" and "While" kinds: one condition subtree and one
// body subtree. The interface has no field for an else branch.
interface ControlNode extends BaseNode {
    kind: "If" | "While";
    condition: AST;
    body: AST;
}

// Statement wrapper around a single expression subtree.
interface ExprStmtNode extends BaseNode {
    kind: "ExprStmt";
    expr: AST;
}

// Binary expression: operator `op` together with its `left` and `right`
// operand subtrees.
interface BinaryNode extends BaseNode {
    kind: "BinaryExpr";
    op: Operator;
    left: AST;
    right: AST;
}

// Function call: the callee name as a plain string plus the list of argument
// subtrees.
interface CallNode extends BaseNode {
    kind: "Call";
    funcName: string;
    args: AST[];
}

// The recursive AST Union Type
// Leaves are bare `number` or `string` values; every other member is an object
// carrying a `kind` tag. Consumers can therefore narrow with `typeof` first and
// then switch on `kind` to reach one concrete node interface.
type AST =
    | number
    | string
    | ProgramNode
    | AssignNode
    | ControlNode
    | ExprStmtNode
    | BinaryNode
    | CallNode;

## 2. Generic CST-to-AST Mapper

The Lezer parser produces a **Concrete Syntax Tree (CST)**, which contains all syntactic details. We define a transformation function $\mathcal{T}$ that converts a CST cursor $C$ into an AST node (or a list of nodes).

**Formal Definition:**

Let $C$ be the current cursor position in the CST.
Let $\mathcal{R}$ be the set of transformation rules defined in our configuration.
Let $S$ be the source code string.

The transformation $\mathcal{T}(C, S)$ proceeds as follows:

1.  **Recursion**: We collect the transformed results of all children of $C$, excluding any node types marked as *ignored* (e.g., whitespace, punctuation).
    $$children' = [ \mathcal{T}(child, S) \mid child \in C.children \land child.name \notin \text{IgnoreSet} ]$$

2.  **Rule Application**:
    If a specific rule exists for the current node name $name_C$, we apply it:
    $$\text{if } \exists r \in \mathcal{R}[name_C] \implies \text{return } r(children', S[C.from \dots C.to])$$

3.  **Literal Extraction**:
    If the node name matches a specific pattern (e.g., "Identifier"), we return the raw text content:
    $$\text{if } name_C \in \text{Literals} \implies \text{return } S[C.from \dots C.to]$$

4.  **Default Handling**:
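    If no rule applies, we attempt to unwrap the node. If it has at least one (non-ignored) child, we return the result of its first child; note that with several children the remaining ones are dropped, so such nodes should normally get an explicit rule. If it has no children, we return the raw text.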

To handle cases where a rule returns a list of nodes (e.g., argument lists), we introduce a helper type `Mapped` which can be `one(AST)` or `many(AST[])`.

In [3]:
import { TreeCursor } from "@lezer/common";

// --- Helper Types for the Mapper ---

// Intermediate result: Can be a single AST node OR a list of nodes
// The `kind` tag ("one" / "many") tells which of the two `value` shapes is
// present; note that it is unrelated to the `kind` tag of AST nodes.
type Mapped =
    | { kind: "one"; value: AST }
    | { kind: "many"; value: AST[] };

// Helper constructors: wrap a value in the matching `Mapped` variant.
// `one` tags a single AST node, `many` tags a list of AST nodes.
const one = (value: AST): Mapped => {
    return { kind: "one", value: value };
};
const many = (value: AST[]): Mapped => {
    return { kind: "many", value: value };
};

// Runtime Assertions (to safely unwrap Mapped values)

// Unwraps a single-node result. Throws an Error prefixed with `[ctx]` when the
// value is a list instead.
function asOne(x: Mapped, ctx: string): AST {
    if (x.kind === "one") {
        return x.value;
    }
    throw new Error(`[${ctx}] Expected single node.`);
}

// Unwraps a list result. Throws an Error prefixed with `[ctx]` when the value
// is a single node instead.
function asMany(x: Mapped, ctx: string): AST[] {
    if (x.kind === "many") {
        return x.value;
    }
    throw new Error(`[${ctx}] Expected list.`);
}

// Unwraps a single-node result that must be a string leaf. Throws an Error
// prefixed with `[ctx]` when the value is a list (via `asOne`) or when the
// single node is not a string.
function asString(x: Mapped, ctx: string): string {
    const unwrapped = asOne(x, ctx);
    if (typeof unwrapped === "string") {
        return unwrapped;
    }
    throw new Error(`[${ctx}] Expected string.`);
}

// --- The Transformation Logic ---

// A Rule transforms children and text into a Mapped result
// `children` holds the already-transformed, non-ignored children of the node;
// `text` is the slice of the source string covered by the node.
type NodeTransform = (ctx: {
    children: Mapped[];
    text: string;
}) => Mapped;

// Configuration consumed by `genericLezerToAST`.
interface ASTConfig {
    // Names of child nodes that are skipped (not transformed, not collected).
    ignore: Set<string>;
    // Transformation rules, looked up by the name of the node being converted.
    rules: Record<string, NodeTransform>;
    // Optional pattern tested against the node name; on a match (and when no
    // rule exists for that name) the node's source text is returned as a leaf.
    treatAsLiteral?: RegExp;
}

/**
 * Converts the CST node under `cursor` (and, recursively, its children) into a
 * `Mapped` result.
 *
 * Resolution order for the current node:
 *  1. an own-property entry in `config.rules` for the node name,
 *  2. `config.treatAsLiteral` matching the node name -> source text as a leaf,
 *  3. unwrap: the result of the first non-ignored child (any further children
 *     are dropped, so multi-child nodes normally need an explicit rule),
 *  4. leaf fallback: the node's source text.
 *
 * The cursor is moved while children are visited and is back on the starting
 * node when the function returns.
 *
 * @param cursor Cursor positioned on the node to convert.
 * @param source Source string the tree was parsed from.
 * @param config Ignore set, rules and optional literal pattern.
 * @returns The mapped result for the node.
 */
function genericLezerToAST(
    cursor: TreeCursor,
    source: string,
    config: ASTConfig,
): Mapped {
    // Capture name and text first: the cursor is repositioned during recursion.
    const name: string = cursor.name;
    const text: string = source.slice(cursor.from, cursor.to);

    // 1. Collect Children (Recursion)
    const children: Mapped[] = [];
    if (cursor.firstChild()) {
        do {
            if (!config.ignore.has(cursor.name)) {
                children.push(genericLezerToAST(cursor, source, config));
            }
        } while (cursor.nextSibling());
        cursor.parent();
    }

    // 2. Apply Specific Rule
    // Only own properties count as rules; otherwise a node named e.g.
    // "constructor" or "toString" would pick up a member of Object.prototype.
    const rule: NodeTransform | undefined = Object.prototype.hasOwnProperty.call(
        config.rules,
        name,
    )
        ? config.rules[name]
        : undefined;
    if (rule) {
        return rule({ children, text });
    }

    // 3. Literal Extraction (Regex)
    const literalPattern: RegExp | undefined = config.treatAsLiteral;
    if (literalPattern) {
        // `test` resumes from `lastIndex` on global/sticky patterns; reset it so
        // every node name is matched from its start.
        literalPattern.lastIndex = 0;
        if (literalPattern.test(name)) {
            return one(text);
        }
    }

    // 4. Default Fallback (Unwrap)
    // With several children this is ambiguous: only the first result is kept.
    if (children.length > 0) return children[0];

    return one(text); // Leaf fallback
}

## 3. Visualization Generator (`ast2dot`)

To verify the structure of our AST, we generate a description in the **DOT** graph description language.

**Algorithm:**

We perform a **Pre-Order Traversal** of the AST.
We maintain a global counter $id$ to assign a unique integer to every visited node.

For each node $n$:
1.  **Node Rendering**: We generate a DOT node definition `node_id [label=...]`.
    * **Leaves** ($n \in \text{string} \cup \text{number}$) are drawn as ellipses.
    * **Inner Nodes** are drawn as boxes. The label corresponds to $n.kind$. Specific visual attributes (Color, Shape) are determined by the node type (e.g., Control Flow nodes are green, Operators are blue).

2.  **Edge Generation**: For every property $p$ of $n$ that contains a child node $c$:
    * We recursively traverse $c$ to obtain its ID $id_c$.
    * We draw a directed edge $id \xrightarrow{p} id_c$.

This results in a directed graph representing the exact hierarchy of the AST.

In [4]:
// Replaces the five HTML-significant characters (& < > " ') with their entity
// form so the text can be embedded in HTML markup.
function escapeHtml(unsafe: string): string {
    const entityFor: Record<string, string> = {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
        "'": "&#039;",
    };
    // Single pass: each special character is looked up exactly once, so the
    // ampersands of freshly inserted entities are never escaped again.
    return unsafe.replace(/[&<>"']/g, (ch: string): string => entityFor[ch] ?? ch);
}

/**
 * Renders an AST as a Graphviz DOT digraph.
 *
 * Node ids are handed out in pre-order from a counter local to the call.
 * Primitive leaves (numbers, strings) become ellipses; structured nodes become
 * boxes whose bold label and fill colour depend on `kind`. Each child is
 * connected by an edge labelled with the property (or index) it came from.
 *
 * Leaf text is escaped for DOT double-quoted strings, so leaves containing
 * quotes, backslashes or line breaks still yield valid DOT.
 *
 * @param tree Root of the AST to render.
 * @returns The DOT source as one string, lines joined with "\n".
 */
function ast2dot(tree: AST): string {
    const lines: string[] = [
        "digraph AST {",
        '  node [shape=box, fontname=Helvetica, fontsize=10, style=filled, fillcolor="#f0f0f0"];',
        "  edge [fontname=Helvetica, fontsize=9];",
    ];
    let idCounter: number = 0;

    // Escapes text for use inside a DOT double-quoted string. Backslashes go
    // first so the escapes inserted afterwards are not doubled.
    function escapeDotString(raw: string): string {
        return raw
            .replace(/\\/g, "\\\\")
            .replace(/"/g, '\\"')
            .replace(/\r?\n/g, "\\n");
    }

    // Helper to draw a leaf node immediately
    function drawLeaf(val: string | number, color: string = "white"): number {
        const id: number = idCounter++;
        lines.push(
            `  node${id} [label="${escapeDotString(String(val))}", shape=ellipse, fillcolor="${color}"];`,
        );
        return id;
    }

    // Recursive Traversal Function: emits the subtree rooted at `node` and
    // returns the id assigned to `node`.
    function traverse(node: AST): number {
        // 1. Primitive Leaves
        if (typeof node === "number" || typeof node === "string") {
            return drawLeaf(node);
        }

        // 2. Complex Nodes
        // The id is reserved before descending so parents get smaller ids than
        // their descendants; the node line itself is emitted after the children.
        const myId: number = idCounter++;
        const name: string = `node${myId}`;

        let label: string = "";
        let color: string = "#f0f0f0";

        // Structure to hold edges to be drawn
        const children: { edgeLabel?: string; id: number }[] = [];

        // Determine Layout based on Node Kind
        switch (node.kind) {
            case "Program":
            case "Block":
                label = ".";
                color = "#ffeeba";
                node.statements.forEach((stmt: AST, i: number) => {
                    children.push({ edgeLabel: `${i}`, id: traverse(stmt) });
                });
                break;

            case "Assignment":
                label = ":=";
                color = "#b8daff";
                children.push({ edgeLabel: "id", id: drawLeaf(node.id) });
                children.push({ edgeLabel: "expr", id: traverse(node.expr) });
                break;

            case "BinaryExpr":
                // Operators such as "<" must be escaped inside the HTML-like label.
                label = escapeHtml(node.op);
                color = "#b8daff";
                children.push({ edgeLabel: "left", id: traverse(node.left) });
                children.push({ edgeLabel: "right", id: traverse(node.right) });
                break;

            case "If":
            case "While":
                // Both control-flow kinds share shape and colour.
                label = node.kind === "If" ? "if" : "while";
                color = "#d4edda";
                children.push({
                    edgeLabel: "cond",
                    id: traverse(node.condition),
                });
                children.push({ edgeLabel: "body", id: traverse(node.body) });
                break;

            case "Call":
                label = "call";
                color = "#f5c6cb";
                children.push({ edgeLabel: "fn", id: drawLeaf(node.funcName) });
                node.args.forEach((arg: AST, i: number) => {
                    children.push({ edgeLabel: `arg${i}`, id: traverse(arg) });
                });
                break;

            case "ExprStmt":
                label = "expr";
                // Empty edge label: the edge is drawn without a label attribute.
                children.push({ edgeLabel: "", id: traverse(node.expr) });
                break;

            default:
                // Fallback for unexpected nodes (unreachable for a well-typed
                // AST): show the raw kind, escaped for the HTML-like label.
                label = escapeHtml(String((node as { kind?: unknown }).kind));
        }

        // Draw the Node Definition
        lines.push(
            `  ${name} [label=<<b>${label}</b>>, fillcolor="${color}"];`,
        );

        // Draw the Edges
        for (const child of children) {
            const edgeAttr: string = child.edgeLabel
                ? ` [label="${child.edgeLabel}"]`
                : "";
            lines.push(`  ${name} -> node${child.id}${edgeAttr};`);
        }

        return myId;
    }

    // Start Traversal
    traverse(tree);
    lines.push("}");
    return lines.join("\n");
}