In [41]:
import json
import yaml
from functools import partial

def pretty_print(data: dict, as_yaml: bool = False) -> None:
    """Print ``data`` in a human-readable form.

    Args:
        data: The structure to display (e.g. a token list wrapper or an AST).
        as_yaml: When true, print the output of ``yaml.dump(data)``;
            otherwise print ``data`` as JSON indented by four spaces.
    """
    if as_yaml:
        print(yaml.dump(data))
    else:
        # Only one representation is printed per call; previously the JSON
        # dump was emitted even when YAML had been requested.
        print(json.dumps(data, indent=4))


# https://github.com/jamiebuilds/the-super-tiny-compiler/blob/master/the-super-tiny-compiler.js

In [3]:
def tokenizer(input_: str) -> list[dict]:
    """Split source text into a flat list of tokens.

    Each token is a dict with a ``"type"`` and a ``"value"`` key. The
    recognised token types are:

    * ``"paren"``  - a single ``(`` or ``)``
    * ``"number"`` - a run of characters for which ``str.isnumeric()`` is true
    * ``"string"`` - the text between two single quotes (quotes not stored)
    * ``"name"``   - a run of characters for which ``str.isalpha()`` is true

    Whitespace separates tokens and is never emitted.

    Args:
        input_: The source text to tokenize.

    Returns:
        The tokens in source order; an empty list for empty input.

    Raises:
        ValueError: If an unsupported character is found, or if a string
            literal is opened with ``'`` but never closed.
    """
    length = len(input_)
    current = 0
    tokens = []

    while current < length:
        char = input_[current]

        if char in ("(", ")"):
            tokens.append({"type": "paren", "value": char})
            current += 1
            continue

        if char.isspace():
            # do not store the whitespace token, move to the next position
            current += 1
            continue

        if char.isnumeric():
            start = current
            # Bounds-check before indexing so a number at the very end of the
            # input does not run past the string.
            while current < length and input_[current].isnumeric():
                current += 1
            tokens.append({"type": "number", "value": input_[start:current]})
            continue

        if char == "'":
            # do not store the opening or closing single quote
            closing = input_.find("'", current + 1)
            if closing == -1:
                raise ValueError(
                    f"Unterminated string literal starting at position {current}"
                )
            tokens.append({"type": "string", "value": input_[current + 1:closing]})
            current = closing + 1
            continue

        if char.isalpha():
            start = current
            # Same end-of-input guard as for numbers.
            while current < length and input_[current].isalpha():
                current += 1
            tokens.append({"type": "name", "value": input_[start:current]})
            continue

        raise ValueError(f"Unsupported char=`{char}`")

    return tokens

In [4]:
def test_tokenizer() -> None:
    """Check that a nested call expression is tokenized in source order."""
    source = "(add 3 (multiply 4 'hello'))"

    # (type, value) pairs, expanded into token dicts below.
    expected_pairs = [
        ("paren", "("),
        ("name", "add"),
        ("number", "3"),
        ("paren", "("),
        ("name", "multiply"),
        ("number", "4"),
        ("string", "hello"),
        ("paren", ")"),
        ("paren", ")"),
    ]
    expected_tokens = [
        {"type": kind, "value": text} for kind, text in expected_pairs
    ]

    actual_tokens = tokenizer(source)
    assert actual_tokens == expected_tokens


# Run the tokenizer check when this cell executes; an AssertionError here
# means the tokenizer output no longer matches the expected token list.
test_tokenizer()

In [5]:
def parser(tokens: list[dict]) -> dict:
    """Build a Lisp-style AST from a flat token list.

    Number and string tokens become ``NumberLiteral`` / ``StringLiteral``
    nodes. An opening parenthesis starts a ``CallExpression`` whose ``name``
    is the value of the token that follows it and whose ``params`` are all
    nodes parsed up to the matching closing parenthesis.

    Args:
        tokens: Token dicts with ``"type"`` and ``"value"`` keys.

    Returns:
        A ``{"type": "Program", "body": [...]}`` node holding every
        top-level expression in order. Empty input yields an empty body.

    Raises:
        ValueError: If a token cannot start an expression, or if the token
            list ends while a call expression is still open.
    """

    def token_at(position: int) -> dict:
        # Only reached while a "(" is still open, so running out of tokens
        # means the expression was never closed.
        if position >= len(tokens):
            raise ValueError(
                "Unexpected end of input while parsing a call expression"
            )
        return tokens[position]

    def walk(current: int) -> tuple[dict, int]:
        # Returns the parsed node and the index of the first unconsumed token.
        token = token_at(current)

        if token["type"] == "number":
            return {"type": "NumberLiteral", "value": token["value"]}, current + 1

        if token["type"] == "string":
            return {"type": "StringLiteral", "value": token["value"]}, current + 1

        if token["type"] == "paren" and token["value"] == "(":
            # skip the parenthesis
            current += 1
            token = token_at(current)

            node = {"type": "CallExpression", "name": token["value"], "params": []}

            # skip the name token
            current += 1
            token = token_at(current)

            # Collect parameters until the closing parenthesis of this call.
            while not (token["type"] == "paren" and token["value"] == ")"):
                param, current = walk(current)
                node["params"].append(param)
                token = token_at(current)

            # skip the closing parenthesis
            return node, current + 1

        raise ValueError(f"Unknown token.type=`{token['type']}`")

    ast = {"type": "Program", "body": []}
    current = 0
    while current < len(tokens):
        node, current = walk(current)
        ast["body"].append(node)

    return ast

In [6]:
def test_parser() -> None:
    """Check that a nested call expression is parsed into the expected AST."""
    tokens = tokenizer("(add 3 (multiply 4 'hello'))")
    ast = parser(tokens)

    expected = {
        "type": "Program",
        "body": [
            {
                "type": "CallExpression",
                "name": "add",
                "params": [
                    {
                        "type": "NumberLiteral",
                        "value": "3"
                    },
                    {
                        "type": "CallExpression",
                        "name": "multiply",
                        "params": [
                            {
                                "type": "NumberLiteral",
                                "value": "4"
                            },
                            {
                                "type": "StringLiteral",
                                "value": "hello"
                            }
                        ]
                    }
                ]
            }
        ]
    }

    # Compare the AST parsed above instead of parsing the tokens a second time.
    assert ast == expected

In [16]:
def traverser(ast: dict, visitor: dict) -> None:

    def traverse_node_list(node_list: list[dict], parent: dict) -> None:
        for node in node_list:
            traverse_node(node, parent)

    def traverse_node(node: dict, parent: dict) -> None:
        funcs = visitor.get(node["type"])

        if funcs and "enter" in funcs:
            funcs["enter"](node, parent)

        if node["type"] == "Program":
            traverse_node_list(node["body"], node)
        elif node["type"] == "CallExpression":
            traverse_node_list(node["params"], node)
        elif node["type"] in ("NumberLiteral", "StringLiteral"):
            pass
        else:
            raise ValueError(f"Unkown node.type=`{node['type']}`")

        if funcs and "exit" in funcs:
            funcs["exit"](node, parent)

    traverse_node(node=ast, parent=None)

In [17]:
def transformer(ast: dict) -> dict:
    """Convert the Lisp-style AST into a C-style AST.

    Literals are copied as-is. Each ``CallExpression`` becomes a node with a
    ``callee`` identifier and an ``arguments`` list; a call whose parent is
    not itself a ``CallExpression`` is additionally wrapped in an
    ``ExpressionStatement``.

    The input tree is not modified: the link from an old node to the list in
    the new tree that receives its children is kept in a local side table
    rather than being stored on the old nodes.

    Args:
        ast: Root ``Program`` node of the source AST.

    Returns:
        A new ``{"type": "Program", "body": [...]}`` tree.
    """
    new_ast = {
        "type": "Program",
        "body": []
    }

    # id(old node) -> list in the new tree that collects that node's children.
    # Keys stay valid for the whole traversal because `ast` keeps every old
    # node alive.
    contexts = {id(ast): new_ast["body"]}

    def literal_enter(node: dict, parent: dict, node_type: str) -> None:
        contexts[id(parent)].append({"type": node_type, "value": node["value"]})

    def call_expression_enter(node: dict, parent: dict) -> None:
        expression = {
            "type": "CallExpression",
            "callee": {
                "type": "Identifier",
                "name": node["name"],
            },
            "arguments": [],
        }

        # Children of this call are appended to its argument list.
        contexts[id(node)] = expression["arguments"]

        if parent["type"] != "CallExpression":
            # Top-level calls are statements in the target language.
            expression = {
                "type": "ExpressionStatement",
                "expression": expression,
            }

        contexts[id(parent)].append(expression)

    visitor = {
        "NumberLiteral": {
            "enter": partial(literal_enter, node_type="NumberLiteral"),
        },
        "StringLiteral": {
            "enter": partial(literal_enter, node_type="StringLiteral"),
        },
        "CallExpression": {
            "enter": call_expression_enter
        }
    }

    traverser(ast, visitor)
    return new_ast

In [28]:
def code_generator(node: dict):
    """Render a node of the transformed AST as source text.

    ``Program`` joins its statements with newlines, ``ExpressionStatement``
    appends ``;``, ``CallExpression`` renders ``callee(arg, arg, ...)``,
    ``Identifier`` yields its name, ``NumberLiteral`` yields its value and
    ``StringLiteral`` yields its value wrapped in single quotes.

    Raises:
        ValueError: If the node type is none of the above.
    """
    kind = node["type"]

    if kind == "Program":
        return "\n".join(code_generator(statement) for statement in node["body"])

    if kind == "ExpressionStatement":
        rendered = code_generator(node["expression"])
        return rendered + ";"

    if kind == "CallExpression":
        # Render the callee first, then the arguments, left to right.
        head = code_generator(node["callee"]) + "("
        rendered_args = ", ".join(
            code_generator(argument) for argument in node["arguments"]
        )
        return head + rendered_args + ")"

    if kind == "Identifier":
        return node["name"]

    if kind == "NumberLiteral":
        return node["value"]

    if kind == "StringLiteral":
        return "'" + node["value"] + "'"

    raise ValueError(f"Unknown node.type=`{node['type']}`")

In [29]:
# Run the first three stages by hand on a sample expression so each
# intermediate result can be inspected. (The stray `tokenizer()` call without
# its required argument was removed: it raised TypeError and aborted the cell.)
tokens = tokenizer("(add 2 (subtract 4 2))")
ast = parser(tokens)
new_ast = transformer(ast)

In [30]:
def compiler(src: str) -> str:
    """Compile Lisp-style source text to C-style call syntax.

    Chains the stages in order: ``tokenizer`` -> ``parser`` ->
    ``transformer`` -> ``code_generator``.
    """
    return code_generator(transformer(parser(tokenizer(src))))

In [40]:
# End-to-end checks: each entry of `src` must compile to the entry of
# `expected` at the same index.
src = [
    "(add 2 (subtract 4 2))",
    "(add 3 (multiply 4 'hello'))",
]

expected = [
    "add(2, subtract(4, 2));",
    "add(3, multiply(4, 'hello'));"
]

# strict=True makes a length mismatch between the two lists an error instead
# of silently skipping the unmatched cases.
for s, e in zip(src, expected, strict=True):
    assert compiler(s) == e, f"compiler({s!r}) did not produce {e!r}"