In [1]:
from tree_sitter import Language, Parser

# Compile the grammar sources in the `tree-sitter-json-prefix` directory into a
# shared library, then load the "json_prefix" language from that library.
_LIBRARY_PATH = ".local/build/json_prefix.so"
Language.build_library(_LIBRARY_PATH, ["tree-sitter-json-prefix"])
JSON_LANGUAGE = Language(_LIBRARY_PATH, "json_prefix")

# Single module-level parser reused by every cell below.
parser = Parser()
parser.set_language(JSON_LANGUAGE)

In [197]:
def depth_first_traversal(node):
    """Yield `node` followed by all of its descendants in pre-order.

    Children are visited left to right, each child's subtree being exhausted
    before its next sibling is yielded.
    """
    pending = [node]
    while pending:
        current = pending.pop()
        yield current
        # Push children reversed so the leftmost child is popped first.
        pending.extend(reversed(current.children))

def is_json_or_prefix(s):
    """Return True when `s` parses without any syntax error.

    Args:
        s: Text to check against the grammar loaded into the module-level
            `parser`.

    Returns:
        bool: False if the parse tree contains an error anywhere, else True.
    """
    tree = parser.parse(bytes(s, "utf8"))
    # `has_error` is true for a node that is, or contains, a syntax error, so
    # the root node answers for the whole tree; no per-node walk is needed.
    return not tree.root_node.has_error

# Demo: an object that is missing its closing brace.
s = '{ "a":"b"'
tree = parser.parse(s.encode("utf8"))

# First line: whether the text is accepted; second line: the parse tree.
print(is_json_or_prefix(s), tree.root_node.sexp(), sep="\n")

True
(document (prefix_object (pair key: (string (string_content)) value: (string (string_content)))))


In [223]:
def prefix_parse(s):
    """Parse `s` with the module-level `parser` and convert it to Python values.

    Node types starting with ``prefix_`` are treated like their unprefixed
    counterpart (e.g. ``prefix_object`` is converted like ``object``).

    Args:
        s: Source text to parse.

    Returns:
        None if the parse tree contains a syntax error. Otherwise a tuple
        ``(tree, nodes, values)``:

        * ``tree``   - the tree-sitter tree.
        * ``values`` - the decoded value (None/bool/int/float/str, list, dict).
        * ``nodes``  - same shape as ``values``, but each scalar is replaced by
          its syntax node. A string is represented by its first named child
          (the content node), or by the string node itself when it is empty.
          Arrays and objects are plain ``list``/``dict`` containers.

        When the document has no value at all (empty input), ``nodes`` and
        ``values`` are both None.

    Raises:
        TypeError: a node of an unsupported type was encountered.
        ValueError: a number node's text is not a valid int or float.
    """
    tree = parser.parse(bytes(s, "utf8"))
    if tree.root_node.has_error:
        return None

    top_level = tree.root_node.named_children
    if not top_level:
        # Nothing has been written yet; there is no value to convert.
        return tree, None, None
    root_value = top_level[0]

    def string_text(string_node):
        """Text of a string node; an empty string has no named child."""
        parts = string_node.named_children
        return parts[0].text.decode("utf-8") if parts else ""

    def traverse(node):
        is_prefix = node.type.startswith("prefix_")
        inferred_type = node.type[len("prefix_"):] if is_prefix else node.type

        if inferred_type == "null":
            return node, None

        if inferred_type == "true":
            return node, True

        if inferred_type == "false":
            return node, False

        if inferred_type == "number":
            # Prefer an int; fall back to float for fractions and exponents.
            try:
                return node, int(node.text)
            except ValueError:
                return node, float(node.text)

        if inferred_type == "string_content":
            return node, node.text.decode("utf-8")

        if inferred_type == "string":
            parts = node.named_children
            if not parts:
                # Empty string: no content child, report the string node.
                return node, ""
            return traverse(parts[0])

        if inferred_type == "array":
            node_array = []
            values_array = []
            for child in node.named_children:
                if child.type == "trailing_comma":
                    continue
                node_value, value_value = traverse(child)
                node_array.append(node_value)
                values_array.append(value_value)
            return node_array, values_array

        if inferred_type == "object":
            node_object = {}
            values_object = {}
            for pair in node.named_children:
                if pair.type == "trailing_comma":
                    continue
                # Presumably an unfinished pair; it carries no usable
                # key/value yet, so it is left out of the result.
                if pair.type.startswith("prefix_"):
                    continue
                key = string_text(pair.named_children[0])
                node_value, value_value = traverse(pair.named_children[1])
                node_object[key] = node_value
                values_object[key] = value_value
            return node_object, values_object

        # Fail loudly instead of returning None, which would only surface
        # later as an opaque "cannot unpack NoneType" error in the caller.
        raise TypeError(f"unsupported node type: {node.type!r}")

    try:
        nodes, values = traverse(root_value)
    except Exception:
        # Show the offending tree, then re-raise with the original traceback.
        print(root_value.sexp())
        raise
    return tree, nodes, values

# Demo: an array whose last element is an object that was never closed.
tree, nodes, values = prefix_parse("""
[1,2, { "a": "b"
""")

# Parse tree, then the node mapping, then the decoded values (one per line).
print(tree.root_node.sexp(), nodes, values, sep="\n")

(document (prefix_array (number) (number) (prefix_object (pair key: (string (string_content)) value: (string (string_content))))))
[<Node type=number, start_point=(1, 1), end_point=(1, 2)>, <Node type=number, start_point=(1, 3), end_point=(1, 4)>, {'a': <Node type=string_content, start_point=(1, 14), end_point=(1, 15)>}]
[1, 2, {'a': 'b'}]


In [236]:
from jsonschema import Draft202012Validator

tree, nodes, instance = prefix_parse("""
{
    "home": {
        "name": "A",
        "score": 1
    },
    "away": {
        "name": "B",
        "score": 1
    }
""")

schema = {
    "type": "object",
    "properties": {
        "home": { "type": "object", "properties": { "name": { "type": "string" }, "score": { "type": "integer" } } },
        "away": { "type": "object", "properties": { "name": { "type": "string" }, "score": { "type": "integer" } } }
    },
    "required": ["home", "away"]
}

v = Draft202012Validator(schema)


def syntax_node_at(root_value, path):
    """Return the syntax node of the value addressed by `path`.

    The walk follows the same rules `prefix_parse` uses to build its result:
    `trailing_comma` children are ignored, object members whose type starts
    with ``prefix_`` are ignored, and a later duplicate key wins. Unlike the
    `nodes` mapping, this always yields a real node, including for objects
    and arrays, so the caller can inspect its type.

    Args:
        root_value: Syntax node of the document's top-level value.
        path: Iterable of object keys (str) and array indexes (int). It is
            only iterated, never modified.

    Raises:
        KeyError: an object has no member with the requested key.
        IndexError: an array index is out of range.
    """
    node = root_value
    for index in path:
        members = [c for c in node.named_children if c.type != "trailing_comma"]
        if isinstance(index, int):
            node = members[index]
            continue
        found = None
        for pair in members:
            if pair.type.startswith("prefix_"):
                continue
            key_parts = pair.named_children[0].named_children
            key = key_parts[0].text.decode("utf-8") if key_parts else ""
            if key == index:
                found = pair.named_children[1]
        if found is None:
            raise KeyError(index)
        node = found
    return node


root_value = tree.root_node.named_children[0]

for e in v.iter_errors(instance):
    # Iterate e.path rather than popping from it: it is the error's own deque,
    # and draining it would leave nothing to print below.
    node = syntax_node_at(root_value, e.path)

    # Skip errors if node is incomplete
    if node.type.startswith("prefix_"):
        continue

    # A string is reported by its content (without the quotes), matching the
    # node that prefix_parse records for strings.
    if node.type == "string" and node.named_children:
        node = node.named_children[0]

    print(e.path)
    print(e.schema_path)
    print(node.text)