### parsing functions
using - https://github.com/tree-sitter-grammars/tree-sitter-glsl
basic usage: https://github.com/tree-sitter/py-tree-sitter

In [109]:
import tree_sitter_glsl as tsglsl
import tree_sitter
from tree_sitter import Language, Parser

GLSL_LANGUAGE = Language(tsglsl.language())

parser = Parser(GLSL_LANGUAGE)

In [110]:
example_code = """
// comment directly infront of a function
vec3 red(float intensity){
    // comment inside the function body, but the top
    intensity = max(intensity, 1.0);

    // comment deeper inside the function body
    return vec3(1.0,0.0,0.0) * intensity;
}

vec3 green(float intensity){
    // docstring inside
    // second inside docstring
    intensity = max(intensity, 1.0);

    return vec3(0.0,1.0,0.0) * intensity;
}

// comment one before
// comment two before
vec3 blue(float intensity){
    intensity = max(intensity, 1.0);

    return vec3(0.0,0.0,1.0) * intensity;
}

void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
    vec2 uv = fragCoord/iResolution.xy;

    vec3 col = mix(red(0.5), blue(0.3), 0.8);
    col = mix(col, green(1.0), sin(iTime));
    fragColor = vec4(col,1.0);
}
"""

tree = parser.parse(bytes(example_code, encoding="utf-8"))
tree

<tree_sitter.Tree at 0x1b6d857c430>

In [111]:
root_node = tree.root_node
for child in root_node.children:
    print(child.type)
    if child.type == "function_definition":
        print(child.children[-1])
    #     print(f"{child.text.decode(encoding='utf-8')}\n##byte range is {child.start_byte}:{child.end_byte}")
    #     print(str(child))

comment
function_definition
(compound_statement (comment) (expression_statement (assignment_expression left: (identifier) right: (call_expression function: (identifier) arguments: (argument_list (identifier) (number_literal))))) (comment) (return_statement (binary_expression left: (call_expression function: (identifier) arguments: (argument_list (number_literal) (number_literal) (number_literal))) right: (identifier))))
function_definition
(compound_statement (comment) (comment) (expression_statement (assignment_expression left: (identifier) right: (call_expression function: (identifier) arguments: (argument_list (identifier) (number_literal))))) (return_statement (binary_expression left: (call_expression function: (identifier) arguments: (argument_list (number_literal) (number_literal) (number_literal))) right: (identifier))))
comment
comment
function_definition
(compound_statement (expression_statement (assignment_expression left: (identifier) right: (call_expression function: (ident

In [112]:
# https://tree-sitter.github.io/tree-sitter/using-parsers#query-syntax

query = GLSL_LANGUAGE.query(
        """
        (
        (comment)* @comments.before
        (function_definition 
            type: (primitive_type) @function.type
            declarator: (function_declarator) @function.declarator
            
            body: (compound_statement
                (comment)* @docstring
                ) @function.body
        )
        )
        """
    )

matches = query.matches(tree.root_node)
# print(matches)
for match in matches:
    # break
    print(match[1])
    # print()

{'comments.before': [<Node type=comment, start_point=(1, 0), end_point=(1, 41)>], 'function.type': <Node type=primitive_type, start_point=(26, 0), end_point=(26, 4)>, 'function.declarator': <Node type=function_declarator, start_point=(26, 5), end_point=(26, 55)>, 'function.body': <Node type=compound_statement, start_point=(27, 0), end_point=(33, 1)>}
{'comments.before': [<Node type=comment, start_point=(18, 0), end_point=(18, 21)>, <Node type=comment, start_point=(19, 0), end_point=(19, 21)>], 'function.type': <Node type=primitive_type, start_point=(26, 0), end_point=(26, 4)>, 'function.declarator': <Node type=function_declarator, start_point=(26, 5), end_point=(26, 55)>, 'function.body': <Node type=compound_statement, start_point=(27, 0), end_point=(33, 1)>}


In [113]:
from pprint import pprint

pprint(str(root_node))

('(translation_unit (comment) (function_definition type: (type_identifier) '
 'declarator: (function_declarator declarator: (identifier) parameters: '
 '(parameter_list (parameter_declaration type: (primitive_type) declarator: '
 '(identifier)))) body: (compound_statement (comment) (expression_statement '
 '(assignment_expression left: (identifier) right: (call_expression function: '
 '(identifier) arguments: (argument_list (identifier) (number_literal))))) '
 '(comment) (return_statement (binary_expression left: (call_expression '
 'function: (identifier) arguments: (argument_list (number_literal) '
 '(number_literal) (number_literal))) right: (identifier))))) '
 '(function_definition type: (type_identifier) declarator: '
 '(function_declarator declarator: (identifier) parameters: (parameter_list '
 '(parameter_declaration type: (primitive_type) declarator: (identifier)))) '
 'body: (compound_statement (comment) (comment) (expression_statement '
 '(assignment_expression left: (identif

In [114]:
from typing import List, Tuple
def parse_functions(code:str) -> List[Tuple[int,int,int,int,int]]:
    """
    parses the code using tree-parser-glsl
    returns the **byte-indecies** for before_comment, start header, end header, end docstring, end_function.
    returns a list 5-tupel. If before_comment or docstring aren't found, the indiecies will coinside with the next one.
    """
    tree = parser.parse(bytes(code, encoding="utf-8"))
    root_node = tree.root_node
    funcs = []
    
    # lazy init
    start_comment = start_header = end_header = end_docstring = end_function = None
    for child in root_node.children:
        if child.type == "comment" and not start_comment:
            start_comment = child.start_byte
        if child.type == "function_definition":
            start_header = child.start_byte
            if not start_comment:
                start_comment = start_header
            end_function = child.end_byte
            end_header = child.children[-1].children[0].end_byte
            # inside the function body, past the "{"
            for sub_child in child.children[-1].children[1:]:
                if sub_child.type == "comment":
                    end_docstring = sub_child.end_byte
                else:
                    if not end_docstring:
                        end_docstring = end_header
                    break
                


            funcs.append(tuple([start_comment, start_header, end_header, end_docstring, end_function]))
            start_comment = start_header = end_header = end_docstring = end_function = None
    return funcs

def color_ranges(code:str, func_bytes:list):
    code_bytes = bytes(code, encoding="utf-8")
    colored_bytes = b"\x1b[0m"
    for start_comment, start_header, end_header, end_docstring, end_function in func_bytes:
        colored_bytes += b"\x1b[32m" + code_bytes[start_comment:start_header]
        colored_bytes += b"\x1b[31m" + code_bytes[start_header:end_header]
        colored_bytes += b"\x1b[33m" + code_bytes[end_header:end_docstring]
        colored_bytes += b"\x1b[34m" + code_bytes[end_docstring:end_function]
        colored_bytes += b"\x1b[0m" #escape back to normal
        colored_bytes += b"\n\n" # for pretty

    return colored_bytes.decode(encoding="utf-8")

funcs = parse_functions(example_code)
colored_funcs = color_ranges(example_code, func_bytes=funcs)
print(colored_funcs)


[0m[32m// comment directly infront of a function
[31mvec3 red(float intensity){[33m
    // comment inside the function body, but the top[34m
    intensity = max(intensity, 1.0);

    // comment deeper inside the function body
    return vec3(1.0,0.0,0.0) * intensity;
}[0m

[32m[31mvec3 green(float intensity){[33m
    // docstring inside
    // second inside docstring[34m
    intensity = max(intensity, 1.0);

    return vec3(0.0,1.0,0.0) * intensity;
}[0m

[32m// comment one before
// comment two before
[31mvec3 blue(float intensity){[33m[34m
    intensity = max(intensity, 1.0);

    return vec3(0.0,0.0,1.0) * intensity;
}[0m

[32m[31mvoid mainImage( out vec4 fragColor, in vec2 fragCoord )
{[33m[34m
    vec2 uv = fragCoord/iResolution.xy;

    vec3 col = mix(red(0.5), blue(0.3), 0.8);
    col = mix(col, green(1.0), sin(iTime));
    fragColor = vec4(col,1.0);
}[0m


