In [32]:
import os
import tree_sitter
from tree_sitter import Language, Tree
import json
import tree_sitter
import tree_sitter_java as tsjava

# Tree-sitter node types treated as function-like definitions, keyed by
# language name. Each language gets its own list object.
lang_func_node = dict(
    python=['function_definition'],
    c=['function_definition'],
    cpp=['function_definition'],
    java=['method_declaration', 'constructor_declaration'],
    javascript=['function_declaration'],
)

def get_ast(code):
    """Parse Java source text with Tree-sitter and return the parse result.

    Args:
        code: Java source as a ``str``; it is encoded as UTF-8 before parsing.

    Returns:
        Whatever ``Parser.parse`` returns for the encoded source.
    """
    # A new grammar handle and a new parser are created on every call.
    java_grammar = tree_sitter.Language(tsjava.language())
    java_parser = tree_sitter.Parser(java_grammar)
    source_bytes = code.encode('utf-8')
    return java_parser.parse(source_bytes)

def get_nx_ast(code: str, lang: str):
    """Return the parse of ``code`` produced by ``get_ast``.

    ``lang`` is accepted for the caller's convenience but is not consulted:
    the source is always handed to ``get_ast`` regardless of its value.
    """
    return get_ast(code)

def traverse_tree(tree: Tree):
    """Yield every node of ``tree`` in depth-first, pre-order sequence.

    The walk uses a single cursor obtained from ``tree.walk()``: a node is
    yielded, then the cursor descends to its first child if there is one,
    otherwise moves to its next sibling, otherwise climbs until an ancestor
    with an unvisited sibling is found. The generator ends when the cursor
    can no longer move to a parent.
    """
    cursor = tree.walk()

    while True:
        yield cursor.node

        # Descend first; if there is nothing below, move sideways.
        if cursor.goto_first_child() or cursor.goto_next_sibling():
            continue

        # Leaf with no sibling: climb until some ancestor has a next sibling.
        while True:
            if not cursor.goto_parent():
                # No parent left, so the whole tree has been visited.
                return
            if cursor.goto_next_sibling():
                break
      
def _get_methods(ast, content, file_path, language):
    """Build one record for every function-like node found in ``ast``.

    Args:
        ast: Parsed tree; it is walked with ``traverse_tree``.
        content: Source text the tree was parsed from.
        file_path: Stored unchanged under the ``'path'`` key of each record.
        language: Key into ``lang_func_node`` selecting which node types
            count as function-like.

    Returns:
        A list of dicts, in traversal order, each with the keys:
        ``code`` (the node's lines, individually stripped and joined with a
        single space), ``path``, ``start_line`` and ``end_line`` (1-based,
        inclusive), ``nloc`` (number of source lines the node spans) and
        ``token_count`` (length in characters of ``code``).

    Raises:
        KeyError: If ``language`` has no entry in ``lang_func_node``.
    """
    wanted_types = lang_func_node[language]
    # Split once; the line list is the same for every node in the file.
    lines = content.split('\n')

    methods = []
    for node in traverse_tree(ast):
        if node.type not in wanted_types:
            continue

        # start_point / end_point rows are shifted by one to get 1-based lines.
        s, e = node.start_point[0] + 1, node.end_point[0] + 1

        selected_lines = [line.strip() for line in lines[s - 1:e]]
        selected_content = ' '.join(selected_lines)

        methods.append({
            'code': selected_content,
            'path': file_path,
            'start_line': s,
            'end_line': e,
            # Lines spanned by this method only (previously the line count of
            # the entire file was stored here, identical for every method).
            'nloc': len(selected_lines),
            # NOTE: this is a character count of the flattened code, not a
            # lexical token count; kept as-is for compatibility.
            'token_count': len(selected_content),
        })
    return methods
 
def get_all_methods(file_path):
    """Read a source file and return the method records found in it.

    The file is parsed through ``get_nx_ast`` and scanned by ``_get_methods``
    with the language fixed to ``"java"``.

    Args:
        file_path: Path of the file to read (opened in text mode with the
            platform's default encoding).

    Returns:
        The list produced by ``_get_methods``, or ``None`` when the file
        cannot be decoded (``UnicodeDecodeError``).
    """
    try:
        # Context manager so the handle is closed even if decoding fails.
        with open(file_path, 'r') as source_file:
            content = source_file.read()
    except UnicodeDecodeError:
        return None

    ast = get_nx_ast(content, "java")
    return _get_methods(ast, content, file_path, "java")

def find_method_with_lines(file_path, start_line, end_line):
    """Find the first method whose line span contains ``start_line..end_line``.

    Methods are taken from ``get_all_methods(file_path)`` in the order it
    returns them; the first one with ``start_line`` at or after its start and
    ``end_line`` at or before its end is used.

    Args:
        file_path: Path of the source file.
        start_line: First line of the range of interest.
        end_line: Last line of the range of interest.

    Returns:
        ``(method, method_start_line, method_end_line + 1)`` where ``method``
        is the matching record with two extra keys: ``'before'`` (file lines
        from the method's start up to, but excluding, ``start_line``) and
        ``'after'`` (file lines from ``end_line`` through the method's end),
        each joined with ``"\\n"`` and without line terminators.
        ``(None, None, None)`` when the file could not be decoded or no
        method contains the range.
    """
    methods = get_all_methods(file_path)
    if methods is None:
        return None, None, None

    for method in methods:
        if method['start_line'] <= start_line and method['end_line'] >= end_line:
            # Read the file once and close it promptly.
            with open(file_path, 'r') as source_file:
                file_lines = source_file.readlines()

            before = file_lines[method['start_line'] - 1:start_line - 1]
            after = file_lines[end_line - 1:method['end_line']]

            # Strip only the newline: a final line without a trailing newline
            # must not lose its last real character.
            method['before'] = "\n".join(line.rstrip('\n') for line in before)
            method['after'] = "\n".join(line.rstrip('\n') for line in after)
            return method, method['start_line'], method['end_line'] + 1
    return None, None, None

def find_method_with_start_line(file_path, start_line):
    """Return the raw text of the method that begins exactly at ``start_line``.

    Args:
        file_path: Path of the source file.
        start_line: Line on which the wanted method must start, compared
            against the ``'start_line'`` of each record from
            ``get_all_methods``.

    Returns:
        ``(method_code, method_start_line, method_end_line + 1)`` where
        ``method_code`` is the concatenation of the file's lines from the
        method's start through its end, line terminators included.
        ``(None, None, None)`` when the file could not be decoded or no
        method starts on that line.
    """
    nothing = (None, None, None)

    candidates = get_all_methods(file_path)
    if candidates is None:
        return nothing

    # First record whose start line matches, if any.
    hit = next((m for m in candidates if m['start_line'] == start_line), None)
    if hit is None:
        return nothing

    with open(file_path, 'r') as source_file:
        file_lines = source_file.readlines()

    last_line = hit['end_line']
    body_text = ''.join(file_lines[start_line - 1:last_line])
    return body_text, hit['start_line'], last_line + 1

In [33]:
def main_1(
    regminer4apr="/external_disk/coding_space/ChatRepairRegression/experiments/regminer4apr.json",
    environments="/external_disk/coding_space/ChatRepairRegression/experiments/environments",
):
    """Annotate every dataset entry with the span of its enclosing method.

    The JSON file at ``regminer4apr`` is loaded, each entry is updated in
    memory, and the file is then overwritten with the result.

    For each key of the form ``"<bug id>_<relative path>"`` the source file
    ``<environments>/RegressionBug-<bug id>/BUGGY/<relative path>`` is
    searched with ``find_method_with_lines`` using the entry's
    ``start_line`` / ``end_line``. Afterwards the entry's ``start_line`` and
    ``end_line`` are each incremented by one, and ``method_start_line`` /
    ``method_end_line`` are set to the values returned by the search
    (``None`` when no method was found).

    Note: because the line numbers are incremented in place, running this
    twice on the same file shifts them twice.

    Args:
        regminer4apr: Path of the JSON dataset to read and rewrite.
        environments: Directory containing the ``RegressionBug-<id>`` trees.
    """
    with open(regminer4apr, 'r') as f:
        dataset = json.load(f)

    for data, bug_info in dataset.items():
        # Split on the first underscore only: the relative path may itself
        # contain underscores.
        bug_id, relative_path = data.split("_", 1)
        file_path = os.path.join(environments, f'RegressionBug-{bug_id}', "BUGGY", relative_path)

        _, method_start_line, method_end_line = find_method_with_lines(
            file_path, bug_info['start_line'], bug_info['end_line']
        )

        bug_info['start_line'] += 1
        bug_info['end_line'] += 1

        bug_info['method_start_line'] = method_start_line
        bug_info['method_end_line'] = method_end_line

    # Write updated data back to the JSON file
    with open(regminer4apr, 'w') as f:
        json.dump(dataset, f, indent=4)

# main_1()

In [None]:
from collections import OrderedDict

def main_2(
    regminer4apr="/external_disk/coding_space/ChatRepairRegression/experiments/regminer4apr_function_level.json",
    environments="/external_disk/coding_space/ChatRepairRegression/experiments/environments/regminer4apr",
):
    """Rewrite the first incomplete dataset entry as a function-level record.

    The JSON file at ``regminer4apr`` is loaded and scanned in order. Entries
    that already have a non-``None`` ``"before"`` value are skipped. The
    first entry without one is replaced by a new record with a fixed key
    order, whose ``context`` is the method text returned by
    ``find_method_with_start_line`` for the entry's ``method_start_line``.
    The scan stops after that single replacement and the file is
    overwritten, so each call updates at most one entry.

    Args:
        regminer4apr: Path of the JSON dataset to read and rewrite.
        environments: Directory containing the ``RegressionBug-<id>`` trees.
    """
    with open(regminer4apr, 'r') as f:
        dataset = json.load(f)

    for data in dataset:
        bug_info = dataset[data]
        if bug_info.get("before") is not None:
            continue

        # Split on the first underscore only: the relative path may itself
        # contain underscores.
        bug_id, relative_path = data.split("_", 1)
        file_path = os.path.join(environments, f'RegressionBug-{bug_id}', "BUGGY", relative_path)

        method, method_start_line, method_end_line = find_method_with_start_line(
            file_path, bug_info['method_start_line']
        )

        # Build the replacement record with the desired key order.
        ordered_bug_info = OrderedDict()
        ordered_bug_info["context"] = method
        ordered_bug_info["start_line"] = 0
        ordered_bug_info["end_line"] = 0
        ordered_bug_info["bug_id"] = str(bug_id)
        ordered_bug_info["before"] = ""
        ordered_bug_info["after"] = ""
        ordered_bug_info["buggy_line"] = ""
        ordered_bug_info["buggy_lines"] = ""
        ordered_bug_info["method_start_line"] = method_start_line
        ordered_bug_info["method_end_line"] = method_end_line

        # Replacing the value of an existing key is safe while iterating.
        dataset[data] = ordered_bug_info
        # Only one entry is processed per call.
        # NOTE(review): presumably intentional (one record per run) - confirm.
        break

    # Write updated data back to the JSON file with ordered keys
    with open(regminer4apr, 'w') as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

# main_2()


In [None]:
def extract_method_info(code):
    """Return the name and parameter texts of the first Java method in ``code``.

    The source is parsed with ``get_ast`` and searched depth-first
    (pre-order) for the first ``method_declaration`` node. Name and
    parameters are both taken from that one declaration, so they always
    describe the same method.

    Args:
        code: Java source text as a ``str``.

    Returns:
        ``(method_name, parameters)``: ``method_name`` is the text of the
        declaration's ``identifier`` child (``None`` if no method is found),
        and ``parameters`` is a list with the source text of each
        ``formal_parameter`` child of its ``formal_parameters`` node
        (empty if there are none or no method is found).
    """
    # Node offsets are byte offsets into the UTF-8 encoding that get_ast
    # parses, so text must be sliced from the bytes, not from the str.
    source_bytes = code.encode('utf-8')
    root_node = get_ast(code).root_node

    def node_text(node):
        """Decode the source span covered by ``node``."""
        return source_bytes[node.start_byte:node.end_byte].decode('utf-8')

    method_name = None
    parameters = []

    # Explicit stack instead of recursion; children are pushed reversed so
    # they are visited left to right.
    pending = [root_node]
    while pending:
        node = pending.pop()
        if node.type != "method_declaration":
            pending.extend(reversed(node.children))
            continue

        for child in node.children:
            if child.type == "identifier":
                method_name = node_text(child)
            elif child.type == "formal_parameters":
                parameters.extend(
                    node_text(param)
                    for param in child.children
                    if param.type == "formal_parameter"
                )
        # Stop at the first method so nested declarations cannot overwrite
        # the name or append unrelated parameters.
        break

    return method_name, parameters