In [13]:
import json
import re
from tqdm import tqdm
from afl.util.utils import load_json
from afl.util.preprocess_data import extract_structure

def load_jsonl(file_path):
    with open(file_path, 'r') as file:
        data = [json.loads(line) for line in file]
    return data

def extract_file(diff_text):
    file_pattern = r"^diff --git a\/(.+?) b\/\1"
    files = re.findall(file_pattern, diff_text, re.MULTILINE)
    return files

def extract_line(diff_text):
    lines_pattern = r"@@ -(\d+),(\d+) \+(\d+),(\d+)"
    line_changes = re.findall(lines_pattern, diff_text)
    lines = [int(int(l[0]) + int(l[1])/2) for l in line_changes]
    return lines

def parse_gt_methods(gt_entries):
    """
    解析ground truth中的条目，并统一处理为文件级别和函数级别的定位。
    """
    files, methods = set(), set()

    for entry in gt_entries:
        parts = entry.split('::')

        if len(parts) == 2:  # File::Method 或 File::Class
            file_name, method_or_class = parts
            files.add(file_name)
            methods.add(method_or_class)

        elif len(parts) == 1:  # File
            files.add(parts[0])

    return files, methods


def get_function_from_line(file_name: str, line: int, instance_id: str):
    line = int(line)
    d = REPO_STRUCTURE[instance_id]
    structure = d["structure"]
    files, classes, functions = extract_structure(structure)

    for item in functions:
        if item['file'] == file_name and int(item['start_line']) <= line <= int(item['end_line']):
            return item['name']

    for item in classes:
        if item['file'] == file_name and int(item['start_line']) <= line <= int(item['end_line']):
            for method in item['methods']:
                if int(method['start_line']) <= line <= int(method['end_line']):
                    return f"{item['name']}.{method['name']}"
            return item['name']

    return None

def eval_acc(patches, gt):
    acc = 0

    for patch in tqdm(patches, desc="Evaluating patches"):
        if patch['model_patch']=="":
            continue
        instance_id = patch['instance_id']
        try:
            file_name = extract_file(patch['model_patch'])[0]
            line_list = extract_line(patch['model_patch'])
        except:
            continue
        function_list = []
        for line in line_list:
            function = get_function_from_line(file_name, line, instance_id)
            function_list.append(function)
        gt_files, gt_methods = parse_gt_methods(gt[instance_id])
        if set(function_list) & set(gt_methods) != set():
            acc += 1

    return acc / 300

In [7]:
gt_data = load_json('gt.json')
REPO_STRUCTURE = {instance_id: load_json(f"../repo_structures/{instance_id}.json") for instance_id in
                  tqdm(gt_data.keys())}

100%|██████████| 300/300 [02:33<00:00,  1.95it/s]


In [14]:
path_list = [
    "../test_patches/agentless_1.5.jsonl",
    "../test_patches/autocoderover.jsonl",
    "../test_patches/sweagent.jsonl",
    "../all_preds_afl_gpt.jsonl",
    "../all_preds_agentless_gpt.jsonl",
    "../all_preds_orcaloca_gpt.jsonl",
]
for p in path_list:
    patches = load_jsonl(p)
    acc = eval_acc(patches, gt_data)
    print(p, acc)

Evaluating patches: 100%|██████████| 300/300 [00:27<00:00, 10.82it/s] 


../test_patches/agentless_1.5.jsonl 0.49333333333333335


Evaluating patches: 100%|██████████| 296/296 [00:16<00:00, 17.84it/s] 


../test_patches/autocoderover.jsonl 0.4633333333333333


Evaluating patches: 100%|██████████| 278/278 [00:53<00:00,  5.15it/s]


../test_patches/sweagent.jsonl 0.3233333333333333


Evaluating patches: 100%|██████████| 300/300 [00:15<00:00, 19.96it/s] 


../all_preds_afl_gpt.jsonl 0.4666666666666667


Evaluating patches: 100%|██████████| 295/295 [00:24<00:00, 11.84it/s] 


../all_preds_agentless_gpt.jsonl 0.43


Evaluating patches: 100%|██████████| 262/262 [00:14<00:00, 18.33it/s] 

../all_preds_orcaloca_gpt.jsonl 0.38333333333333336





In [20]:
path_list = [
    # "../4o.jsonl",
    "../claude.jsonl",
    # "../test_patches/agentless_claude.jsonl",
    "../test_patches/orcaloca_claude.jsonl",
]
for p in path_list:
    patches = load_jsonl(p)
    acc = eval_acc(patches, gt_data)
    print(p, acc)

Evaluating patches: 100%|██████████| 300/300 [00:15<00:00, 19.25it/s] 


../claude.jsonl 0.5366666666666666


Evaluating patches: 100%|██████████| 300/300 [00:25<00:00, 11.73it/s]

../test_patches/orcaloca_claude.jsonl 0.57



