In [1]:
import json
from pathlib import Path

# -- 1. Load every annotator's JSON list and merge them --
def _load_annotations(stem):
    """Parse ``<stem>.json`` from the working directory (read as UTF-8)."""
    with open(f'{stem}.json', 'r', encoding='utf-8') as handle:
        return json.load(handle)


kaiyu = _load_annotations('kaiyu')
shuo = _load_annotations('shuo')
daoyang = _load_annotations('daoyang')
ziming = _load_annotations('ziming')

# Shift human_judge_result by +1 for ziming's records only.
# This edits the loaded dicts in place.
for entry in ziming:
    entry['human_judge_result'] = entry['human_judge_result'] + 1

# Concatenate all records; the merged list holds the very same dict objects as
# the per-annotator lists, so editing a record here edits it there as well.
human_evaluation_data = kaiyu + shuo + daoyang + ziming


In [2]:
# Sanity check: total number of merged annotation records.
total_records = len(human_evaluation_data)
print(total_records)

168


In [3]:
# -- 2. The outputs directory lives one level above the current working directory --
base_outputs = Path.cwd().parent / "outputs"


# -- 3. Keep only records with human_judge_result == 1 whose paper_index is listed below --
target_papers = [1, 2, 3, 7, 8, 9, 11, 30, 31]

records_to_update = []
for candidate in human_evaluation_data:
    # .get() lets records lacking either key fall through as non-matching
    if candidate.get('human_judge_result') != 1:
        continue
    if candidate.get('paper_index') not in target_papers:
        continue
    records_to_update.append(candidate)

print(f"共有 {len(records_to_update)} 条记录 human_judge_result=1 且 paper_index 在 {target_papers} 中，需要处理。\n")


共有 28 条记录 human_judge_result=1 且 paper_index 在 [1, 2, 3, 7, 8, 9, 11, 30, 31] 中，需要处理。



In [None]:
# Interactive review pass: for each selected record, print its metadata, the
# task instruction and the two source files, then prompt for a replacement
# comment. A record whose files cannot be resolved on disk is reported and
# skipped, so one bad record does not abort the whole interactive session.
for idx, record in enumerate(records_to_update, 1):
    annotator = record['annotator']
    agent_type = record['agent_type']
    model_name = record['model_name']
    paper_index = record['paper_index']
    function_index = record['function_index']
    original_comment = record.get('comment', '')

    # "no_agent" results are read from BaseAgent, every other agent type from OpenHands
    agent_dir = "BaseAgent" if agent_type == "no_agent" else "OpenHands"
    model_dir = base_outputs / agent_dir / model_name

    # iterdir() raises on a missing directory, so report that case explicitly
    if not model_dir.is_dir():
        print(f"❌ 未找到模型目录: {model_dir}")
        continue

    # Paper folders are matched by the "<paper_index>-" prefix; the trailing
    # dash keeps index 1 from matching a folder such as "11-..."
    paper_folders = [p for p in model_dir.iterdir()
                     if p.is_dir() and p.name.startswith(f"{paper_index}-")]
    if not paper_folders:
        print(f"❌ 未找到论文 {paper_index} 文件夹: {model_dir}")
        continue
    # If several folders share the prefix, take the first in sorted order
    paper_folder = sorted(paper_folders)[0]

    # Read info.json, which names the repo folder and lists the implementations
    info_path = paper_folder / "info.json"
    if not info_path.is_file():
        print(f"❌ 未找到 info.json: {info_path}")
        continue
    info = json.loads(info_path.read_text(encoding='utf-8'))
    repo_folder_name = info['repo_folder_name']
    impls = info['implementations']

    # Pick the implementation entry this record refers to
    try:
        impl = impls[function_index]
    except IndexError:
        print(f"❌ function_index {function_index} 超出范围（共 {len(impls)} 个实现）: {info_path}")
        continue
    instruction = impl.get('instruction', '')
    goal_file_rel = impl.get('goal_file', '')
    goal_function = impl.get('goal_function', '')
    class_name = impl.get('class_name', '')
    golden_file_rel = impl.get('golden_file', '')

    # -- Print metadata and file contents --
    print(f"\n[{idx}/{len(records_to_update)}] 注释者={annotator}, 代理={agent_type}, 模型={model_name}, "
          f"论文={paper_index}, 函数索引={function_index}")
    print(f"Original comment: {original_comment!r}\n")

    print(f"Instruction:\n{instruction}\n")
    print(f"Goal Function: {goal_function}")
    print(f"Class Name: {class_name}\n")

    # goal_file is resolved inside the repo folder.
    # is_file() rather than exists(): when the relative path is empty the
    # joined path is a directory, and read_text() on a directory would raise.
    goal_path = paper_folder / repo_folder_name / goal_file_rel
    if goal_path.is_file():
        print(f"--- {goal_file_rel} 内容 ---")
        print(goal_path.read_text(encoding='utf-8'))
    else:
        print(f"❌ 未找到 goal_file: {goal_path}")

    # golden_file is resolved directly under the paper folder (same is_file() reasoning)
    golden_path = paper_folder / golden_file_rel
    if golden_path.is_file():
        print(f"\n--- {golden_file_rel} 内容 ---")
        print(golden_path.read_text(encoding='utf-8'))
    else:
        print(f"❌ 未找到 golden_file: {golden_path}")

    # -- Prompt for the new comment; an empty answer leaves the record untouched --
    new_comment = input("\n请输入新的 comment（留空跳过）：").strip()
    if new_comment:
        # record is the same dict object held by the per-annotator lists
        record['comment'] = new_comment
        print("✅ 已更新 comment。\n")
    else:
        print("⏭️ 跳过，不修改 comment。\n")

    print("="*80)




[1/28] 注释者=kaiyu, 代理=no_agent, 模型=gpt4.1, 论文=2, 函数索引=0
Original comment: "LLM using the other class function not used in gold answer, and the loss function didn't implement the forward, therefore the loss will return None"

Instruction:
Implement the forward function of calulating the Hyperbolic Loss in src/hierarchy_transformers/losses/hit_loss.py based on paper.pdf.

Goal Function: forward
Class Name: HierarchyTransformerLoss

--- src/hierarchy_transformers/losses/hit_loss.py 内容 ---
from __future__ import annotations
import logging
from collections.abc import Iterable
import torch
import torch.nn.functional as F
from geoopt.manifolds import PoincareBall
from hierarchy_transformers.models import HierarchyTransformer
from hierarchy_transformers.utils import format_citation
logger = logging.getLogger(__name__)

class HierarchyTransformerLoss(torch.nn.Module):
    """Hyperbolic loss that linearly combines hperbolic clustering loss and hyperbolic Centripetal loss and applies weights for 

In [None]:
# Write every annotator's (possibly edited) records back to its own JSON file.
#
# ziming's human_judge_result values were shifted by +1 in memory right after
# loading. Writing them back as-is would store the shifted values, and the next
# run of this notebook would shift them again. The offset is therefore removed
# on copies of the records before serialising, so ziming.json keeps the label
# values it had on disk and only the comment edits are persisted.
ZIMING_LABEL_OFFSET = 1  # must equal the shift applied to ziming at load time

file_map = {
    'kaiyu': kaiyu,
    'shuo': shuo,
    'daoyang': daoyang,
    'ziming': ziming,
}
for filename, data_list in file_map.items():
    records_to_write = data_list
    if filename == 'ziming':
        # Shallow copies: the in-memory records keep their shifted values, and
        # overriding an existing key via {**rec, ...} preserves key order.
        records_to_write = [
            {**rec, 'human_judge_result': rec['human_judge_result'] - ZIMING_LABEL_OFFSET}
            for rec in data_list
        ]
    with open(f'{filename}.json', 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII text readable in the file
        json.dump(records_to_write, f, ensure_ascii=False, indent=2)
    print(f"💾 保存更新到 {filename}.json")

print("\n全部处理完毕。")