In [None]:
import json
import os
from typing import Dict, List


In [None]:
# Location of one run's journal.json, split per path component for readability.
path = (
    "/data/userdata/v-yuanteli/aide_gpt_4.5_our_results_55/"
    "2025-05-05T12-20-07-GMT_run-group_aide/"
    "text-normalization-challenge-english-language_63956b26-4513-4a62-8af1-e360795aac70/"
    "logs/journal.json"
)

In [None]:
import json
from collections import defaultdict
from itertools import combinations
import os


def build_pairs_from_journal(path: str):
    """
    Build preference pairs between sibling nodes of a single journal.json.

    The journal is read as a dict with a ``nodes`` list (each node a dict with
    an ``id`` and optionally ``plan`` and ``metric``) and an optional
    ``node2parent`` mapping from node id to parent id. Every non-root node gets
    a reward relative to its parent's score, and every two children of the
    same parent whose rewards differ yield one pair.

    Args:
        path: Path laid out as ``<...>/<competition>_<suffix>/logs/journal.json``.
            The competition name is the part of the grandparent directory name
            before the first underscore.

    Returns:
        A list of dicts with keys ``agent``, ``comptation_name``, ``parent_id``,
        ``winner`` and ``loser`` (plans from the root down to the node, joined
        with " -> ") and ``score_diff`` (winner reward minus loser reward).

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
        KeyError: if the journal has no ``nodes`` entry or a node has no ``id``.
    """
    import math  # function-scope import: only needed for the NaN guard below

    # -------- 1. extract competition name --------
    # Two dirname() hops: journal.json -> logs/ -> "<competition>_<suffix>" directory.
    run_dir_name = os.path.basename(os.path.dirname(os.path.dirname(path)))
    comp_name = run_dir_name.split("_")[0]

    # -------- 2. load data --------
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    nodes = data["nodes"]
    # `or {}` also covers an explicit null in the file, not just a missing key.
    node2parent = data.get("node2parent") or {}
    node_dict = {n["id"]: n for n in nodes}

    # -------- 3. helpers --------
    def get_metric(node):
        # "metric" may be absent or explicitly null; treat both as "no metric".
        return node.get("metric") or {}

    def get_score(node):
        # A missing node or a missing metric value counts as a score of 0.0.
        if node is None:
            return 0.0
        val = get_metric(node).get("value")
        return 0.0 if val is None else float(val)

    def get_reward(node, parent_node):
        # Signed improvement over the parent, oriented so that larger is better.
        # NOTE(review): a node without a `maximize` flag gets 0.0 and can therefore
        # outrank a sibling whose reward is negative — confirm this is intended.
        maximize = get_metric(node).get("maximize")
        cur_score = get_score(node)
        parent_score = get_score(parent_node)

        if maximize is True:
            return cur_score - parent_score
        if maximize is False:
            return parent_score - cur_score
        return 0.0

    def build_plan_chain_from_root(node_id: str) -> str:
        # Walk child -> root collecting plans, then reverse to root -> child order.
        chain = []
        visited = set()
        cur = node_id
        # Stop on unknown ids and on cycles instead of raising / looping forever.
        while cur is not None and cur in node_dict and cur not in visited:
            visited.add(cur)
            plan = node_dict[cur].get("plan")
            if plan is not None:
                chain.append(plan)
            cur = node2parent.get(cur)
        return " -> ".join(reversed(chain))

    # -------- 4. score every non-root node and group it under its parent --------
    parent2children = defaultdict(list)
    for node in nodes:
        node_id = node["id"]
        parent_id = node2parent.get(node_id)
        if parent_id is None:
            # Root nodes have no siblings to be compared against.
            continue

        # The parent id may point at a node that is not listed in `nodes`.
        parent_node = node_dict.get(parent_id)
        parent2children[parent_id].append({
            "node_id": node_id,
            "plan_chain": build_plan_chain_from_root(node_id),
            "reward": get_reward(node, parent_node),
        })

    # -------- 5. build pairs among siblings --------
    pairs = []
    for parent_id, children in parent2children.items():
        for a, b in combinations(children, 2):
            reward_a, reward_b = a["reward"], b["reward"]

            # NaN cannot be ordered, and ties carry no preference signal.
            if math.isnan(reward_a) or math.isnan(reward_b) or reward_a == reward_b:
                continue

            better, worse = (a, b) if reward_a > reward_b else (b, a)

            pairs.append({
                "agent": "aide",
                # Key spelling (sic) kept unchanged: it is part of the emitted schema.
                "comptation_name": comp_name,
                "parent_id": parent_id,
                "winner": better["plan_chain"],
                "loser": worse["plan_chain"],
                "score_diff": better["reward"] - worse["reward"],
            })

    return pairs

import os


import os
import random
from collections import defaultdict


def build_pairs_from_run_group(
    run_group_dir: str,
    max_pairs_per_comp: int = 500,
    seed: int = 42,
):
    """
    Collect preference pairs from every run directory inside a run group.

    Each sub-directory of ``run_group_dir`` that contains ``logs/journal.json``
    is passed to ``build_pairs_from_journal``. Pairs are grouped by competition
    (the directory name up to the first underscore), and each competition is
    randomly down-sampled to at most ``max_pairs_per_comp`` pairs.

    Args:
        run_group_dir: Directory whose sub-directories are individual runs.
        max_pairs_per_comp: Upper bound on pairs kept per competition;
            ``None`` disables the cap.
        seed: Seed for the down-sampling. A private RNG is used, so the global
            ``random`` state is left untouched.

    Returns:
        A flat list with the (possibly down-sampled) pairs of all competitions.
        A run whose journal raises is reported and skipped, not re-raised.
    """
    # Private generator: reproducible sampling without reseeding the global RNG.
    rng = random.Random(seed)

    comp2pairs = defaultdict(list)
    missing = []
    failed = []

    # os.listdir() order is arbitrary; sorting makes the grouping order, and
    # therefore the seeded sample, reproducible across machines.
    for name in sorted(os.listdir(run_group_dir)):
        comp_dir = os.path.join(run_group_dir, name)
        if not os.path.isdir(comp_dir):
            continue

        journal_path = os.path.join(comp_dir, "logs", "journal.json")
        if not os.path.isfile(journal_path):
            missing.append(comp_dir)
            continue

        try:
            pairs = build_pairs_from_journal(journal_path)
        except Exception as e:
            # Best effort: one broken journal must not abort the whole group.
            failed.append(name)
            print(f"[ERROR] {name}: {e}")
            continue

        # Competition name is taken from the run directory name.
        comp_name = name.split("_")[0]
        comp2pairs[comp_name].extend(pairs)
        print(f"[OK] {name}: {len(pairs)} pairs")

    # -------- apply per-competition cap --------
    all_pairs = []

    for comp, plist in comp2pairs.items():
        original = len(plist)

        if max_pairs_per_comp is not None and original > max_pairs_per_comp:
            plist = rng.sample(plist, max_pairs_per_comp)
            print(f"[CAP] {comp}: {original} -> {max_pairs_per_comp}")

        all_pairs.extend(plist)

    print("=" * 80)
    print(f"Total competitions processed: {len(comp2pairs)}")
    print(f"Total preference pairs: {len(all_pairs)}")
    print(f"Missing journal.json: {len(missing)}")
    print(f"Failed journals: {len(failed)}")

    return all_pairs


In [None]:
# Three run-group directories (their names suggest GPT-4.5, o4-mini and o3 runs —
# not verified here). Each one is scanned with the default cap and seed.
run_group_dir1 = (
    "/data/userdata/v-yuanteli/aide_gpt_4.5_our_results_55/"
    "2025-05-05T12-20-07-GMT_run-group_aide"
)
all_pairs1 = build_pairs_from_run_group(run_group_dir=run_group_dir1)

run_group_dir2 = (
    "/data/userdata/v-yuanteli/aide_gpt_o4mini_our_results_54/"
    "2025-05-04T09-53-02-GMT_run-group_aide"
)
all_pairs2 = build_pairs_from_run_group(run_group_dir=run_group_dir2)

run_group_dir3 = (
    "/data/userdata/v-yuanteli/aide_gpt_o3_our_results_53/"
    "2025-05-03T09-10-56-GMT_run-group_aide"
)
all_pairs3 = build_pairs_from_run_group(run_group_dir=run_group_dir3)



In [None]:
# Merge the per-run-group lists into one flat list, keeping group order.
all_pairs = [*all_pairs1, *all_pairs2, *all_pairs3]

In [None]:
# Number of preference pairs in the combined list (displayed as the cell output).
len(all_pairs)

In [None]:
# Persist the combined pairs as pretty-printed UTF-8 JSON (non-ASCII kept verbatim).
out_path = "/data/userdata/v-lijingyuan/dpo/aide_preference_pairs.json"

with open(out_path, mode="w", encoding="utf-8") as out_file:
    out_file.write(json.dumps(all_pairs, ensure_ascii=False, indent=2))

n_saved = len(all_pairs)
print(f"[SAVED] {n_saved} pairs -> {out_path}")

In [None]:
# Location of one run's journal.json, split per path component for readability.
path = (
    "/data/userdata/v-yuanteli/aide_gpt_4.5_our_results_55/"
    "2025-05-05T12-20-07-GMT_run-group_aide/"
    "text-normalization-challenge-english-language_63956b26-4513-4a62-8af1-e360795aac70/"
    "logs/journal.json"
)

In [None]:
# List the distinct competitions present in the combined pairs.
# The pair records store the competition under the key "comptation_name" (sic),
# as emitted by build_pairs_from_journal; "competition" does not exist and raised KeyError.
competitions = sorted({p["comptation_name"] for p in all_pairs})
print(competitions)
print(f"Total competitions: {len(competitions)}")

In [None]:
import json

# Read the JSON file in one go and show its first record as the cell output.
with open("/data/userdata/v-lijingyuan/train_reward_stage3_1/final_data_diff_2.json", "r") as src_file:
    data = json.loads(src_file.read())

data[0]

In [None]:
import matplotlib.pyplot as plt

# Data
# x positions (steps) and the two accuracy series plotted against them.
steps = [1044, 2089, 3134, 4179]

acc_from_zero = [
    0.631351888,
    0.7138063311576843,
    0.7631831169128418,
    0.8060882091522217,
]

acc_with_4b = [
    0.7128475308418274,
    0.8326941728591919,
    0.8710450530052185,
    0.8921380639076233,
]

# NOTE(review): the variable names (from_zero / with_4b) do not obviously match
# the legend labels used below (Qwen3 4B / MLW-o1) — confirm which series is which.

# Figure (explicit fig/ax interface so layout and saving act on this figure only)
fig, ax = plt.subplots(figsize=(6.5, 4.2))

ax.plot(
    steps,
    acc_from_zero,
    marker='o',
    linestyle='--',
    linewidth=2,
    markersize=6,
    label='AIDE few-shot (with Qwen3 4B)',
    color="#1f77b4",  # explicit, so the line always matches its annotation colour
)

ax.plot(
    steps,
    acc_with_4b,
    marker='s',
    linestyle='--',
    linewidth=2,
    markersize=6,
    label='AIDE few-shot (with MLW-o1)',
    color='red',
)

# Axes & labels
ax.set_xlabel("Training Step", fontsize=12)
ax.set_ylabel("Train Accuracy", fontsize=12)
ax.tick_params(axis="both", labelsize=11)

# Label the last point of each curve, shifted left of and below the marker.
ax.annotate(
    f"{acc_from_zero[-1]:.3f}",
    (steps[-1], acc_from_zero[-1]),
    textcoords="offset points",
    xytext=(-30, -20),
    fontsize=11,
    color="#1f77b4",
)

ax.annotate(
    f"{acc_with_4b[-1]:.3f}",
    (steps[-1], acc_with_4b[-1]),
    textcoords="offset points",
    xytext=(-30, -20),
    fontsize=11,
    color="red",
)

# Grid (light, paper-friendly)
ax.grid(True, linestyle=':', linewidth=0.8, alpha=0.7)

# Legend
ax.legend(fontsize=11, frameon=False)

# Finalise the layout BEFORE saving, so the PNG matches what is displayed.
fig.tight_layout()
fig.savefig(
    "train_accuracy_curve.png",
    dpi=300,
    bbox_inches="tight",
)
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Data
# x positions (steps) and the two validation-accuracy series plotted against them.
steps = [1000, 2045, 3090, 4135]

val_acc_from_zero = [
    0.6642073392868042,
    0.7099669575691223,
    0.7472032904624939,
    0.7635574340820312,
]

val_acc_with_4b = [
    0.7578574419021606,
    0.7881685495376587,
    0.8078787326812744,
    0.8116610050201416,
]

# NOTE(review): the variable names (from_zero / with_4b) do not obviously match
# the legend labels used below (Qwen3 4B / MLW-o1) — confirm which series is which.

# Figure (explicit fig/ax interface so layout and saving act on this figure only)
fig, ax = plt.subplots(figsize=(6.5, 4.2))

ax.plot(
    steps,
    val_acc_from_zero,
    marker='o',
    linestyle='--',
    linewidth=2,
    markersize=6,
    label='AIDE few-shot (with Qwen3 4B)',
    color="#1f77b4",  # explicit, so the line always matches its annotation colour
)

ax.plot(
    steps,
    val_acc_with_4b,
    marker='s',
    linestyle='--',
    linewidth=2,
    markersize=6,
    label='AIDE few-shot (with MLW-o1)',
    color='red',
)

# Axes & labels
ax.set_xlabel("Validation Step", fontsize=12)
ax.set_ylabel("Validation Accuracy", fontsize=12)
ax.tick_params(axis="both", labelsize=11)

# Label the last point of each curve, shifted left of and below the marker.
ax.annotate(
    f"{val_acc_from_zero[-1]:.3f}",
    (steps[-1], val_acc_from_zero[-1]),
    textcoords="offset points",
    xytext=(-30, -20),
    fontsize=11,
    color="#1f77b4",
)

ax.annotate(
    f"{val_acc_with_4b[-1]:.3f}",
    (steps[-1], val_acc_with_4b[-1]),
    textcoords="offset points",
    xytext=(-30, -20),
    fontsize=11,
    color="red",
)

# Grid (light, paper-friendly)
ax.grid(True, linestyle=':', linewidth=0.8, alpha=0.7)

# Legend
ax.legend(fontsize=11, frameon=False)

# Finalise the layout BEFORE saving, so the PNG matches what is displayed.
fig.tight_layout()
fig.savefig(
    "val_accuracy_curve.png",
    dpi=300,
    bbox_inches="tight",
)
plt.show()
