In [None]:

import json
from collections import defaultdict
from itertools import combinations
from pathlib import Path

import matplotlib.pyplot as plt
from transformers import AutoTokenizer



In [None]:
# Load the Qwen3-4B-Thinking tokenizer. In this notebook it is used to count
# tokens and to decode truncated token ids back to text.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-4B-Thinking-2507")
tokenizer  # last expression: show the tokenizer repr as the cell output

In [None]:
# Load the mapping from a loop id to the id of its root loop; later cells use it
# to fill the ``root_id`` field of every record.
amlt_root_json_path = Path("/home/bowen/workspace/fine-tune/root_id_map.json")
# Read as UTF-8 explicitly so the result does not depend on the machine's
# locale (the JSON outputs of this notebook are written as UTF-8 as well).
with open(amlt_root_json_path, "r", encoding="utf-8") as f:
    root_map = json.load(f)


In [None]:
root_map  # inspect the loaded loop-id -> root-id mapping

In [None]:
def build_hypo_chain(parent_hyps, current_hyp, max_tokens=8000, tok=None):
    """
    Join ancestor hypotheses and the current hypothesis into one "->"-separated
    string that fits into ``max_tokens`` tokens.

    Args:
        parent_hyps: ancestor hypotheses ordered nearest parent first; they are
            reversed here so the chain reads oldest -> newest.
        current_hyp: hypothesis of the entry itself; always the last chain element.
        max_tokens: token budget for the returned text.
        tok: tokenizer to use. It must be callable as
            ``tok(text, add_special_tokens=False)`` returning a mapping with
            ``"input_ids"`` and must provide ``decode(ids, skip_special_tokens=True)``.
            Defaults to the notebook-level ``tokenizer``.

    Returns:
        The chain text. When the chain is too long the oldest ancestors are
        dropped first; if the current hypothesis alone still exceeds the budget,
        only its LAST ``max_tokens`` tokens are kept. Returns "" when nothing
        can fit because ``max_tokens`` is not positive.
    """
    if tok is None:
        tok = tokenizer  # notebook-level tokenizer loaded in an earlier cell

    sep = "->"
    # Fresh list: the caller's ``parent_hyps`` is never mutated by the pops below.
    chain = list(reversed(parent_hyps)) + [current_hyp]

    while True:
        chain_text = sep.join(chain)
        input_ids = tok(chain_text, add_special_tokens=False)["input_ids"]

        # Already within budget.
        if len(input_ids) <= max_tokens:
            return chain_text

        # Too long: drop the oldest ancestor and measure again.
        if len(chain) > 1:
            chain.pop(0)
            continue

        # Only current_hyp is left and it is still too long.
        # ``ids[-0:]`` would return *every* token, so handle a non-positive
        # budget explicitly instead of slicing.
        if max_tokens <= 0:
            return ""
        # Hard truncation: keep the trailing max_tokens tokens.
        return tok.decode(input_ids[-max_tokens:], skip_special_tokens=True)

        
        
def get_parent_hypotheses(id_to_entry, entry):
    """Collect the hypotheses of accepted ancestors, nearest parent first.

    Follows the ``parent_id`` links iteratively, starting at ``entry``. An
    ancestor contributes its hypothesis only when its ``feedback_decision``
    equals ``True``; other ancestors are skipped but the walk continues
    through them.

    Args:
        id_to_entry: mapping from an id to an entry of the form ``{"input": {...}}``.
        entry: the entry whose ancestors are collected.

    Returns:
        List of hypotheses ordered from the nearest accepted ancestor to the
        farthest one. The walk stops at a falsy ``parent_id``, at an id that is
        missing from ``id_to_entry``, or when an id repeats.
    """
    hypotheses = []
    seen_ids = set()  # guards against cyclic parent links (would loop forever)
    parent_id = entry['input'].get('parent_id')
    while parent_id and parent_id not in seen_ids:
        seen_ids.add(parent_id)
        parent_entry = id_to_entry.get(parent_id)
        if not parent_entry:
            break
        parent_input = parent_entry['input']
        # Kept as an equality test on purpose: truthy non-boolean values such
        # as strings must not count as an accepted decision.
        if parent_input["feedback_decision"] == True:  # noqa: E712
            hypotheses.append(parent_input['hypothesis'])
        parent_id = parent_input.get('parent_id')
    return hypotheses


def get_parent_scores(id_to_entry, entry):
    """Collect the ``valid_score`` of accepted ancestors, nearest parent first.

    Follows the ``parent_id`` links iteratively, starting at ``entry``. An
    ancestor contributes its ``valid_score`` only when its
    ``feedback_decision`` equals ``True``; other ancestors are skipped but the
    walk continues through them.

    Args:
        id_to_entry: mapping from an id to an entry of the form ``{"input": {...}}``.
        entry: the entry whose ancestors are inspected.

    Returns:
        List of scores ordered from the nearest accepted ancestor to the
        farthest one (values are returned as stored, so they may be ``None``).
        The walk stops at a falsy ``parent_id``, at an id that is missing from
        ``id_to_entry``, or when an id repeats.
    """
    scores = []
    seen_ids = set()  # guards against cyclic parent links (would loop forever)
    parent_id = entry['input'].get('parent_id')
    while parent_id and parent_id not in seen_ids:
        seen_ids.add(parent_id)
        parent_entry = id_to_entry.get(parent_id)
        if not parent_entry:
            break
        parent_input = parent_entry['input']
        # Kept as an equality test on purpose: truthy non-boolean values such
        # as strings must not count as an accepted decision.
        if parent_input["feedback_decision"] == True:  # noqa: E712
            scores.append(parent_input['valid_score'])
        parent_id = parent_input.get('parent_id')
    return scores



In [None]:
# Build two module-level datasets from every experiment JSON in ``amlt_json_path``:
#   final_pairs - one {"input": {...}} record per loop that has a final
#                 hypothesis, feedback and code, enriched with its
#                 hypothesis chain and the score of its nearest accepted parent;
#   final_data  - preference pairs (winner / loser hypothesis chains) built
#                 from every 2-combination of records sharing
#                 (exp_name, comptation_name, root_id).

# Value stored as ``parent_score`` when a loop has no accepted ancestor.
NO_PARENT_SCORE = 10000000
# A chain must have fewer than this many "->"-separated steps to be paired.
MAX_CHAIN_STEPS = 6

final_data = []
final_pairs = []
amlt_json_path = Path("/home/bowen/workspace/fine-tune/amlt_jsons")

# sorted(): iterdir() yields entries in arbitrary order, which made the output
# order differ between runs. is_file(): skip sub-directories, which cannot be
# opened as JSON.
for exp_json in sorted(p for p in amlt_json_path.iterdir() if p.is_file()):
    with open(exp_json, "r", encoding="utf-8") as f:
        data = json.load(f)

    # The metric direction lives in the "... scenario" entry. Look it up before
    # walking the loops so that (a) a loop listed before the scenario entry
    # still gets a direction and (b) the value of the previous file is never
    # silently reused.
    bigger_is_better = next(
        (
            int(entry_data['metric_direction'])
            for key, entry_data in data.items()
            if key.split(" ")[-1] == "scenario"
        ),
        None,
    )

    # --- Step 1: one record per usable loop ---
    all_pairs = []
    for ids, loop_data in data.items():
        id_parts = ids.split(" ")
        comptation_name = id_parts[1]
        if id_parts[-1] == "scenario":
            bigger_is_better = int(loop_data['metric_direction'])

        if not ("final_hypothesis" in loop_data and "feedback" in loop_data and "code" in loop_data):
            continue
        if bigger_is_better is None:
            print(f"[skip] {ids!r}: no scenario entry with metric_direction in {exp_json.name}")
            continue

        # ``valid_score`` is read as a mapping of mappings; the "ensemble"
        # value of its first inner mapping is used. A missing or empty
        # ``valid_score`` yields None instead of raising.
        metric_tables = loop_data.get("valid_score") or {}
        first_metric = next(iter(metric_tables.values()), None)
        valid_score = first_metric.get("ensemble", None) if isinstance(first_metric, dict) else None

        all_pairs.append({
            "input": {
                "exp_name": exp_json.name.replace(".json", ""),
                "comptation_name": comptation_name,
                "bigger_is_better": bigger_is_better,
                "loop_id": int(id_parts[-1]),
                "hypothesis": loop_data["final_hypothesis"]["hypothesis"],
                "valid_score": valid_score,
                "feedback_decision": loop_data["feedback"]['decision'],
                "parent_id": loop_data.get("parent_id", None),
                "root_id": int(root_map[ids].split(" ")[-1]) if ids in root_map else None,
            }
        })

    # --- Step 2: attach the hypothesis chain and the nearest accepted parent score ---
    # Lookup key has the form "<exp_name> <comptation_name> <loop_id>".
    # NOTE(review): assumes ``parent_id`` values use this same format - confirm.
    id_to_entry = {
        f"{e['input']['exp_name']} {e['input']['comptation_name']} {e['input']['loop_id']}": e
        for e in all_pairs
    }

    for target_entry in all_pairs:
        target_input = target_entry['input']
        parent_hyps = get_parent_hypotheses(id_to_entry, target_entry)
        target_input['hypothesis_chain'] = build_hypo_chain(parent_hyps, target_input.get('hypothesis'))
        parent_scores = get_parent_scores(id_to_entry, target_entry)
        target_input['parent_score'] = parent_scores[0] if parent_scores else NO_PARENT_SCORE

    final_pairs.extend(all_pairs)

    # --- Step 3: group records by (exp_name, comptation_name, root_id) ---
    # NOTE(review): records whose id is absent from root_map all share
    # root_id=None and therefore land in the same group - confirm this is intended.
    groups = defaultdict(list)
    for item in all_pairs:
        inp = item["input"]
        groups[(inp["exp_name"], inp["comptation_name"], inp["root_id"])].append(inp)

    # --- Step 4: build C(n, 2) preference pairs inside every group ---
    preference_pairs = []
    for (exp_name, comp_name, root_id), items in groups.items():
        if len(items) < 2:
            continue  # fewer than two records: nothing to compare

        higher_wins = items[0]["bigger_is_better"] == 1

        for a, b in combinations(items, 2):
            # Both sides need a real improvement-over-parent value. The old
            # filter only looked at ``a`` (score_a > -10000), so a ``b`` without
            # an accepted parent received a delta of about -1e7 and a forced label;
            # None scores raised TypeError in the subtraction.
            if any(
                side["valid_score"] is None
                or side["parent_score"] is None
                or side["parent_score"] == NO_PARENT_SCORE
                for side in (a, b)
            ):
                continue

            longest_chain = max(
                len(a["hypothesis_chain"].split("->")),
                len(b["hypothesis_chain"].split("->")),
            )
            if longest_chain >= MAX_CHAIN_STEPS:
                continue

            # Score = change of the validation score relative to the nearest accepted parent.
            score_a = a["valid_score"] - a["parent_score"]
            score_b = b["valid_score"] - b["parent_score"]

            # NOTE(review): on a tie ``b`` is labelled the winner (unchanged
            # behavior) - consider dropping tied pairs.
            a_wins = score_a > score_b if higher_wins else score_a < score_b
            winner, loser = (a, b) if a_wins else (b, a)

            preference_pairs.append({
                "exp_name": exp_name,
                "comptation_name": comp_name,
                "root_id": root_id,
                "loop_pair": (a["loop_id"], b["loop_id"]),
                "winner": winner["hypothesis_chain"],
                "loser": loser["hypothesis_chain"],
                "score_a": score_a,
                "score_b": score_b,
            })

    final_data.extend(preference_pairs)

In [None]:
# Keep only records whose chain has fewer than 6 "->"-separated steps and
# tokenizes to fewer than 2000 tokens.
final_pairs = [
    record
    for record in final_pairs
    if len(record["input"]["hypothesis_chain"].split("->")) < 6
    and len(tokenizer(record["input"]["hypothesis_chain"], add_special_tokens=False)["input_ids"]) < 2000
]

In [None]:
len(final_pairs)  # number of records currently held in final_pairs

In [None]:
# Token length of every hypothesis chain in final_pairs; S feeds the histogram cell.
# (matplotlib is imported in the notebook's top import cell, not here.)
S = [
    len(tokenizer(record["input"]["hypothesis_chain"], add_special_tokens=False)["input_ids"])
    for record in final_pairs
]

In [None]:
# Histogram of the chain token lengths collected in S.
fig, ax = plt.subplots()
ax.hist(S, bins=30)
ax.set_xlabel("Token length")
ax.set_ylabel("Count")
ax.set_title("Distribution of tokenized hypothesis_chain length")
plt.show()

In [None]:
# Note: only hypothesis chains with fewer than 6 "->"-separated steps are kept.

In [None]:
len(final_data)  # number of preference pairs currently held in final_data

In [None]:
# Keep only pairs in which both the winner and the loser chain tokenize to
# fewer than 2000 tokens.
final_data = [
    pair
    for pair in final_data
    if len(tokenizer(pair["winner"], add_special_tokens=False)["input_ids"]) < 2000
    and len(tokenizer(pair["loser"], add_special_tokens=False)["input_ids"]) < 2000
]

In [None]:
# Preview the first ten preference pairs.
final_data[:10]

In [None]:
# Persist both datasets as indented UTF-8 JSON; ensure_ascii=False keeps
# non-ASCII hypothesis text readable in the files.
# (json is already imported in the notebook's top import cell.)
for out_name, payload in (
    ("final_data_diff_2.json", final_data),
    ("final_pairs_diff_2.json", final_pairs),
):
    with open(out_name, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)


    

In [None]:
# Manual step - run in a terminal, NOT in this kernel (raw shell text in a
# Python cell is a SyntaxError and breaks "Restart & Run All").
#
# Copies the ckpt/rm_bt_s1028_gc directory to ib.ep03.213428.xyz.
# NOTE(review): the first draft of this command named ib.ep10.213428.xyz as a
# bare argument (no "host:" prefix), so scp would have treated it as a local
# file; presumably ep10 is the machine to run the command on - confirm.
# Replace "username" with the real account name.
#
#   scp -r -B /data/userdata/v-lijingyuan/ckpt/rm_bt_s1028_gc \
#         username@ib.ep03.213428.xyz:/data/userdata/v-lijingyuan/
