In [1]:
# ───────────────────────────────────────────────
# 0. Manual configuration
# ───────────────────────────────────────────────
%cd ..
%pwd
from pathlib import Path
import torch

DATA_ROOT = Path("data/chainscope/questions_json")
TEMPLATE_PATH = Path("data/chainscope/templates/instructions.json")
LOG_DIR = Path("logs")
OUT_DIR = Path("e_confirm_xy_yx/outputs")          # completions, verification, matches
MODEL_PATH = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"

# choose folder subsets
DATASETS = ["gt_NO_1", "gt_YES_1"]

BATCH_SIZE = 64
MAX_NEW_TOKENS = None

# ─── multi-run & sampling ────────────────────────────────────────
N_RUNS      = 10      # generate 10 reasoning chains per question
TEMPERATURE = 0.7     # sampling temperature
TOP_P       = 0.9     # nucleus-sampling top-p
# ──────────────────────────────────────────────────────────────────────

SAVE_HIDDEN, SAVE_ATTN = False, False
HIDDEN_LAYERS, ATTN_LAYERS = [0, -1], [0, -1]   # ignored unless above switches True
N_VERIFY = 0   # 0 == verify all

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ───────────────────────────────────────────────
# 1. Load model & tokenizer  (your helper)
# ───────────────────────────────────────────────
from a_confirm_posthoc.utils.model_handler import load_model_and_tokenizer

model, tokenizer, model_name, device = load_model_and_tokenizer(MODEL_PATH)
model.to(device)


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


/root/CoTFaithChecker


  from .autonotebook import tqdm as notebook_tqdm
2025-04-26 12:44:48,921 - INFO - CUDA is available. Using GPU.
2025-04-26 12:44:48,923 - INFO - Loading model and tokenizer: deepseek-ai/DeepSeek-R1-Distill-Llama-8B onto cuda
Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.86s/it]
2025-04-26 12:44:59,395 - INFO - Model and tokenizer loaded successfully.


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((4096,), eps=1e-05)
    (rotary_

In [2]:
from e_confirm_xy_yx.main.data_loader import get_dataset_files

# Cluster filter passed to the data loader ("no_wm" is deliberately left out).
CLUSTERS = ["world"]

# Question files for the configured dataset folders, restricted to CLUSTERS.
dataset_files = get_dataset_files(DATA_ROOT, DATASETS, clusters=CLUSTERS)

# Aggregated per-cluster completion files already present on disk.
# NOTE(review): this glob is evaluated before the inference cell below runs, so
# it only reflects completions from earlier runs — re-run after inference.
completion_dir = OUT_DIR / "completions" / "clusters"
completion_files = sorted(completion_dir.glob("*_completions.json"))

# Verified files already present on disk.
verified_files = sorted((OUT_DIR / "verified").glob("*_verified.json"))

# Pair each *_NO_* verified file with its *_YES_* sibling in the same folder,
# keeping only the pairs whose YES file actually exists.
pairs = [
    (no_path, yes_path)
    for no_path in verified_files
    if "_NO_" in no_path.name
    for yes_path in [no_path.parent / no_path.name.replace("_NO_", "_YES_")]
    if yes_path.exists()
]


data_loader — INFO — Logger initialised; log file = /root/CoTFaithChecker/logs/data_loader_20250426_124459.log
2025-04-26 12:44:59,426 - INFO - Logger initialised; log file = /root/CoTFaithChecker/logs/data_loader_20250426_124459.log
data_loader — INFO — → kept 9 after cluster filter ['world']
2025-04-26 12:44:59,430 - INFO - → kept 9 after cluster filter ['world']
data_loader — INFO — Found 9 files in data/chainscope/questions_json/gt_NO_1
2025-04-26 12:44:59,432 - INFO - Found 9 files in data/chainscope/questions_json/gt_NO_1
data_loader — INFO — → kept 9 after cluster filter ['world']
2025-04-26 12:44:59,435 - INFO - → kept 9 after cluster filter ['world']
data_loader — INFO — Found 9 files in data/chainscope/questions_json/gt_YES_1
2025-04-26 12:44:59,436 - INFO - Found 9 files in data/chainscope/questions_json/gt_YES_1
data_loader — INFO — Total files collected: 18
2025-04-26 12:44:59,438 - INFO - Total files collected: 18


In [3]:

# ───────────────────────────────────────────────
# 2. Dataset files
# ───────────────────────────────────────────────
# `dataset_files` is taken from the previous cell (cluster-filtered list).

# ───────────────────────────────────────────────
# 3. Prepare prompt builder
# ───────────────────────────────────────────────
from e_confirm_xy_yx.main.prompt_builder import PromptBuilder

pb = PromptBuilder(
    template_path=TEMPLATE_PATH,
    style="instr-v0",
    mode="cot",
)

# ───────────────────────────────────────────────
# 4. Run inference
# ───────────────────────────────────────────────
from e_confirm_xy_yx.main.inference import run_inference

run_inference(
    # inputs
    dataset_files=dataset_files,
    prompt_builder=pb,
    # model handles created in the first cell
    model=model,
    tokenizer=tokenizer,
    model_name=model_name,
    device=device,
    # generation settings
    batch_size=BATCH_SIZE,
    max_new_tokens=MAX_NEW_TOKENS,
    n_runs=N_RUNS,
    temperature=TEMPERATURE,
    top_p=TOP_P,
    # optional activation dumps (layer lists are ignored unless the flag is True)
    save_hidden=SAVE_HIDDEN,
    hidden_layers=HIDDEN_LAYERS,
    save_attention=SAVE_ATTN,
    attn_layers=ATTN_LAYERS,
    # destination folder for the completion files
    output_dir=OUT_DIR / "completions",
)


prompt_builder — INFO — Logger initialised; log file = /root/CoTFaithChecker/logs/prompt_builder_20250426_124459.log
2025-04-26 12:44:59,452 - INFO - Logger initialised; log file = /root/CoTFaithChecker/logs/prompt_builder_20250426_124459.log
prompt_builder — INFO — PromptBuilder initialised with style=instr-v0, mode=cot
2025-04-26 12:44:59,455 - INFO - PromptBuilder initialised with style=instr-v0, mode=cot
inference — INFO — Logger initialised; log file = /root/CoTFaithChecker/logs/inference_20250426_124459.log
2025-04-26 12:44:59,458 - INFO - Logger initialised; log file = /root/CoTFaithChecker/logs/inference_20250426_124459.log
inference — INFO — Processing wm-world-natural-area_gt_NO_1_9e073217.json
2025-04-26 12:44:59,461 - INFO - Processing wm-world-natural-area_gt_NO_1_9e073217.json
2025-04-26 12:44:59,462 - DEBUG - Loading data/chainscope/questions_json/gt_NO_1/wm-world-natural-area_gt_NO_1_9e073217.json
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [4]:
"""
# ───────────────────────────────────────────────
# 5. Verify model answers
# ───────────────────────────────────────────────
from e_confirm_xy_yx.main.verifier import run_verification
completion_files = sorted((OUT_DIR / "completions").glob("*_completions.json"))

run_verification(
    completion_files=completion_files,
    n_questions=N_VERIFY,
    output_dir=OUT_DIR / "verified",
)
"""

'\n# ───────────────────────────────────────────────\n# 5. Verify model answers\n# ───────────────────────────────────────────────\nfrom e_confirm_xy_yx.main.verifier import run_verification\ncompletion_files = sorted((OUT_DIR / "completions").glob("*_completions.json"))\n\nrun_verification(\n    completion_files=completion_files,\n    n_questions=N_VERIFY,\n    output_dir=OUT_DIR / "verified",\n)\n'

In [5]:

"""
# ───────────────────────────────────────────────
# 6. Cross-match YES vs NO answers
# ───────────────────────────────────────────────
from e_confirm_xy_yx.main.match_checker import check_matches
verified_files = sorted((OUT_DIR / "verified").glob("*_verified.json"))

# pair them: every gt_NO_X file with its matching gt_YES_X (adapt if lt)
pairs = [
    (
        vf,
        vf.parent
        / vf.name.replace("gt_NO", "gt_YES")
    )
    for vf in verified_files
    if "_NO_" in vf.name
]

for no_file, yes_file in pairs:
    out_match = (
        OUT_DIR
        / "matches"
        / f"{no_file.stem.replace('_verified','')}_match.json"
    )
    out_match.parent.mkdir(parents=True, exist_ok=True)
    check_matches(no_file, yes_file, out_match)
"""


'\n# ───────────────────────────────────────────────\n# 6. Cross-match YES vs NO answers\n# ───────────────────────────────────────────────\nfrom e_confirm_xy_yx.main.match_checker import check_matches\nverified_files = sorted((OUT_DIR / "verified").glob("*_verified.json"))\n\n# pair them: every gt_NO_X file with its matching gt_YES_X (adapt if lt)\npairs = [\n    (\n        vf,\n        vf.parent\n        / vf.name.replace("gt_NO", "gt_YES")\n    )\n    for vf in verified_files\n    if "_NO_" in vf.name\n]\n\nfor no_file, yes_file in pairs:\n    out_match = (\n        OUT_DIR\n        / "matches"\n        / f"{no_file.stem.replace(\'_verified\',\'\')}_match.json"\n    )\n    out_match.parent.mkdir(parents=True, exist_ok=True)\n    check_matches(no_file, yes_file, out_match)\n'