In [None]:
# ───────────────────────────────────────────────
# 0. Manual configuration
# ───────────────────────────────────────────────
%cd ..
%pwd
from pathlib import Path
import torch

DATA_ROOT = Path("data/chainscope/questions_json")
TEMPLATE_PATH = Path("data/chainscope/templates/instructions.json")
LOG_DIR = Path("logs")
OUT_DIR = Path("e_confirm_xy_yx/outputs")          # completions, verification, matches
MODEL_PATH = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"

# choose folder subsets
DATASETS = ["gt_NO_1", "gt_YES_1"]

BATCH_SIZE = 64
MAX_NEW_TOKENS = ModuleNotFoundError
SAVE_HIDDEN, SAVE_ATTN = False, False
HIDDEN_LAYERS, ATTN_LAYERS = [0, -1], [0, -1]   # ignored unless above switches True
N_VERIFY = 0   # 0 == verify all

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ───────────────────────────────────────────────
# 1. Load model & tokenizer  (your helper)
# ───────────────────────────────────────────────
from a_confirm_posthoc.utils.model_handler import load_model_and_tokenizer

model, tokenizer, model_name, device = load_model_and_tokenizer(MODEL_PATH)
model.to(device)


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


/root/CoTFaithChecker


  from .autonotebook import tqdm as notebook_tqdm
2025-04-25 15:37:38,237 - INFO - CUDA is available. Using GPU.
2025-04-25 15:37:38,238 - INFO - Loading model and tokenizer: deepseek-ai/DeepSeek-R1-Distill-Llama-8B onto cuda
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.88it/s]
2025-04-25 15:37:42,471 - INFO - Model and tokenizer loaded successfully.


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((4096,), eps=1e-05)
    (rotary_

In [2]:
# ───────────────────────────────────────────────
# 2. Collect dataset files
# ───────────────────────────────────────────────
from e_confirm_xy_yx.main.data_loader import get_dataset_files
dataset_files = get_dataset_files(DATA_ROOT, DATASETS)

# ───────────────────────────────────────────────
# 3. Prepare prompt builder
# ───────────────────────────────────────────────
from e_confirm_xy_yx.main.prompt_builder import PromptBuilder
pb = PromptBuilder(template_path=TEMPLATE_PATH, style="instr-v0", mode="cot")

# ───────────────────────────────────────────────
# 4. Run inference
# ───────────────────────────────────────────────
from e_confirm_xy_yx.main.inference import run_inference

run_inference(
    dataset_files=dataset_files,
    prompt_builder=pb,
    model=model,
    tokenizer=tokenizer,
    model_name=model_name,
    device=device,
    batch_size=BATCH_SIZE,
    max_new_tokens=MAX_NEW_TOKENS,
    save_hidden=SAVE_HIDDEN,
    hidden_layers=HIDDEN_LAYERS,
    save_attention=SAVE_ATTN,
    attn_layers=ATTN_LAYERS,
    output_dir=OUT_DIR / "completions",
)



data_loader — INFO — Logger initialised; log file = /root/CoTFaithChecker/logs/data_loader_20250425_153757.log
2025-04-25 15:37:57,069 - INFO - Logger initialised; log file = /root/CoTFaithChecker/logs/data_loader_20250425_153757.log
data_loader — INFO — Found 57 files in data/chainscope/questions_json/gt_NO_1
2025-04-25 15:37:57,071 - INFO - Found 57 files in data/chainscope/questions_json/gt_NO_1
data_loader — INFO — Found 56 files in data/chainscope/questions_json/gt_YES_1
2025-04-25 15:37:57,072 - INFO - Found 56 files in data/chainscope/questions_json/gt_YES_1
data_loader — INFO — Total files collected: 113
2025-04-25 15:37:57,072 - INFO - Total files collected: 113
prompt_builder — INFO — Logger initialised; log file = /root/CoTFaithChecker/logs/prompt_builder_20250425_153757.log
2025-04-25 15:37:57,073 - INFO - Logger initialised; log file = /root/CoTFaithChecker/logs/prompt_builder_20250425_153757.log
prompt_builder — INFO — PromptBuilder initialised with style=instr-v0, mode=c

In [8]:
# ───────────────────────────────────────────────
# 5. Verify model answers
# ───────────────────────────────────────────────
from e_confirm_xy_yx.main.verifier3 import run_verification
completion_files = sorted((OUT_DIR / "completions").glob("*_completions.json"))

run_verification(
    completion_files=completion_files,
    n_questions=N_VERIFY,
    output_dir=OUT_DIR / "verified",
)

verifier — INFO — Verifying outputs/completions/aircraft-speeds_gt_NO_1_377c39d3_DeepSeek-R1-Distill-Llama-8B_completions.json
2025-04-25 16:29:36,526 - INFO - Verifying outputs/completions/aircraft-speeds_gt_NO_1_377c39d3_DeepSeek-R1-Distill-Llama-8B_completions.json
2025-04-25 16:29:36,527 - INFO - AFC is enabled with max remote calls: 10.
2025-04-25 16:29:37,190 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent "HTTP/1.1 200 OK"
2025-04-25 16:29:37,193 - INFO - AFC remote call 1 is done.
2025-04-25 16:29:37,193 - INFO - AFC is enabled with max remote calls: 10.
2025-04-25 16:29:37,650 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent "HTTP/1.1 200 OK"
2025-04-25 16:29:37,653 - INFO - AFC remote call 1 is done.
2025-04-25 16:29:37,653 - INFO - AFC is enabled with max remote calls: 10.
2025-04-25 16:29:38,219 - INFO - HTTP Request: POST https://generati

KeyboardInterrupt: 

In [9]:

# ───────────────────────────────────────────────
# 6. Cross-match YES vs NO answers
# ───────────────────────────────────────────────
from e_confirm_xy_yx.main.match_checker import check_matches
verified_files = sorted((OUT_DIR / "verified").glob("*_verified.json"))

# pair them: every gt_NO_X file with its matching gt_YES_X (adapt if lt)
pairs = [
    (
        vf,
        vf.parent
        / vf.name.replace("gt_NO", "gt_YES")
    )
    for vf in verified_files
    if "_NO_" in vf.name
]

for no_file, yes_file in pairs:
    out_match = (
        OUT_DIR
        / "matches"
        / f"{no_file.stem.replace('_verified','')}_match.json"
    )
    out_match.parent.mkdir(parents=True, exist_ok=True)
    check_matches(no_file, yes_file, out_match)


match_checker — INFO — Logger initialised; log file = /root/CoTFaithChecker/logs/match_checker_20250425_165027.log
2025-04-25 16:50:27,159 - INFO - Logger initialised; log file = /root/CoTFaithChecker/logs/match_checker_20250425_165027.log


FileNotFoundError: [Errno 2] No such file or directory: 'outputs/verified/aircraft-speeds_gt_YES_1_377c39d3_DeepSeek-R1-Distill-Llama-8B_verified.json'