In [1]:
import os
import json
import logging
import sys

# Step from the notebook's folder up to the repository root, once per kernel.
# A bare `%cd ..` climbs one more level on every re-run of this cell, which
# moves the relative DATA_DIR and the `src` path entry below. (The former
# mid-cell `%pwd` displayed nothing, since only a cell's last expression is shown.)
if "REPO_ROOT" not in globals():
    os.chdir(os.pardir)
    REPO_ROOT = os.getcwd()
else:
    # Re-run: return to the directory remembered from the first execution.
    os.chdir(REPO_ROOT)
print(REPO_ROOT)  # keep the visible confirmation that `%cd` used to echo

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

# Add src to path (guarded so re-running the cell does not pile up duplicates)
SRC_DIR = os.path.abspath(os.path.join(REPO_ROOT, 'src'))
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

# %% 
# Configuration

# --- Core Parameters for a Single Run --- 
MODEL_PATH = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
DATASET = "mmlu"

# --- Parameters for the Logprobs Experiment --- 
HINT_TYPES_TO_ANALYZE = ["induced_urgency", "sycophancy"] # List of hints to compare against baseline
INTERVENTION_TYPES = ["dots", "dots_eot"] # Types of intervention prompts
PERCENTAGE_STEPS = list(range(10, 101, 10)) # Analyze at 10%, 20%, ..., 100%

# --- Run Control & File Parameters --- 
N_QUESTIONS = 500 # Number of questions used to generate source files (e.g., completions_with_500.json)
DEMO_MODE_N = 5 # Set to None to run on all relevant questions, or integer N for first N
DATA_DIR = "./data"

/root/CoTFaithChecker


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
from a_confirm_posthoc.src.utils.model_handler import load_model_and_tokenizer

# Load the model and tokenizer named by MODEL_PATH. On success this binds
# `model`, `tokenizer`, `device` and `model_name` at notebook level;
# `model_name_from_load` is also bound but is not read in this cell.
logging.info(f"Loading model and tokenizer: {MODEL_PATH}")
try:
    model, tokenizer, model_name_from_load, device = load_model_and_tokenizer(MODEL_PATH)
    logging.info(f"Model loaded successfully on device: {device}")
    # Derive model_name for directory paths (consistent with other scripts)
    model_name = MODEL_PATH.split('/')[-1]
except Exception as e:
    # Log the full traceback first: the SystemExit raised below carries only a short message.
    logging.error(f"Failed to load model or tokenizer: {e}", exc_info=True)
    # Abort the run, chaining the original error so it stays attached as __cause__.
    raise SystemExit("Model/Tokenizer loading failed.") from e



  from .autonotebook import tqdm as notebook_tqdm
2025-04-24 13:48:48,942 - INFO - Loading model and tokenizer: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
2025-04-24 13:48:48,982 - INFO - CUDA is available. Using GPU.
2025-04-24 13:48:48,983 - INFO - Loading model and tokenizer: deepseek-ai/DeepSeek-R1-Distill-Llama-8B onto cuda
2025-04-24 13:48:48,987 - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
2025-04-24 13:48:49,081 - DEBUG - https://huggingface.co:443 "HEAD /deepseek-ai/DeepSeek-R1-Distill-Llama-8B/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
2025-04-24 13:48:49,659 - DEBUG - https://huggingface.co:443 "HEAD /deepseek-ai/DeepSeek-R1-Distill-Llama-8B/resolve/main/config.json HTTP/1.1" 200 0
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.15s/it]
2025-04-24 13:48:52,545 - DEBUG - https://huggingface.co:443 "HEAD /deepseek-ai/DeepSeek-R1-Distill-Llama-8B/resolve/main/generation_config.json HTTP/1.1" 200 0
2025-04-24 13:48:56,311 - INFO - Model 

In [3]:
# Run Logprobs Analysis Pipeline

from d_logprobs_analysis.src.main.pipeline import run_logprobs_analysis_for_hint_types

# Look the handles up by name instead of referencing them directly: when the
# loading cell aborts on a fresh kernel, `model` and `tokenizer` are never
# bound, so a bare `if model and tokenizer` would raise NameError rather than
# reach the "skipped" branch. `is not None` also avoids relying on the
# truthiness of the model/tokenizer objects.
_model_loaded = globals().get("model") is not None
_tokenizer_loaded = globals().get("tokenizer") is not None

if _model_loaded and _tokenizer_loaded:
    # Define base output directory for this model/dataset
    output_dir_base = os.path.join(DATA_DIR, DATASET, model_name)

    run_logprobs_analysis_for_hint_types(
        model=model,
        tokenizer=tokenizer,
        device=device,
        model_name=model_name, # Use derived name for path consistency
        dataset=DATASET,
        data_dir=DATA_DIR,
        hint_types_to_analyze=HINT_TYPES_TO_ANALYZE,
        intervention_types=INTERVENTION_TYPES,
        percentage_steps=PERCENTAGE_STEPS,
        n_questions=N_QUESTIONS,
        demo_mode_n=DEMO_MODE_N,
        output_dir_base=output_dir_base
    )
else:
    logging.error("Pipeline execution skipped due to model/tokenizer loading failure.")


2025-04-24 13:48:56,354 - INFO - --- Starting Logprobs Analysis --- 
2025-04-24 13:48:56,356 - INFO - Model: DeepSeek-R1-Distill-Llama-8B, Dataset: mmlu
2025-04-24 13:48:56,356 - INFO - Analyzing Hint Types: ['induced_urgency', 'sycophancy']
2025-04-24 13:48:56,357 - INFO - Intervention Types: ['dots', 'dots_eot']
2025-04-24 13:48:56,376 - INFO - Loaded MCQ data for 5000 questions.
2025-04-24 13:48:56,377 - INFO - Loading hint verification data for hint type: induced_urgency
2025-04-24 13:48:56,379 - INFO -   Found 71 questions that switched to hint 'induced_urgency'.
2025-04-24 13:48:56,380 - INFO - Loading hint verification data for hint type: sycophancy
2025-04-24 13:48:56,382 - INFO -   Found 77 questions that switched to hint 'sycophancy'.
2025-04-24 13:48:56,382 - INFO - Total relevant QIDs (switched to any analyzed hint): 109
2025-04-24 13:48:56,383 - INFO - Determined standard options: ['A', 'B', 'C', 'D']
2025-04-24 13:48:56,405 - INFO - Standard option token IDs: {'A': 362, '

2025-04-24 13:48:58,213 - INFO - Analyzing 530 reasoning tokens across 10 steps.
Processing Baseline:  20%|██        | 1/5 [00:03<00:12,  3.01s/it]2025-04-24 13:48:59,427 - DEBUG - Found reasoning start marker 'assistant' at index 349.
2025-04-24 13:48:59,428 - DEBUG - Found reasoning end marker '</think>' at index 2028.
2025-04-24 13:48:59,434 - INFO - Analyzing 358 reasoning tokens across 10 steps.
2025-04-24 13:49:00,490 - INFO - Analyzing 358 reasoning tokens across 10 steps.
Processing Baseline:  40%|████      | 2/5 [00:05<00:07,  2.48s/it]2025-04-24 13:49:01,537 - DEBUG - Found reasoning start marker 'assistant' at index 259.
2025-04-24 13:49:01,538 - DEBUG - Found reasoning end marker '</think>' at index 5151.
2025-04-24 13:49:01,551 - INFO - Analyzing 1133 reasoning tokens across 10 steps.
2025-04-24 13:49:03,447 - INFO - Analyzing 1133 reasoning tokens across 10 steps.
Processing Baseline:  60%|██████    | 3/5 [00:08<00:06,  3.10s/it]2025-04-24 13:49:05,366 - DEBUG - Found rea