In [1]:
# Keep Hugging Face downloads in a local "hf_cache" folder under the current
# working directory. HF_HOME is set in this first cell, ahead of the
# transformers import that happens in a later cell.
import os
from pathlib import Path

cache_dir = Path.cwd().joinpath("hf_cache")
cache_dir.mkdir(parents=False, exist_ok=True)  # no-op when the folder already exists
os.environ["HF_HOME"] = os.fspath(cache_dir)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Render every matplotlib figure at 300 dpi.
plt.rcParams.update({'figure.dpi': 300})

import openai
import json
import numpy as np
import random

from transformers import AutoTokenizer, AutoModelForCausalLM
from baukit import Trace
import torch
from copy import deepcopy
from tqdm import tqdm

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"device: {device}")

device: cuda


False

In [3]:
# Load KEY=value pairs from a local .env file into the process environment.
# The call's return value is displayed as this cell's output.
# NOTE(review): presumably this supplies API credentials (e.g. for the openai
# package imported in an earlier cell) — confirm which variables are expected.
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
# Hugging Face Hub identifier used to load both the model and its tokenizer.
MODEL_NAME = "allenai/Olmo-3-7B-Instruct"

In [30]:
# Seed passed as `random_state` when sampling rows, so the sample is reproducible.
RANDOM_STATE = 42

In [5]:
# Load the causal LM in bfloat16 and switch it to eval mode. Weights are cached
# under `cache_dir`, and device_map="auto" leaves weight placement to the library.
# `dtype` is used instead of `torch_dtype`, which the installed transformers
# version reports as deprecated.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    cache_dir=cache_dir,
    low_cpu_mem_usage=True,
    dtype=torch.bfloat16,
).eval()
# Tokenizer that matches the model checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

`torch_dtype` is deprecated! Use `dtype` instead!
Skipping import of cpp extensions due to incompatible torch version 2.8.0+cu128 for torchao version 0.14.1             Please see https://github.com/pytorch/ao/issues/2919 for more info


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [9]:
def parse_answer(answer):
    """Extract the chosen option letter from a free-text model reply.

    The reply is split on any whitespace. Each token has surrounding
    punctuation and markdown markers removed and is then compared against the
    option letters. The first token that is exactly "A" or "B" wins, so
    replies such as "Option B.", "(A)", "**B**" or "A\\nBecause ..." are
    recognised in addition to the bare "A" / "Option A" forms.

    Matching is case-sensitive, so a lowercase "a" used as an article is
    never mistaken for an answer.

    Args:
        answer: Raw text of the model's reply.

    Returns:
        "A" or "B" for the first matching token, or "X" when no token matches.
    """
    # Characters that commonly wrap a bare option letter in model output.
    wrappers = ".,;:!?()[]{}<>\"'`*_"
    # split() with no argument breaks on newlines and tabs as well as spaces
    # and drops empty tokens, so no explicit strip() is required.
    for token in answer.split():
        letter = token.strip(wrappers)
        if letter in ("A", "B"):
            return letter
    return "X"

In [15]:
def get_message(prompt, system_prompt=None):
    """Build the chat-format message list for one user prompt.

    Args:
        prompt: Text placed in the "user" message.
        system_prompt: Optional text for a leading "system" message. It is
            omitted when falsy (None or an empty string).

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts: the system message
        first (when given), followed by the user message.
    """
    messages = [{"role": "user", "content": prompt}]
    if system_prompt:
        messages.insert(0, {"role": "system", "content": system_prompt})
    return messages

def get_response(input):
    """Generate the model's reply to a single prompt with sampling disabled.

    The prompt is wrapped in a chat with a fixed system instruction asking for
    a short, choice-only answer. It is then tokenized with the chat template
    and passed to ``model.generate`` for up to 200 new tokens.

    Args:
        input: The user prompt text.

    Returns:
        The decoded text of the newly generated tokens only (the prompt tokens
        are sliced off), with special tokens removed.
    """
    chat = get_message(
        input,
        "You are a helpful assistant. Answer shortly with only your choice with no explanation.",
    )
    encoded = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(device)
    output_ids = model.generate(
        **encoded,
        max_new_tokens=200,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
    )
    # The generated sequence starts with the prompt tokens, so skip past them.
    prompt_length = len(encoded.input_ids[0])
    new_tokens = output_ids[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

In [None]:
# Read the question pairs. The first CSV column is parsed as the index and then
# restored as an ordinary column, leaving a fresh 0..n-1 index. Empty string
# columns are pre-created for the raw replies and for the parsed choices.
df = (
    pd.read_csv("Question_Subsets/certainty_effect_pairs.csv", index_col=0)
    .reset_index()
    .assign(
        control_resp_full='',
        treatment_resp_full='',
        control_resp='',
        treatment_resp='',
    )
)

In [18]:
# Query the model for both prompt variants of every row, then store the raw
# replies and their parsed A/B/X choices on that row. Rows are read by position
# and written by label `i`; the two agree because df has a 0..n-1 index.
for i, (control_text, treatment_text) in enumerate(
    zip(df['control_text'].tolist(), df['treatment_text'].tolist())
):
    control_resp = get_response(control_text)
    treatment_resp = get_response(treatment_text)

    # Raw replies.
    df.at[i, 'control_resp_full'] = control_resp
    df.at[i, 'treatment_resp_full'] = treatment_resp

    # Parsed option letters.
    df.at[i, 'control_resp'] = parse_answer(control_resp)
    df.at[i, 'treatment_resp'] = parse_answer(treatment_resp)

    # Progress log: row number, both replies, then a separator line.
    print(i, control_resp, treatment_resp, '---', sep='\n')

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


0
A
Option B
---
1
Option A
A
---
2
Option A
Option A
---
3
A
A
---
4
Option B
A
---
5
A
A
---
6
Option A
Option A
---
7
A
A
---
8
Option B
A
---
9
Option A
A
---
10
A
A
---
11
A
Option B
---
12
B
A
---
13
A
A
---
14
A
Option B
---
15
B
A
---
16
A
A
---
17
B
A
---
18
A
A
---
19
A
B
---
20
A
B
---
21
B
A
---
22
A
A
---
23
B
A
---
24
Option B
A
---
25
A
Option B
---
26
B
A
---
27
Option A
B
---
28
A
A
---
29
B
A
---
30
A
Option B
---
31
Option B
A
---
32
Option B
A
---
33
A
A
---
34
B
A
---
35
A
A
---
36
Option A
B
---
37
Option B
B
---
38
A
Option B
---
39
B
A
---
40
Option A
B
---
41
Option B
A
---
42
A
A
---
43
Option A
B
---
44
A
B
---
45
A
A
---
46
B
A
---
47
A
B
---
48
A
B
---
49
A
Option B
---
50
A
B
---
51
A
B
---
52
B
A
---
53
A
B
---
54
A
B
---
55
A
A
---
56
B
A
---
57
A
B
---
58
A
B
---
59
B
A
---
60
A
Option B
---
61
B
A
---
62
A
B
---
63
A
B
---
64
B
A
---
65
B
A
---
66
A
B
---
67
A
B
---
68
B
A
---
69
A
B
---
70
A
B
---
71
B
A
---
72
A
B
---
73
A
B
---
74
B
A
---
75
A
B
---

In [24]:
# Flag rows where the parsed control and treatment answers are the same.
df['control_resp_eq_treatment_resp'] = df['control_resp'].eq(df['treatment_resp'])

In [25]:
# Persist every pair with its raw and parsed answers (the index is written too).
df.to_csv('Question_Subsets/certainty_effect_pairs_with_answers.csv', index=True)

In [27]:
# Keep the pairs whose parsed answer differs between control and treatment.
# NOTE(review): a row where only one side parsed as "X" (unrecognised reply)
# also lands here — confirm that is intended.
df_switch = df.loc[df['control_resp'].ne(df['treatment_resp'])]

In [29]:
# Draw a reproducible sample of 10 switching pairs.
df_switch_sample = df_switch.sample(n=10, random_state=RANDOM_STATE)

In [34]:
# Persist the sampled switching pairs (the index is written too).
df_switch_sample.to_csv('Question_Subsets/certainty_effect_pairs_switch_sample.csv', index=True)