In [182]:
import importlib
from pipeline.voice_in_head_strategy import VoiceInHeadStrategy
from pipeline.intervention_inserter import InterventionInserter
from pipeline.rollout_generator import RolloutGenerator
from pipeline.decision_parser import DecisionParser
from pipeline.intervention_inserter import DirectInsertionStrategy
import json

# Development helper: re-execute the pipeline modules so that edits made on disk
# take effect without restarting the kernel, then re-import the names so they
# refer to the objects defined by the reloaded modules.
# NOTE(review): 'pipeline.intervention_inserter' is reloaded twice (second and
# last). The order is preserved as-is in case the final reload is there to pick
# up the modules reloaded before it -- confirm whether it is still needed.
_MODULES_TO_RELOAD = (
    'pipeline.voice_in_head_strategy',
    'pipeline.intervention_inserter',
    'pipeline.rollout_generator',
    'pipeline.decision_parser',
    'pipeline.intervention_inserter',
)
for _module_name in _MODULES_TO_RELOAD:
    importlib.reload(importlib.import_module(_module_name))

from pipeline.voice_in_head_strategy import VoiceInHeadStrategy
from pipeline.intervention_inserter import InterventionInserter, DirectInsertionStrategy
from pipeline.rollout_generator import RolloutGenerator
from pipeline.decision_parser import DecisionParser

In [222]:
# Tunable settings for this run, named so they are easy to find and change.
MAX_TOKENS = 1000
DIRECT_INSERTION_POSITION_PCT = 0.25

# Objects shared by the cells below: the generator, the two intervention
# strategies being compared, and the parser applied to generated answers.
rollout_generator = RolloutGenerator(max_tokens=MAX_TOKENS)
voice_in_head_strategy = VoiceInHeadStrategy()
direct_insertion_strategy = DirectInsertionStrategy(position_pct=DIRECT_INSERTION_POSITION_PCT)
decision_parser = DecisionParser()

Initialized InterventionGrader using vLLM


In [223]:
# Read the list stored under the 'question_ids' key of the steerable-ids file.
# The encoding is given explicitly so the read does not depend on the locale default.
with open("/workspace/basic_science_resampling/data/steerable_question_ids.json", "r", encoding="utf-8") as f:
    steerable_question_ids = json.load(f)['question_ids']

In [224]:
# Read the StrategyQA data file. Later cells iterate over it and read each
# item's 'question_id' and 'question' keys.
# The encoding is given explicitly so the read does not depend on the locale default.
with open("/workspace/basic_science_resampling/data/strategyqa_data.json", "r", encoding="utf-8") as f:
    strategyqa_questions = json.load(f)

In [225]:
# Keep only the questions whose id is listed as steerable. A set is used for the
# membership test so the filter does not rescan the id list for every question.
steerable_id_set = set(steerable_question_ids)
steerable_questions = [
    question for question in strategyqa_questions
    if question['question_id'] in steerable_id_set
]

In [226]:
# Prompt template, filled in with str.format using a `question` field.
# The doubled braces ({{ and }}) are format escapes: they render as literal
# { and } so the prompt shows the JSON object the model is asked to return.
# NOTE(review): "true/ false" contains a stray space. It is part of the prompt
# text, so it is left untouched here; changing it would change the model input.
question_template = """
Answer the following true/ false question:
{question}

Return either {{"decision": true}} or {{"decision": false}}
"""

In [227]:
def format_question(question, rollout_generator):
    """Build the chat-formatted prompt for a single question.

    The question is substituted into the module-level ``question_template`` and
    wrapped as a single user message, which is then handed to the generator's
    ``apply_chat_template``.

    Args:
        question: Question text to insert into the template.
        rollout_generator: Object providing ``apply_chat_template(messages)``.

    Returns:
        Whatever ``rollout_generator.apply_chat_template`` returns for the
        one-message conversation.
    """
    user_prompt = question_template.format(question=question)
    chat_messages = [{"role": "user", "content": user_prompt}]
    return rollout_generator.apply_chat_template(chat_messages)


In [248]:
# How many of the steerable questions to run in this experiment.
N_QUESTIONS = 25

# Chat-formatted prompt for each selected question.
formatted_questions = [
    format_question(steerable_question['question'], rollout_generator)
    for steerable_question in steerable_questions[:N_QUESTIONS]
]

# Control generations, one entry per prompt. Later cells take element [0] of
# each entry as the generated text.
# NOTE(review): the second argument is presumably the number of samples per
# prompt -- confirm against RolloutGenerator.generate_multiple.
rollouts = rollout_generator.generate_multiple(formatted_questions, 1)

In [None]:
from concurrent.futures import ThreadPoolExecutor

# Guard against stale kernel state: each rollout is paired with its prompt below,
# and zip() would otherwise silently truncate to the shorter list.
assert len(rollouts) == len(formatted_questions), (
    "rollouts and formatted_questions are out of sync; re-run the generation cell"
)

# Results keyed by condition name: "control" or "<strategy_name>_<true|false>".
strategy_rollouts = {}
parsed_decisions = {}

# Element [0] of each rollout entry is the generated text for that question.
control_texts = [rollout[0] for rollout in rollouts]
parsed_decisions["control"] = decision_parser.parse_multiple(control_texts)
# Store one text per question, like every other condition. (This used to store
# rollouts[0], i.e. only the first question's entry.)
strategy_rollouts["control"] = control_texts

strategies = [
    (voice_in_head_strategy, "voice_in_head"),
    (direct_insertion_strategy, "direct_insertion"),
]

for intervention_bool in ["true", "false"]:
    # Hint text handed to each strategy for insertion into the control text.
    insertion_prompt = f"Hmm, maybe the answer is {intervention_bool}. I'll think for longer."

    for strategy, strategy_name in strategies:
        condition = strategy_name + "_" + intervention_bool

        # Apply the strategy to every control text in parallel. strategy.apply
        # returns a pair; only its first element (the intervened assistant
        # text) is used. Arguments are passed to map() explicitly rather than
        # captured from the loop variables.
        with ThreadPoolExecutor() as executor:
            applied = executor.map(
                strategy.apply,
                control_texts,
                [insertion_prompt] * len(control_texts),
            )
            assistant_interveneds = [assistant_intervened for assistant_intervened, _ in applied]

        # Prompt followed by the intervened assistant text, from which the
        # generator continues.
        interveneds = [
            formatted_question + assistant_intervened
            for formatted_question, assistant_intervened in zip(formatted_questions, assistant_interveneds)
        ]

        continuations = rollout_generator.generate_multiple(interveneds, 1)
        parsed_decision = decision_parser.parse_multiple([continuation[0] for continuation in continuations])

        parsed_decisions[condition] = parsed_decision
        # Full assistant text for the condition: intervened prefix + continuation.
        strategy_rollouts[condition] = [
            assistant_intervened + continuation[0]
            for assistant_intervened, continuation in zip(assistant_interveneds, continuations)
        ]

Good intervention 0
Good intervention 1
Good intervention 2
Good intervention 3
Good intervention 4
Good intervention 5
Good intervention 6
Good intervention 7
Good intervention 8
Good intervention 9

[SCORING] Computing mean logprobs and LLM grades for 10 good interventions:
[SCORING] Batch grading 10 interventions...
Good intervention 0
Good intervention 1
Good intervention 2
Good intervention 3
Good intervention 4
Good intervention 5
Good intervention 6
Good intervention 7
Good intervention 8
Good intervention 9

[SCORING] Computing mean logprobs and LLM grades for 10 good interventions:
Good intervention 0
Good intervention 1
Good intervention 2
Good intervention 3
Good intervention 4
Good intervention 5
Bad intervention 6
Good intervention 7
Good intervention 8
Good intervention 9

[SCORING] Computing mean logprobs and LLM grades for 9 good interventions:
Good intervention 0
Good intervention 1
Good intervention 2
Good intervention 3
Good intervention 4
Good intervention 5
Good in

In [254]:
import numpy as np

In [255]:
# For each condition, print its name and the fraction of decisions that are
# truthy. A decision of None is counted as False.
for category, decisions in parsed_decisions.items():
    is_true = np.array([d if d is not None else False for d in decisions]).astype(bool)
    true_fraction = is_true.sum() / len(decisions)
    print(category)
    print(true_fraction)
    print()


control
0.52

voice_in_head_true
0.64

direct_insertion_true
0.52

voice_in_head_false
0.32

direct_insertion_false
0.24



In [264]:
# For each condition, print the mean, standard deviation and median of len()
# over its stored rollouts, followed by a blank line.
for strategy_name, rollouts_for_strategy in strategy_rollouts.items():
    lengths = np.array(list(map(len, rollouts_for_strategy)))
    print(strategy_name)
    for summarise in (np.mean, np.std, np.median):
        print(summarise(lengths))
    print()


control
3022.24
815.634000272181
2907.0

voice_in_head_true
2251.0
1026.8266844993852
1931.0

direct_insertion_true
960.92
451.1039720507901
818.0

voice_in_head_false
2527.68
1398.4640637499413
2286.0

direct_insertion_false
1163.6
1101.117178142272
819.0



In [263]:
# Keep, for each question, element [0] of its rollout entry as the
# control-condition text.
strategy_rollouts['control'] = [rollout[0] for rollout in rollouts]

['<think>\nOkay, so the question is whether the cuisine of Hawaii is suitable for a vegan. Let me think about this. First, I need to understand what Hawaiian cuisine typically includes. I know that Hawaiian food has a lot of traditional dishes like poke bowls, kalua pig, plate lunches with rice, meat, and veggies, and maybe some local fruits.\n\nWait, but the question is about vegan suitability. So, are there vegan options in Hawaiian cuisine? Let me break it down. Traditional dishes often include meat, like kalua pig which is a roasted pork dish. Then there\'s the plate lunch, which usually has a meat component. However, some dishes might have plant-based elements. For example, poke is made with raw fish, but maybe there are vegetarian versions? Also, there are fruits like pineapples, coconuts, and other tropical fruits which are vegan. \n\nBut then, are there any traditional dishes that are naturally vegan? Maybe things like taro, sweet potatoes, and other root vegetables. Also, some

In [237]:
# Display the value stored under the 'control' key.
# NOTE(review): the saved output is a list of '<' characters rather than full
# texts, which suggests this ran when `rollouts` already held plain strings --
# verify after a fresh Restart & Run All.
strategy_rollouts['control']

['<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<',
 '<']

In [238]:
# Display the current value of `rollouts`.
# NOTE(review): the saved output shows strings that already contain an inserted
# "Hmm, maybe the answer is false ..." hint, so it presumably reflects a
# different kernel state than the generation cell produces -- re-run from a
# fresh kernel to confirm.
rollouts

['<think>\n\nOkay, let\'s see. The question is whether the cuisine of Hawaii is suitable for a vegan. So I need to figure out if there are enough vegan options in Hawaiian food.\n\nFirst, I remember that Hawaiian cuisine has a lot of traditional dishes. Let me think about the main components. There\'s poke, which is raw fish, so that\'s not vegan. Then there\'s kalua pig, which is a traditional dish using pork. Also, many dishes use seafood, like shrimp or other fish. \n\nBut wait, there might be some plant-based options. For example, some dishes use taro, which is a root vegetable. Taro i\n\nHmm, maybe the answer is false but I should think for longer.\n</think>\n\n{"decision": false}',
 '<think>\n\nOkay, let\'s try to figure out if Kurt Cobain would have benefited from Project Semicolon. First, I need to recall who Kurt Cobain was. He was the lead singer of Nirvana, right? He passed away in 1994, so he\'s been gone for a while. Now, what\'s Project Semicolon? I think it\'s a mental h

In [245]:
# Display the texts recorded for the direct-insertion strategy with the "false" hint.
strategy_rollouts['direct_insertion_false']

['<think>\n\nOkay, let\'s see. The question is whether the cuisine of Hawaii is suitable for a vegan. So I need to figure out if there are enough vegan options in Hawaiian food.\n\nFirst, I remember that Hawaiian cuisine has a lot of traditional dishes. Let me think about the main components. There\'s poke, which is raw fish, so that\'s not vegan. Then there\'s kalua pig, which is a traditional dish using pork. Also, many dishes use seafood, like shrimp or other fish. \n\nBut wait, there might be some plant-based options. For example, some dishes use taro, which is a root vegetable. Taro i\n\nHmm, maybe the answer is false but I should think for longer.\n</think>\n\n{"decision": false}',
 '<think>\n\nOkay, let\'s try to figure out if Kurt Cobain would have benefited from Project Semicolon. First, I need to recall who Kurt Cobain was. He was the lead singer of Nirvana, right? He passed away in 1994, so he\'s been gone for a while. Now, what\'s Project Semicolon? I think it\'s a mental h

In [246]:
# Display the value stored under the 'control' key.
# NOTE(review): the saved output contains the inserted "Hmm, maybe the answer is
# false ..." hint and starts identically to the 'direct_insertion_false' output,
# which a control rollout should not -- likely stale kernel state; confirm after
# a fresh Restart & Run All.
strategy_rollouts['control']

['<think>\n\nOkay, let\'s see. The question is whether the cuisine of Hawaii is suitable for a vegan. So I need to figure out if there are enough vegan options in Hawaiian food.\n\nFirst, I remember that Hawaiian cuisine has a lot of traditional dishes. Let me think about the main components. There\'s poke, which is raw fish, so that\'s not vegan. Then there\'s kalua pig, which is a traditional dish using pork. Also, many dishes use seafood, like shrimp or other fish. \n\nBut wait, there might be some plant-based options. For example, some dishes use taro, which is a root vegetable. Taro i\n\nHmm, maybe the answer is false but I should think for longer.\n</think>\n\n{"decision": false}',
 '<think>\n\nOkay, let\'s try to figure out if Kurt Cobain would have benefited from Project Semicolon. First, I need to recall who Kurt Cobain was. He was the lead singer of Nirvana, right? He passed away in 1994, so he\'s been gone for a while. Now, what\'s Project Semicolon? I think it\'s a mental h