In [65]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from dialz import Dataset, SteeringModel, SteeringVector, visualize_activation

#for managing cores
import torch
import torch.nn.functional as F
import os
import multiprocessing

In [66]:
torch.manual_seed(3000)
torch.cuda.manual_seed_all(3000)

In [73]:
# Device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Model and tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

Loading checkpoint shards: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 3/3 [00:01<00:00,  1.72it/s]


In [None]:
# Use all logical CPU cores - default seems to max out at 32
num_cores = multiprocessing.cpu_count()
torch.set_num_threads(num_cores)
torch.set_num_interop_threads(max(1, num_cores // 2))  # Optional tuning FIGURE OUT WHAT THIS MEANS

print(f" Using {num_cores} CPU threads for PyTorch")
#this configures PyTorch to fully use the CPU.

RuntimeError: Error: cannot set number of interop threads after parallel work has started or set_num_interop_threads called

In [None]:
layer_ids = list(range(6, 20))


In [74]:
model = SteeringModel(model_name, layer_ids)

Loading checkpoint shards: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 3/3 [00:05<00:00,  1.67s/it]


In [None]:
dataset = Dataset()
dataset = Dataset.create_dataset(model_name, ['uncertain about every statement you make', 'certain about every statement you make'])

In [None]:
print(dataset)

Positive: <s>[INST] Act as if you are extremely uncertain about every statement you make.

That game[/INST]
Negative: <s>[INST] Act as if you are extremely certain about every statement you make.

That game[/INST]
Positive: <s>[INST] Act as if you are extremely uncertain about every statement you make.

I can see[/INST]
Negative: <s>[INST] Act as if you are extremely certain about every statement you make.

I can see[/INST]
Positive: <s>[INST] Act as if you are extremely uncertain about every statement you make.

Hmm, this[/INST]
Negative: <s>[INST] Act as if you are extremely certain about every statement you make.

Hmm, this[/INST]
Positive: <s>[INST] Act as if you are extremely uncertain about every statement you make.

I can relate to[/INST]
Negative: <s>[INST] Act as if you are extremely certain about every statement you make.

I can relate to[/INST]
Positive: <s>[INST] Act as if you are extremely uncertain about every statement you make.

Who is[/INST]
Negative: <s>[INST] Act as 

In [None]:
vector = SteeringVector.train(model, dataset, method="mean_diff") 

100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 19/19 [11:45<00:00, 37.13s/it]
100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 31/31 [00:00<00:00, 400.95it/s]


In [103]:
import pickle

# Save
with open("data/dialz_vector.pkl", "wb") as f:
    pickle.dump(vector, f)


In [None]:
# Later load
with open("data/dialz_vector.pkl", "rb") as f:
    vector2 = pickle.load(f)

In [None]:
# hominid question performance

In [69]:
#using token probabilities
def generate_output(
    input,
    sys=True,
    max_new_tokens=100
):
    messages = []

    if sys:
        messages.append({"role": "system", "content": system_prompt})

    messages.append({"role": "user", "content": input})

    chat_inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    )

    input_ids = chat_inputs["input_ids"].to(model.device)
    attention_mask = chat_inputs["attention_mask"].to(model.device)

    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        output_scores=True,           
        return_dict_in_generate=True,       
    )

    generated_ids = outputs.sequences[:, input_ids.shape[1]:]

    text = tokenizer.decode(
        generated_ids[0],
        skip_special_tokens=True
    )

    return text, outputs.scores

In [70]:
input= """Look through recent research on hominid species dating from the Middle Pliocene. Does it indicate that there was (as of 2020)?:[
"A: multiple hominid species but with limited diversity.",
"B: a single species with no diversity.",
"C: decreased species diversity but increased numbers of hammerstones and flakes, indicating stone tool manufacture.",
"D: a single dominant species that outcompeted all others, leading to decreased diversity.",
"E: increased species diversity due to a prolonged ice age followed by a severe drought.",
"F: decreased species diversity due to a prolonged ice age followed by a severe drought.",
"G: a great amount of species diversity, or a single species that exhibited a lot of diversity.",
"H: increased species diversity but with decreased population numbers due to harsh climate conditions.",
"I: increased species diversity but decreased numbers of hammerstones and flakes, indicating less stone tool manufacture.",
"J: very little species diversity during this period and very few hominids."
]"""

In [71]:
system_prompt =""" Read the question, analyze step by step, provide your answer. Use the following format to answer:  Explanation: [insert YOUR short step-by-step analysis here] 
Answer: [ONLY the option LETTER, where the LETTER is one of {A,B,C,D,E,F,G,H,I,J}]. Only give me the reply according to this format, donâ€™t give me any other words."""

In [75]:
print("Baseline response:")
model.reset()
answer, scores = generate_output(input, sys=False, max_new_tokens=500)
print(answer)

Baseline response:
As of 2020, the research on hominid species dating from the Middle Pliocene indicates that "A: multiple hominid species but with limited diversity." is the most accurate statement. There were several hominid species present during this period, but the level of diversity among them was relatively restricted compared to later Homo species. This is based on fossil discoveries and genetic analyses. The finding of stone tools can suggest a certain level of sophistication or complexity, but it is not directly linked to species diversity. Factors such as climate, habitat, competition, and migration played important roles in shaping the species and their populations during this period. However, interpretations can vary among experts, and new findings might lead to updates in our understanding over time.


In [76]:
print("Sytem prompted response:")
model.reset()
answer, scores = generate_output(input, sys=True, max_new_tokens=500)
print(answer)

Sytem prompted response:
Explanation: The question asks about the species diversity among hominids during the Middle Pliocene as of 2020. It does not provide any information about stone tool manufacture, climate conditions, or any specific dominant species. Therefore, the research would not necessarily indicate increased or decreased diversity due to harsh climate conditions, ice ages, or droughts, nor would it support suggestions of decreased numbers of hammerstones and flakes. As a result, we can't conclude that the hominids exhibited a great amount of diversity or were a single species that showed a lot of diversity.

Answer: J: very little species diversity during this period and very few hominids.


In [83]:
# Last-step logits and probabilities
last_logits = scores[-17]
last_probs = torch.softmax(last_logits, dim=-1)

# Prevent log(0)
last_probs = last_probs.clamp(min=1e-12)

entropy = -(last_probs * last_probs.log()).sum(dim=-1).item()

# Top 5 predicted tokens at last step
top5_probs, top5_ids = last_probs[0].topk(5)
print("Top 5 tokens and probabilities:")
for token_id, prob in zip(top5_ids, top5_probs):
    print(tokenizer.decode([token_id.item()]), f"{prob.item():.4f}")

entropy


Top 5 tokens and probabilities:
J 0.9931
A 0.0041
L 0.0009
H 0.0004
I 0.0003


0.0525316521525383

In [None]:
aaaaaa

In [84]:
N = 20
entropies = []

for logits in scores[:min(N, len(scores))]:
    probs = torch.softmax(logits, dim=-1).clamp(min=1e-12)
    H = -(probs * probs.log()).sum().item()
    entropies.append(H)

mean_entropy = sum(entropies) / len(entropies)
mean_entropy

0.3914052701845776

In [85]:
print("light, flat steering response:")
steering_factor = 0.01
model.reset()
model.set_control(vector, steering_factor)
answer, scores = generate_output(input, sys=False, max_new_tokens=500)
print(answer)

light, flat steering response:
As of 2020, the research on hominid species dating from the Middle Pliocene does not clearly indicate any one of the provided options. However, it is generally accepted that there were multiple hominid species during this time, but with limited diversity (Option A). Some of these species may have been developing and using stone tools (Option C), and their numbers may have been affected by climate conditions (Option H or J, depending on whether tool manufacturing increased or decreased under harsh climate conditions). The diversity of these species and the impact of climate change on their numbers are subjects of ongoing research. It's essential to note that knowledge in this field can change with new discoveries and evidence.


In [86]:
print("light, flat steering prompted:")
steering_factor = 0.01
model.reset()
model.set_control(vector, steering_factor)
answer, scores = generate_output(input, sys=True, max_new_tokens=500)
print(answer)

light, flat steering prompted:
Explanation: The question is asking about the diversity of hominid species in the Middle Pliocene, not about stone tool manufacture or climate conditions. Therefore, the options B and J can be eliminated since they imply a lack of diversity. The question does not directly refer to an ice age or a subsequent drought, so options E, F, and H can also be eliminated. The remaining options A, C, D, G, I do not indicate whether the diversity is "as of 2020" or not, but the question asks for the state "as of 2020". Thus, none of the provided options clearly matches the question's specific requirements. However, option A, "multiple hominid species but with limited diversity.", is the most plausible answer based on current understanding of hominid evolution from the Middle Pliocene. But none of the provided options directly answers the question "as of 2020".

Answer: Not applicable, as none of the provided options directly answers the question "as of 2020".


Another way to approach this with one less step of generation

In [87]:
print("last token entropy steeering, unprompted:")
steering_factor = entropy/50
model.reset()
model.set_control(vector, steering_factor)
answer, scores = generate_output(input, sys=True, max_new_tokens=500)
print(answer)

last token entropy steeering, unprompted:
Explanation: The question asks about the species diversity in hominids from the Middle Pliocene, not about the manufacture of stone tools or population numbers. Given that recent research would likely document the discovery and study of numerous hominid species, the most accurate answer would be A: multiple hominid species but with limited diversity.


In [88]:
print("mean entropy steeering, prompted:")
steering_factor = mean_entropy/50
model.reset()
model.set_control(vector, steering_factor)
answer, scores = generate_output(input, sys=True, max_new_tokens=500)
print(answer)

mean entropy steeering, prompted:
Explanation: The research on hominid species from the Middle Pliocene does not necessarily provide information about the number or types of stone tools they manufactured. Therefore, we cannot conclude whether there was decreased (option C) or increased (option I) numbers of hammerstones and flakes, indicating less or more stone tool manufacture, respectively.

However, the research may indicate multiple hominid species but without necessarily implying great diversity between them (option A). It is also possible that there was a single dominant species that outcompeted others, leading to decreased diversity (option D). Additionally, harsh climate conditions may have contributed to decreased population numbers among these hominid species (options H and J), although the research may not provide specific information about this.

Answer: A or D


In [89]:
input= """Which of the following is the body cavity that contains the pituitary gland?[
A:"Ventral",
B:"Dorsal",
C:"Buccal",
D:"Thoracic",
E:"Pericardial",
F:"Abdominal",
G:"Spinal",
H:"Pelvic",
I:"Pleural",
J:"Cranial"
]"""

In [90]:
system_prompt =""" Read the question, analyze step by step, provide your answer. Use the following format to answer:  Explanation: [insert YOUR short step-by-step analysis here] 
Answer: [ONLY the option LETTER, where the LETTER is one of {A,B,C,D,E,F,G,H,I,J}]. Only give me the reply according to this format, donâ€™t give me any other words."""

In [91]:
print("Baseline response:")
model.reset()
answer, scores = generate_output(input, sys=False, max_new_tokens=500)
print(answer)

Baseline response:
[J] "Cranial" is the correct answer. The pituitary gland is located in the cranial cavity, specifically in a bony structure called the sella turcica.


In [92]:
print("Sytem prompted response:")
model.reset()
answer, scores = generate_output(input, sys=True, max_new_tokens=500)
print(answer)

Sytem prompted response:


Answer: J


In [95]:
# Last-step logits and probabilities
last_logits = scores[-2]
last_probs = torch.softmax(last_logits, dim=-1)

# Prevent log(0)
last_probs = last_probs.clamp(min=1e-12)

entropy = -(last_probs * last_probs.log()).sum(dim=-1).item()

# Top 5 predicted tokens at last step
top5_probs, top5_ids = last_probs[0].topk(5)
print("Top 5 tokens and probabilities:")
for token_id, prob in zip(top5_ids, top5_probs):
    print(tokenizer.decode([token_id.item()]), f"{prob.item():.4f}")

entropy


Top 5 tokens and probabilities:
F 0.8951
J 0.0538
H 0.0342
G 0.0122
A 0.0032


0.4559553265571594

In [94]:
aaaaaaaa

NameError: name 'aaaaaaaa' is not defined

In [96]:
N = 20
entropies = []

for logits in scores[:min(N, len(scores))]:
    probs = torch.softmax(logits, dim=-1).clamp(min=1e-12)
    H = -(probs * probs.log()).sum().item()
    entropies.append(H)

mean_entropy = sum(entropies) / len(entropies)

In [97]:
mean_entropy

0.518360641446634

In [98]:
print("light, flat steering response:")
steering_factor = 0.01
model.reset()
model.set_control(vector, steering_factor)
answer, scores = generate_output(input, sys=False, max_new_tokens=500)
print(answer)

light, flat steering response:


The correct answer is J: "Cranial". The pituitary gland is located within the cranial cavity of the skull, which is the body cavity that contains the brain. It is also referred to as the sella turcica, which is a part of the sphenoid bone that makes up the base of the skull.


In [99]:
print("light, flat steering prompted:")
steering_factor = 0.01
model.reset()
model.set_control(vector, steering_factor)
answer, scores = generate_output(input, sys=True, max_new_tokens=500)
print(answer)

light, flat steering prompted:
Answer: F


In [100]:
print("last token entropy steeering, prompted:")
steering_factor = entropy/50
model.reset()
model.set_control(vector, steering_factor)
answer, scores = generate_output(input, sys=True, max_new_tokens=500)
print(answer)

last token entropy steeering, prompted:
Explanation: The pituitary gland is located in the cranial cavity, which is the space within the skull surrounding the brain.
Answer: J


In [101]:
print("mean entropy steeering, prompted:")
steering_factor = mean_entropy/50
model.reset()
model.set_control(vector, steering_factor)
answer, scores = generate_output(input, sys=True, max_new_tokens=500)
print(answer)

mean entropy steeering, prompted:
Answer: F


In [102]:
print("mean entropy steeering, unprompted:")
steering_factor = mean_entropy/50
model.reset()
model.set_control(vector, steering_factor)
answer, scores = generate_output(input, sys=False, max_new_tokens=500)
print(answer)

mean entropy steeering, unprompted:


The pituitary gland is situated in the cranial cavity, specifically a part of it called the sella turcica. Therefore, the correct answer is J: "Cranial".
