In [1]:
import os
import sys
import pandas as pd
from transformers import AutoTokenizer
from dotenv import load_dotenv

# Put the parent of the current working directory at the front of sys.path
# (only if it is not already listed), presumably so the local `dialz`
# imports that follow resolve without installing the package.
module_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(module_path) == 0:
    sys.path.insert(0, module_path)

from dialz.dataset import Dataset
from dialz.model import SteeringModel
from dialz.vector import SteeringVector


In [2]:
# Load variables from a .env file (if any) into the process environment
# before reading the token, so the secret never has to live in the notebook.
load_dotenv()

# Hugging Face model identifier used by every later cell.
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

# None when HF_TOKEN is not set; no error is raised here.
hf_token = os.environ.get("HF_TOKEN")

In [18]:
# The two contrasting phrases: index 0 is printed next to the +1 steering
# run and index 1 next to the -1 run in the generation cell.
contrastive_words = ["high on acid", "sober"]

# Build the contrastive dataset for this model from the two phrases.
dataset = Dataset.create_dataset(model_name, contrastive_words)

# -5, -6, ..., -17: presumably negative layer indices counted from the end
# of the network — TODO confirm against SteeringModel's signature.
layer_ids = [-offset for offset in range(5, 18)]

model = SteeringModel(model_name, layer_ids, hf_token)
vector = SteeringVector.train(model, dataset)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 19/19 [00:02<00:00,  7.90it/s]
100%|██████████| 31/31 [00:02<00:00, 14.45it/s]


In [21]:
# Prompt shared by all three generations. Named `prompt` rather than `input`
# so the built-in `input()` is not shadowed for the rest of the kernel session.
prompt = "Tell me about space."

tokenizer = AutoTokenizer.from_pretrained(model_name, token=model.token)
# NOTE(review): the tokenizer's pad id is set to 0 here while generate() is
# given eos_token_id as pad_token_id below — confirm the mismatch is intended.
tokenizer.pad_token_id = 0

# The tokenizer output is a mapping (it is unpacked with ** into generate()),
# moved to the same device as the model.
input_ids = tokenizer(prompt, return_tensors="pt").to(model.device)
settings = {
    "pad_token_id": tokenizer.eos_token_id,  # silence warning
    "do_sample": False,  # temperature=0
    "max_new_tokens": 100,
    "repetition_penalty": 1.5,
}


def generate_response(strength=None):
    """Generate a completion for the shared prompt and return it as text.

    Args:
        strength: Coefficient passed to ``model.set_control(vector, strength)``
            before generating. When ``None``, ``model.reset()`` is called
            instead, which is how the baseline response is produced.

    Returns:
        The decoded output of ``model.generate`` with surrounding whitespace
        stripped. Special tokens are not skipped during decoding.
    """
    if strength is None:
        model.reset()
    else:
        model.set_control(vector, strength)
    output_ids = model.generate(**input_ids, **settings).squeeze()
    return tokenizer.decode(output_ids).strip()


print("Baseline response:")
print(generate_response())

print("\nSteering of +1:", contrastive_words[0])
print(generate_response(1))

print("\nSteering of -1:", contrastive_words[1])
print(generate_response(-1))

Baseline response:
<s> Tell me about space.
A: Space is the emptiness or vacuum that surrounds all matter and energy in the universe, including planets, stars, galaxies, black holes, comets, asteroids, etc. It's a very large place with an estimated diameter of 93 billion light years (the distance it takes for one unit of time to pass). The study of outer space is called astronomy while astrophysics deals more specifically with its physical properties such as temperature, density, radiation, gravity

Steering of +1: high on acid
<s> Tell me about space.
Space is the universe, man! It's like...it's so big and it has stars and planets and galaxies and black holes and all this crazy shit that I can barely even wrap my head around it. Like seriously dude, have you ever looked at a fucking star? They are just out there in the cosmos brooooooooohhhh!!!!!!!!! Oh wowwwwwwwoaaaaaaaahhhhaaaaaaaaaAAAAAAHHHHAWWWWEEEEEE

Steering of -1: sober
<s> Tell me about space.
A: Space is the area around a ce