In [1]:
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
from repeng import ControlVector, ControlModel, DatasetEntry

In [4]:
model_name = "mistralai/Mistral-7B-Instruct-v0.1"
model = AutoModelForCausalLM.from_pretrained(model_name,torch_dtype = torch.float16,cache_dir="./hf_cache")
model = ControlModel(model, list(range(-5,-18,-1)))

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [6]:
from repeng import DatasetEntry
from typing import List

def make_dataset(
    template:str,
    pos_personas: List[str],
    neg_personas: List[str],
    suffixes: List[str],
) -> List[DatasetEntry]:
    dataset: List[DatasetEntry] = []
    for suffix in suffixes:
        for pos, neg in zip(pos_personas, neg_personas):
            positive_prompt = template.format(persona=pos) + " " + suffix
            negative_prompt = template.format(persona=neg) + " " + suffix
            dataset.append(DatasetEntry(positive = positive_prompt, negative = negative_prompt))

    return dataset

In [8]:
truncated_output_suffixes = [
    "and explain how you would react",
    "and describe what kinda person you are"
]

In [10]:
trippy_dataset = make_dataset(
    "Act as if you are extremely {persona}",
    ["High on psychedelic drugs"],
    ["sober from psychedelic drugs"],
    truncated_output_suffixes
)

In [11]:
trippy_dataset

[DatasetEntry(positive='Act as if you are extremely High on psychedelic drugs and explain how you would react', negative='Act as if you are extremely sober from psychedelic drugs and explain how you would react'),
 DatasetEntry(positive='Act as if you are extremely High on psychedelic drugs and describe what kinda person you are', negative='Act as if you are extremely sober from psychedelic drugs and describe what kinda person you are')]

In [15]:
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    cache_dir="./hf_cache"
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
trippy_vector = ControlVector.train(model, tokenizer, trippy_dataset)


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [01:31<00:00, 91.52s/it]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [00:00<00:00, 89.03it/s]


In [17]:
for strength in (-2.2, 1, 2.2):
    model.set_control(trippy_vector, strength)
    out = model.generate(
        **tokenizer(
            f"[INST] Give me a one sentence pitch for a TV show.",
            return_tensors="pt"
        ),
        do_sample = False,
        max_new_tokens=128,
        repetition_penalty = 1.1,
    )

    

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [18]:
print(tokenizer.decode(out.squeeze()).strip())

<s> [INST] Give me a one sentence pitch for a TV show.

Without giving away too much, tell me about the overall premise of your show.

What is the general tone and style of your show?

What are some of the main themes and topics that you explore in your show?

What kind of people and characters will we see on your show?

What is the overall message or lesson that you hope to convey through your show?</s>
