In [1]:
import gc
import json
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.manifold import TSNE
import sys
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

%load_ext autoreload
%autoreload 2
sys.path.append('../act/')
import steering_vectors
import utils

### Generating data
This is the setup I used to generate the data. You can skip this if you want to directly load the final dataset. 

##### Data description 

We create datasets by prompting an advanced model (GPT-4o mini, via the OpenAI API) to output emails written in specific styles. We thus treat this advanced model as the oracle for what the ideal email in each style looks like. 

We keep the prompts we used with GPT-4o mini and use them as a baseline to obtain a prompt-based result on our model. We then perform i) few-shot (in-context) learning and ii) representation optimisation, and compare the performance of both against the prompt-based approach. 

We generate email styles based on four personalities. Our general prompt is: ```{email instruction} It should be in the style of {personality}:``` followed by a *descriptive* prompt that guides the model even further, and a length constraint (```Use 50-100 words.```). The descriptive prompts are: 
- David Sacks: ```a concise, no-nonsense email that highlights the core idea, actionable insights, and strategic implications, presented in a structured and analytical format.```
- Tina Fey: ```a witty, irreverent email filled with clever asides and humor, while still capturing the email's main points with a relatable and engaging tone.```
- Winston Churchill: ```a grand, eloquent email framed as a momentous contribution to human progress, delivered with formal rhetoric and a touch of inspiring gravitas.```
- Ernest Hemingway: ```a brief, direct, and unadorned email conveying its purpose with sharp clarity```

In [32]:
import json
import os

from openai import OpenAI

# This cell is optional and should be run only if you want to generate the data yourself.
# The API key is read from the environment so that it never ends up in the notebook;
# a missing OPENAI_API_KEY raises a KeyError here instead of failing on the first request.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Settings shared by every generation request.
GENERATION_MODEL = "gpt-4o-mini"
GENERATION_TEMPERATURE = 0.6

# let's generate some data with openai 
# Each template takes the plain email instruction as its single `{}` placeholder.
USER_PROMPT_PERSONALITY_DS = '''
{} It should be in the style of David Sacks: a concise, no-nonsense email that highlights the core idea, actionable insights, and strategic implications, presented in a structured and analytical format. Use 50-100 words. 
'''
USER_PROMPT_PERSONALITY_TF = '''
{} It should be in the style of Tina Fey: a witty, irreverent email filled with clever asides and humor, while still capturing the email's main points with a relatable and engaging tone. Use 50-100 words. 
'''
USER_PROMPT_PERSONALITY_WC = '''
{} It should be in the style of Winston Churchill: a grand, eloquent email framed as a momentous contribution to human progress, delivered with formal rhetoric and a touch of inspiring gravitas. Use 50-100 words. 
'''
USER_PROMPT_PERSONALITY_H = '''
{} It should be in the style of Ernest Hemingway: a brief, direct, and unadorned email conveying its purpose with sharp clarity. Use 50-100 words. 
'''

# Style key -> prompt template. The insertion order fixes the request order per prompt.
personality_templates = {
    'ds': USER_PROMPT_PERSONALITY_DS,
    'tf': USER_PROMPT_PERSONALITY_TF,
    'wc': USER_PROMPT_PERSONALITY_WC,
    'h': USER_PROMPT_PERSONALITY_H,
}

prompts_personality = {style: [] for style in personality_templates}
outputs_personality = {style: [] for style in personality_templates}

# open email prompts 
with open('../data/styles/email_prompts_n=50.json', 'r') as f:
    email_prompts = json.load(f)

# One request per (email prompt, style); iterate over whatever the file contains
# rather than assuming exactly 50 prompts.
for it, email_prompt in enumerate(email_prompts):
    print(it)
    for style, template in personality_templates.items():
        # Format once so the stored prompt is exactly the one that was sent.
        user_prompt = template.format(email_prompt)
        resp = client.chat.completions.create(
            model=GENERATION_MODEL,
            messages=[{"role": "user", "content": user_prompt}],
            temperature=GENERATION_TEMPERATURE,
        )
        outputs_personality[style].append(resp.choices[0].message.content)
        prompts_personality[style].append(user_prompt)

# save the data
data = {
    'prompts_personality': prompts_personality,
    'outputs_personality': outputs_personality,
}

with open('../data/styles/email_styles_short.json', 'w') as f:
    json.dump(data, f)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


### Let's begin!

In [2]:
# Load the model we will use.
# Use the GPU when one is visible and fall back to the CPU otherwise, so that the
# notebook does not crash on a machine without CUDA (generation will be slow there).
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_name = 'microsoft/Phi-3.5-mini-instruct'
# Alternative checkpoint: model_name = 'meta-llama/Llama-3.2-3B'
# (loaded with a plain AutoModelForCausalLM.from_pretrained(model_name))

# Load Phi model in bfloat16; trust_remote_code lets transformers run the
# model code shipped with the checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)
model.to(device)

tokenizer = AutoTokenizer.from_pretrained(model_name)

`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
def tokenize_instructions(tokenizer, instructions):
    """Apply the tokenizer's chat template and return the resulting token ids.

    Args:
        tokenizer: object exposing `apply_chat_template`.
        instructions: chat messages forwarded unchanged to `apply_chat_template`
            (callers in this notebook pass either one conversation or a list of
            conversations).

    Returns:
        The `input_ids` attribute of the templated encoding. Nothing else from
        the encoding is returned.
    """
    template_options = {
        "padding": True,
        "truncation": False,
        "return_tensors": "pt",
        "return_dict": True,
        # Always end the templated text with the generation prompt.
        "add_generation_prompt": True,
    }
    encoded = tokenizer.apply_chat_template(instructions, **template_options)
    return encoded.input_ids

# Generating emails in a given style 

A very common use case for LLMs is helping you write emails. However, if I simply ask ChatGPT to write an email to my boss to reschedule a meeting, it will answer in a very generic style. I'd like the message to be in my own style. We'd like to achieve this without using prompts, and explore alternatives that can implicitly extract a user's style preference from other emails they wrote. 

One approach to tailoring the model to a specific style is to finetune it with a parameter-efficient method such as LoRA. This requires updating hundreds of thousands of parameters (far fewer than full fine-tuning, but still a lot) on potentially thousands of samples. Once trained, we would also need to store one adapter per user (each consisting of hundreds of thousands of parameters). Since we may have only a few examples of a user's email style and want to be memory-efficient at inference time, we do not consider this approach here. 

A much cheaper approach is to simply pass the example emails in the context window (a few-shot prompt) and rely on the model's in-context learning abilities to adapt the style. Alternative methods leverage the semantically meaningful *internal* structure of the model. While steering vectors have had success in adjusting the output to specific styles and require only a handful of examples, they did not work well for more fine-grained structure with a larger diversity in semantics. Hence, I consider a different approach: a finetuning method at the level of representations, inspired by the approaches in https://openreview.net/pdf?id=nZeVKeeFYf9, https://arxiv.org/pdf/2402.15179 and https://arxiv.org/abs/2404.03592.

In [4]:
# Load the JSON datasets: the plain email instructions and the styled prompt/email pairs.
import pandas as pd

# Plain email-writing instructions.
with open('../data/styles/email_prompts_n=50.json', 'r') as prompts_file:
    email_prompts = json.load(prompts_file)

# Styled prompts and the emails generated from them, keyed by personality.
with open('../data/styles/email_styles_short.json', 'r') as styles_file:
    data = json.load(styles_file)

prompts_personality = data['prompts_personality']  # prompts used to generate the data
outputs_personality = data['outputs_personality']

# The first n_train examples are used for training; later cells evaluate on
# indices n_train, n_train + 1, ...
n_train = 45

# Show the first generated email of every personality as a sanity check.
for style_key in ('ds', 'tf', 'wc', 'h'):
    print(outputs_personality[style_key][0])

Subject: Request for Meeting: New Project Discussion

Hi [Colleague's Name],

I’d like to schedule a meeting to discuss a new project that aligns with our strategic goals. The core idea focuses on leveraging [specific technology/approach] to enhance [specific outcome]. 

I believe this could drive significant value by [briefly outline potential benefits]. Let’s analyze the feasibility and outline actionable next steps.

Please share your availability for this week.

Best,  
[Your Name]  
Subject: Let’s Chat About Our Upcoming Project (and Maybe Snag Some Snacks)

Hey [Colleague's Name],

I hope this email finds you caffeinated and ready to conquer the universe! I’d love to schedule a meeting to discuss our new project—because what’s more thrilling than brainstorming in a room with questionable air conditioning? 

Let’s unleash our creativity (and maybe a little chaos) together. How about we meet this week? I promise to bring snacks—because nothing fuels brilliance like chips and dip.



### Prompting baseline
We begin by establishing a performance baseline using the same prompts we used to generate the data. We assume the generated data (from GPT-4o mini) is our gold standard for the email styles. Here we test how well a different model is able to replicate the style using solely the descriptive prompt. 

One can think of this as testing the following: a human wrote emails in their own style; we take our best stab at describing that style and optimising a prompt, and we test it on some model. 

In [6]:
# Prompt-only evaluation on the first five held-out prompts: a plain baseline
# generation first, then one generation per personality prompt.
style_names = {
    'ds': 'David Sacks',
    'tf': 'Tina Fey',
    'wc': 'Winston Churchill',
    'h': 'Ernest Hemingway',
}

for ts in range(5):
    test_idx = n_train + ts
    print('INSTRUCTION')
    print(email_prompts[test_idx])

    # (header printed before the completion, user message sent to the model)
    runs = [('Baseline email' + str(ts), email_prompts[test_idx])]
    runs += [
        ('\n\n\nPrompt performance for ' + name + ' style note\n\n\n', prompts_personality[key][test_idx])
        for key, name in style_names.items()
    ]

    for header, user_message in runs:
        toks = tokenize_instructions(tokenizer, [{"role": "user", "content": user_message}]).to(device)
        output = model.generate(toks, max_new_tokens = 400)
        # Decode only what follows the prompt tokens.
        summary = tokenizer.decode(output[0,toks.shape[1]:], skip_special_tokens=True)
        print(header)
        print(summary)

    # Free the tensors of this iteration before moving to the next prompt.
    del toks, output, summary
    torch.cuda.empty_cache()
    gc.collect()

INSTRUCTION
Write an email to a journalist to pitch a story idea.
Baseline email0
Subject: Innovative Story Idea: The Rise of Urban Beekeeping

Dear [Journalist's Name],

I hope this email finds you well. I am writing to you as a passionate advocate for sustainable living and a keen observer of the growing trend of urban beekeeping. I believe you would find this story idea both intriguing and relevant to your readership.

Urban beekeeping has been gaining popularity in cities across the globe, as more and more people recognize the importance of bees in our ecosystem and the benefits they bring to urban environments. This unique blend of nature and city life has created a fascinating story that I am eager to share with your audience.

Here are the key points I would like to explore in this story:

1. The history and evolution of urban beekeeping: How did this movement start, and what has been its growth over the years?
2. The environmental impact: How does urban beekeeping contribute to

### Few-shot prompting 
Here we pass several example emails in the context so that the model adapts the style of the requested email. 

In [7]:
num_shots = 5


def format_few_shot_examples(examples, num_shots):
    """Join the first `num_shots` examples into a single few-shot string.

    Each example is rendered as 'Example <i>: <text>' (0-indexed) and the
    pieces are joined with a single space. If fewer than `num_shots` examples
    are available, all of them are used instead of raising an IndexError.
    """
    return ' '.join(
        'Example ' + str(idx) + ': ' + example
        for idx, example in enumerate(examples[:num_shots])
    )


# One few-shot block per personality, built from the first num_shots emails.
few_shots_ds = format_few_shot_examples(outputs_personality['ds'], num_shots)
few_shots_tf = format_few_shot_examples(outputs_personality['tf'], num_shots)
few_shots_wc = format_few_shot_examples(outputs_personality['wc'], num_shots)
few_shots_h = format_few_shot_examples(outputs_personality['h'], num_shots)
base_prompt = '''You are given several examples of email styles. \n Examples: \n ''' 

In [8]:
# Few-shot evaluation on the first five held-out prompts: a plain baseline
# generation first, then one generation per personality using that
# personality's in-context examples.
few_shot_runs = [
    ('\n\n\nFew-shot prompt performance for David Sacks style note\n\n\n', few_shots_ds),
    ('\n\n\nFew-shot prompt performance for Tina Fey style note\n\n\n', few_shots_tf),
    ('\n\n\nFew shot prompt performance for Winston Churchill style note\n\n\n', few_shots_wc),
    ('\n\n\nFew shot prompt performance for Ernest Hemingway style note\n\n\n', few_shots_h),
]
style_request = 'Do it in the style of the examples.'

for ts in range(5):
    instruction = email_prompts[n_train+ts]
    print('INSTRUCTION')
    print(instruction)

    # (header printed before the completion, user message sent to the model)
    runs = [('Baseline email'+str(ts), instruction)]
    for header, few_shots in few_shot_runs:
        # Separate the last example, the instruction and the closing request explicitly.
        # Plain concatenation glued them together with no whitespace in between.
        runs.append((header, base_prompt + few_shots + '\n\n' + instruction + ' ' + style_request))

    for header, user_message in runs:
        toks = tokenize_instructions(tokenizer, [{"role": "user", "content": user_message}]).to(device)
        output = model.generate(toks, max_new_tokens = 400)
        # Decode only what follows the prompt tokens.
        summary = tokenizer.decode(output[0,toks.shape[1]:], skip_special_tokens=True)
        print(header)
        print(summary)

    # Free the tensors of this iteration before moving to the next prompt.
    del toks, output, summary
    torch.cuda.empty_cache()
    gc.collect()

INSTRUCTION
Write an email to a journalist to pitch a story idea.
Baseline email0
Subject: Innovative Story Idea: The Rise of Urban Beekeeping

Dear [Journalist's Name],

I hope this email finds you well. I am writing to you as a passionate advocate for sustainable living and a keen observer of the growing trend of urban beekeeping. I believe you would find this story idea both intriguing and relevant to your readership.

Urban beekeeping has been gaining popularity in cities across the globe, as more and more people recognize the importance of bees in our ecosystem and the benefits they bring to urban environments. This unique blend of nature and city life has created a fascinating story that I am eager to share with your audience.

Here are the key points I would like to explore in this story:

1. The history and evolution of urban beekeeping: How did this movement start, and what has been its growth over the years?
2. The environmental impact: How does urban beekeeping contribute to

### Representation finetuning

##### David Sacks

In [9]:
# PERSONALITY DS
# Set A: (instruction, David Sacks email) conversations; set B: the same
# instructions paired with the Winston Churchill emails.
# NOTE(review): presumably A is the style we steer towards and B the contrast
# (the test cell says "steer to A") - confirm in steering_vectors.optimise_steering_vecs.
train_prompts = email_prompts[:n_train]
dd_with_c_A = [
    [{"role": "user", "content": x}, {"role": "assistant", "content": y}]
    for x, y in zip(train_prompts, outputs_personality['ds'][:n_train])
]
toks_A = tokenize_instructions(tokenizer, dd_with_c_A).to(device)
dd_with_c_B = [
    [{"role": "user", "content": x}, {"role": "assistant", "content": y}]
    for x, y in zip(train_prompts, outputs_personality['wc'][:n_train])
]
toks_B = tokenize_instructions(tokenizer, dd_with_c_B).to(device)
# hidden_dim is read from the model config (3072 for Phi-3.5-mini) instead of being
# hard-coded, so it stays correct if model_name is changed.
sv_optimised = steering_vectors.optimise_steering_vecs(
    model, toks_A, toks_B,
    hidden_dim=model.config.hidden_size, layer=20, batch_size=6, num_epochs=100,
)

0 9.051568984985352
6 7.3139495849609375
12 6.010658264160156
18 5.161558151245117
24 5.74717903137207
30 5.447908401489258
36 5.635357856750488
42 4.098409652709961
0 6.052839279174805
6 5.0426740646362305
12 4.349202632904053
18 3.7867438793182373
24 4.159479141235352
30 4.2082672119140625
36 4.192946434020996
42 3.64713454246521
0 4.6134209632873535
6 4.218052864074707
12 3.9142236709594727
18 3.302253246307373
24 3.7121505737304688
30 3.86875057220459
36 3.7848293781280518
42 3.4703779220581055
0 4.231521129608154
6 3.928847551345825
12 3.690150737762451
18 3.2042644023895264
24 3.564332962036133
30 3.6825547218322754
36 3.641009569168091
42 3.3426036834716797
0 4.0304107666015625
6 3.745485544204712
12 3.5555551052093506
18 3.135753870010376
24 3.45817494392395
30 3.5556890964508057
36 3.534945249557495
42 3.239894390106201
0 3.8702781200408936
6 3.6024603843688965
12 3.4317729473114014
18 3.070039749145508
24 3.355224847793579
30 3.4360506534576416
36 3.428405284881592
42 3.13295

In [10]:
# Test the steering vector on the first five held-out prompts (David Sacks).
for tr in range(5):
    held_out = n_train + tr
    # The "baseline" printed here is the dataset email for this style and prompt.
    print('BASELINE EMAIL\n')
    print(outputs_personality['ds'][held_out])
    prompt = email_prompts[held_out]
    toks_test = tokenize_instructions(tokenizer, [{"role": "user", "content": prompt}])
    # steer to A
    generation_intervene, _ = steering_vectors.do_steering(
        model,
        toks_test.to(device),
        sv_optimised.to(device),
        scale = -1,
        layer = [20],
        proj=False,
        all_toks=False,
    )
    # Decode only what follows the prompt tokens.
    new_tokens = generation_intervene[0,toks_test.shape[1]:]
    intervene = tokenizer.decode(new_tokens, skip_special_tokens=True)
    print('GENERATED EMAIL\n')
    print(intervene)
    print('\n')

BASELINE EMAIL

Subject: Story Idea: The Rise of AI in Small Business

Hi [Journalist's Name],

I propose a piece on how small businesses are leveraging AI tools to enhance productivity and drive growth. Key insights include:

1. **Cost Efficiency**: AI reduces operational costs by automating routine tasks.
2. **Market Reach**: Businesses are using AI for targeted marketing, increasing customer engagement.
3. **Competitive Advantage**: Early adopters see significant market differentiation.

This trend has strategic implications for the future of entrepreneurship. Let me know if you’re interested in exploring this further.

Best,  
[Your Name]  
[Your Contact Information]
GENERATED EMAIL

Subject: Exciting Story Opportunity on [Topic/Industry]

Dear [Journalist's Name],

I hope this email finds you well. I wanted to reach out to discuss a story idea I believe aligns with your expertise and our network.

I've noticed your insightful coverage on [specific topic or industry] and I think th

##### Tina Fey

In [11]:
# PERSONALITY TF
# Set A: (instruction, Tina Fey email) conversations; set B: the same
# instructions paired with the Winston Churchill emails.
# NOTE(review): presumably A is the style we steer towards and B the contrast
# (the test cell says "steer to A") - confirm in steering_vectors.optimise_steering_vecs.
train_prompts = email_prompts[:n_train]
dd_with_c_A = [
    [{"role": "user", "content": x}, {"role": "assistant", "content": y}]
    for x, y in zip(train_prompts, outputs_personality['tf'][:n_train])
]
toks_A = tokenize_instructions(tokenizer, dd_with_c_A).to(device)
dd_with_c_B = [
    [{"role": "user", "content": x}, {"role": "assistant", "content": y}]
    for x, y in zip(train_prompts, outputs_personality['wc'][:n_train])
]
toks_B = tokenize_instructions(tokenizer, dd_with_c_B).to(device)
# hidden_dim is read from the model config (3072 for Phi-3.5-mini) instead of being
# hard-coded, so it stays correct if model_name is changed.
sv_optimised = steering_vectors.optimise_steering_vecs(
    model, toks_A, toks_B,
    hidden_dim=model.config.hidden_size, layer=20, batch_size=6, num_epochs=100,
)

0 7.543674468994141
6 7.077737331390381
12 7.93496561050415
18 5.777463912963867
24 6.978170871734619
30 6.090890407562256
36 4.962981700897217
42 5.897157669067383
0 5.603682041168213
6 5.285212516784668
12 6.025216102600098
18 4.601556301116943
24 5.450366497039795
30 4.796319484710693
36 4.052661418914795
42 4.970435619354248
0 4.5882368087768555
6 4.400259971618652
12 5.078154563903809
18 3.9634101390838623
24 4.618185043334961
30 4.250597953796387
36 3.5668721199035645
42 4.555331230163574
0 3.9672558307647705
6 3.9835972785949707
12 4.702001571655273
18 3.5844926834106445
24 4.209259510040283
30 4.054076194763184
36 3.3419530391693115
42 4.192587375640869
0 3.7571165561676025
6 3.827592611312866
12 4.433096408843994
18 3.5459394454956055
24 4.058040142059326
30 3.9169065952301025
36 3.35416316986084
42 4.012258052825928
0 3.695732355117798
6 3.7231338024139404
12 4.2501373291015625
18 3.5378329753875732
24 3.939887762069702
30 3.8089020252227783
36 3.336313486099243
42 3.82532811

In [12]:
# Test the steering vector on the first five held-out prompts (Tina Fey).
for tr in range(5):
    held_out = n_train + tr
    # The "baseline" printed here is the dataset email for this style and prompt.
    print('BASELINE EMAIL\n')
    print(outputs_personality['tf'][held_out])
    prompt = email_prompts[held_out]
    toks_test = tokenize_instructions(tokenizer, [{"role": "user", "content": prompt}])
    # steer to A
    generation_intervene, _ = steering_vectors.do_steering(
        model,
        toks_test.to(device),
        sv_optimised.to(device),
        scale = -1,
        layer = [20],
        proj=False,
        all_toks=False,
    )
    # Decode only what follows the prompt tokens.
    new_tokens = generation_intervene[0,toks_test.shape[1]:]
    intervene = tokenizer.decode(new_tokens, skip_special_tokens=True)
    print('GENERATED EMAIL\n')
    print(intervene)
    print('\n')

BASELINE EMAIL

Subject: A Story Idea That’s Hotter Than My Morning Coffee ☕️ 

Dear [Journalist's Name],

I hope this email finds you happier than a cat in a sunbeam! I’ve got a story idea that’s juicier than a reality show reunion special: “The Rise of Plant-Based Meat—Is It the Future or Just a Fancy Salad?” Picture this: interviews with chefs, confused carnivores, and a deep dive into the existential crisis of a burger. 

Let’s take a bite out of this meaty topic! 

Best,  
[Your Name]  
P.S. No animals were harmed in the making of this email. 🥩✌️
GENERATED EMAIL

Subject: Story Idea: The Hidden Heroes – Unsung Volunteers Making a Difference

Hi [Journalist’s Name],

I hope this email finds you well. I’m [Your Name], and I’m reaching out to you in hopes that you might be interested in a story idea that I think would resonate with your readers.

I’ve noticed that you’ve done some great work on local stories, and I believe your expertise would be perfect for a piece I’m thinking abou

##### Winston Churchill

In [13]:
# PERSONALITY WC
# Set A: (instruction, Winston Churchill email) conversations; set B: the same
# instructions paired with the David Sacks emails.
# NOTE(review): presumably A is the style we steer towards and B the contrast
# (the test cell says "steer to A") - confirm in steering_vectors.optimise_steering_vecs.
train_prompts = email_prompts[:n_train]
dd_with_c_A = [
    [{"role": "user", "content": x}, {"role": "assistant", "content": y}]
    for x, y in zip(train_prompts, outputs_personality['wc'][:n_train])
]
toks_A = tokenize_instructions(tokenizer, dd_with_c_A).to(device)
dd_with_c_B = [
    [{"role": "user", "content": x}, {"role": "assistant", "content": y}]
    for x, y in zip(train_prompts, outputs_personality['ds'][:n_train])
]
toks_B = tokenize_instructions(tokenizer, dd_with_c_B).to(device)
# hidden_dim is read from the model config (3072 for Phi-3.5-mini) instead of being
# hard-coded, so it stays correct if model_name is changed.
sv_optimised = steering_vectors.optimise_steering_vecs(
    model, toks_A, toks_B,
    hidden_dim=model.config.hidden_size, layer=20, batch_size=6, num_epochs=100,
)

0 6.3967108726501465
6 5.3027873039245605
12 4.592596054077148
18 5.369112491607666
24 5.198391437530518
30 4.369042873382568
36 5.160857677459717
42 3.6315395832061768
0 4.878927230834961
6 4.170957088470459
12 3.6717729568481445
18 4.2032270431518555
24 4.014375686645508
30 3.460160732269287
36 3.899286985397339
42 2.899387836456299
0 3.6755425930023193
6 3.205781936645508
12 2.8954687118530273
18 3.279036283493042
24 3.1419122219085693
30 2.835592746734619
36 3.1774191856384277
42 2.5263681411743164
0 3.0425519943237305
6 2.7433650493621826
12 2.5338704586029053
18 2.817441463470459
24 2.6969122886657715
30 2.4925265312194824
36 2.7365262508392334
42 2.546088933944702
0 2.6462361812591553
6 2.4441771507263184
12 2.2885046005249023
18 2.7444562911987305
24 2.4053399562835693
30 2.292717695236206
36 2.4898524284362793
42 2.640221118927002
0 2.4418482780456543
6 2.2890756130218506
12 2.348677635192871
18 2.767967462539673
24 2.445010185241699
30 2.3384761810302734
36 2.3873860836029053

In [14]:
# Test the steering vector on the first five held-out prompts (Winston Churchill).
for tr in range(5):
    held_out = n_train + tr
    # The "baseline" printed here is the dataset email for this style and prompt.
    print('BASELINE EMAIL\n')
    print(outputs_personality['wc'][held_out])
    prompt = email_prompts[held_out]
    toks_test = tokenize_instructions(tokenizer, [{"role": "user", "content": prompt}])
    # steer to A
    generation_intervene, _ = steering_vectors.do_steering(
        model,
        toks_test.to(device),
        sv_optimised.to(device),
        scale = -1,
        layer = [20],
        proj=False,
        all_toks=False,
    )
    # Decode only what follows the prompt tokens.
    new_tokens = generation_intervene[0,toks_test.shape[1]:]
    intervene = tokenizer.decode(new_tokens, skip_special_tokens=True)
    print('GENERATED EMAIL\n')
    print(intervene)
    print('\n')

BASELINE EMAIL

Subject: A Momentous Tale of Resilience and Innovation

Dear [Journalist's Name],

In the annals of human endeavor, the indomitable spirit of progress stands as a beacon against the encroaching shadows of despair. I propose a narrative that chronicles the audacious triumphs of innovators rising from the ashes of adversity. This tale, rich in resilience and fortitude, shall inspire a generation to embrace the mantle of change. Together, let us illuminate the path of hope and ingenuity that leads us towards a brighter dawn.

Yours in the pursuit of truth and progress,

[Your Name]  
[Your Contact Information]  


GENERATED EMAIL

Subject: A Pioneering Story Idea in the Realm of Renewable Energy

Dear [Journalist's Name],

I trust this message finds you in the pursuit of a story that will illuminate the path toward a brighter future. It is my honor to reach out to you, a distinguished member of the press, with a proposal that I believe will resonate deeply within our collective aspirations for progress and enlightenment.

In the annals of human endeavor, few fields have captured the imagination and the spirit of our age quite like the noble pursuit of renewable energy. As we stand upon the precipice of a new era, where the clarity of our choices will shape the destiny of generations to come, I offer you a story idea that promises to inspire, challenge, and uplift.

The Subject of Our Prayer: "The Unseen Revolution: A Tale of Renewable Energy's Triumph"

Together, let us embark upon a narrative that transcends the mere recounting of scientific advancement. Let us weave a tale that celebrates the


In [15]:
# PERSONALITY H
# Set A: (instruction, Ernest Hemingway email) conversations; set B: the same
# instructions paired with the David Sacks emails.
# NOTE(review): presumably A is the style we steer towards and B the contrast
# (the test cell says "steer to A") - confirm in steering_vectors.optimise_steering_vecs.
train_prompts = email_prompts[:n_train]
dd_with_c_A = [
    [{"role": "user", "content": x}, {"role": "assistant", "content": y}]
    for x, y in zip(train_prompts, outputs_personality['h'][:n_train])
]
toks_A = tokenize_instructions(tokenizer, dd_with_c_A).to(device)
dd_with_c_B = [
    [{"role": "user", "content": x}, {"role": "assistant", "content": y}]
    for x, y in zip(train_prompts, outputs_personality['ds'][:n_train])
]
toks_B = tokenize_instructions(tokenizer, dd_with_c_B).to(device)
# hidden_dim is read from the model config (3072 for Phi-3.5-mini) instead of being
# hard-coded, so it stays correct if model_name is changed.
sv_optimised = steering_vectors.optimise_steering_vecs(
    model, toks_A, toks_B,
    hidden_dim=model.config.hidden_size, layer=20, batch_size=6, num_epochs=100,
)

0 7.15753173828125
6 6.606334686279297
12 7.4155073165893555
18 5.63093376159668
24 6.4936017990112305
30 4.882500171661377
36 5.909548759460449
42 3.7130510807037354
0 5.489867210388184
6 5.213764190673828
12 6.0197978019714355
18 4.677042007446289
24 5.407768726348877
30 4.241628646850586
36 5.109988689422607
42 3.3149797916412354
0 4.792587757110596
6 4.596711158752441
12 5.337589263916016
18 4.203169345855713
24 4.826592922210693
30 3.8493449687957764
36 4.597240447998047
42 3.0384280681610107
0 4.307667255401611
6 4.137441158294678
12 4.7926201820373535
18 3.788987636566162
24 4.310176372528076
30 3.492004871368408
36 4.128415107727051
42 2.772296190261841
0 3.825169086456299
6 3.6754093170166016
12 4.206390857696533
18 3.344479560852051
24 3.7170073986053467
30 3.0652718544006348
36 3.538097858428955
42 2.617856025695801
0 3.204390287399292
6 3.0788073539733887
12 3.5034399032592773
18 2.9459800720214844
24 3.0501651763916016
30 2.6134588718414307
36 2.9392716884613037
42 2.73282

In [16]:
# Test the steering vector on the first five held-out prompts (Ernest Hemingway).
for tr in range(5):
    held_out = n_train + tr
    # The "baseline" printed here is the dataset email for this style and prompt.
    print('BASELINE EMAIL\n')
    print(outputs_personality['h'][held_out])
    prompt = email_prompts[held_out]
    toks_test = tokenize_instructions(tokenizer, [{"role": "user", "content": prompt}])
    # steer to A
    generation_intervene, _ = steering_vectors.do_steering(
        model,
        toks_test.to(device),
        sv_optimised.to(device),
        scale = -1,
        layer = [20],
        proj=False,
        all_toks=False,
    )
    # Decode only what follows the prompt tokens.
    new_tokens = generation_intervene[0,toks_test.shape[1]:]
    intervene = tokenizer.decode(new_tokens, skip_special_tokens=True)
    print('GENERATED EMAIL\n')
    print(intervene)
    print('\n')

BASELINE EMAIL

Subject: A Momentous Tale of Resilience and Innovation

Dear [Journalist's Name],

In the annals of human endeavor, the indomitable spirit of progress stands as a beacon against the encroaching shadows of despair. I propose a narrative that chronicles the audacious triumphs of innovators rising from the ashes of adversity. This tale, rich in resilience and fortitude, shall inspire a generation to embrace the mantle of change. Together, let us illuminate the path of hope and ingenuity that leads us towards a brighter dawn.

Yours in the pursuit of truth and progress,

[Your Name]  
[Your Contact Information]  
GENERATED EMAIL

Subject: Pitch: Uncovering the Heart of Our Community

Dear [Journalist's Name],

I hope this message finds you well. I am writing to you today with a story idea that I believe will resonate with your readers and capture the essence of our community.

The story I have in mind is one that explores the heart of our town. It is a tale of unity, divers

##### The role of the instruction
Note that when optimising the steering vector we prepended the original instruction (the request describing which email to write, without the personality description). How critical is this? 

In [22]:
# PERSONALITY DS
# Ablation: the conversations contain only the assistant turn (the styled email),
# without the user instruction.
# Set A: David Sacks emails; set B: Tina Fey emails.
# NOTE(review): the David Sacks run that includes the instruction contrasts against
# the Winston Churchill emails ('wc'), whereas this cell contrasts against 'tf'. The
# two runs therefore differ in more than the presence of the instruction - confirm
# that this is intended.
dd_with_c_A = [
    [{"role": "assistant", "content": y}]
    for y in outputs_personality['ds'][:n_train]
]
toks_A = tokenize_instructions(tokenizer, dd_with_c_A).to(device)
dd_with_c_B = [
    [{"role": "assistant", "content": y}]
    for y in outputs_personality['tf'][:n_train]
]
toks_B = tokenize_instructions(tokenizer, dd_with_c_B).to(device)
# hidden_dim is read from the model config (3072 for Phi-3.5-mini) instead of being
# hard-coded, so it stays correct if model_name is changed.
sv_optimised = steering_vectors.optimise_steering_vecs(
    model, toks_A, toks_B,
    hidden_dim=model.config.hidden_size, layer=20, batch_size=6, num_epochs=100,
)

0 9.889500617980957
6 8.366094589233398
12 6.569324970245361
18 5.958071231842041
24 6.456270217895508
30 6.2127299308776855
36 6.290104866027832
42 5.23817777633667
0 6.834237098693848
6 5.833614349365234
12 4.709639549255371
18 4.370288372039795
24 4.740732192993164
30 4.703588962554932
36 4.7701873779296875
42 3.9103715419769287
0 5.187464237213135
6 4.648818492889404
12 3.921088218688965
18 3.716534376144409
24 4.024796485900879
30 4.046421051025391
36 4.155091762542725
42 3.3696041107177734
0 4.5975751876831055
6 4.140122890472412
12 3.48089599609375
18 3.3085696697235107
24 3.5349912643432617
30 3.613318681716919
36 3.7187066078186035
42 3.066330671310425
0 4.103193283081055
6 3.758168935775757
12 3.1999170780181885
18 3.0627880096435547
24 3.2606775760650635
30 3.3388421535491943
36 3.428612232208252
42 2.8613078594207764
0 3.772495985031128
6 3.4799530506134033
12 2.990356206893921
18 2.867568254470825
24 3.031376838684082
30 3.1095900535583496
36 3.193682909011841
42 2.6817753

In [23]:
# Test the steering vector: for five prompts starting at index n_train, print the
# stored 'ds' email next to the email generated with whatever `sv_optimised`
# currently holds.
for tr in range(5):
    print('BASELINE EMAIL\n')
    print(outputs_personality['ds'][n_train + tr])

    # Single-turn conversation containing only the user instruction.
    prompt = email_prompts[n_train + tr]
    toks_test = tokenize_instructions(tokenizer, [dict(role="user", content=prompt)])

    # steer to A
    # NOTE(review): scale=-1 presumably pushes towards personality A -- confirm
    # the sign convention in steering_vectors.do_steering.
    generation_intervene, _ = steering_vectors.do_steering(
        model,
        toks_test.to(device),
        sv_optimised.to(device),
        scale=-1,
        layer=[20],
        proj=False,
        all_toks=False,
    )

    # Drop the prompt tokens so that only the newly generated text is decoded.
    intervene = tokenizer.decode(
        generation_intervene[0, toks_test.shape[1]:],
        skip_special_tokens=True,
    )
    print('GENERATED EMAIL\n', intervene, '\n', sep='\n')

BASELINE EMAIL

Subject: Story Idea: The Rise of AI in Small Business

Hi [Journalist's Name],

I propose a piece on how small businesses are leveraging AI tools to enhance productivity and drive growth. Key insights include:

1. **Cost Efficiency**: AI reduces operational costs by automating routine tasks.
2. **Market Reach**: Businesses are using AI for targeted marketing, increasing customer engagement.
3. **Competitive Advantage**: Early adopters see significant market differentiation.

This trend has strategic implications for the future of entrepreneurship. Let me know if you’re interested in exploring this further.

Best,  
[Your Name]  
[Your Contact Information]
GENERATED EMAIL

Subject: Story Pitch: Innovations in Renewable Energy Sector

[Your Name]
[Your Position]
[Your Contact Information]
[Company Name]
[Company Address]

[Journalist's Name]
[Journalist's Position]
[Newspaper/Blog Name]
[Contact Information]

[Date]

[Journalist's Name]
[Journalist's Position]
[Newspaper/

##### Steering without `toks_B`

In [19]:
# PERSONALITY DS -- optimisation with no second token set (None is passed in
# place of toks_B).
# Every training conversation pairs the user instruction with the matching
# 'ds' email as the assistant reply.
dd_with_c_A = [
    [
        dict(role="user", content=instruction),
        dict(role="assistant", content=email),
    ]
    for instruction, email in zip(
        email_prompts[:n_train],
        outputs_personality['ds'][:n_train],
    )
]
toks_A = tokenize_instructions(tokenizer, dd_with_c_A).to(device)

# NOTE(review): hidden_dim=3072 is hard-coded and must match the loaded model's
# residual width -- confirm if the model is swapped.
sv_optimised = steering_vectors.optimise_steering_vecs(
    model,
    toks_A,
    None,
    hidden_dim=3072,
    layer=20,
    batch_size=6,
    num_epochs=100,
)

0 8.44184684753418
6 6.978183269500732
12 5.891096591949463
18 5.130357265472412
24 5.73012638092041
30 5.442779064178467
36 5.676138401031494
42 4.135100364685059
0 6.172484397888184
6 5.18703031539917
12 4.478728294372559
18 3.9766018390655518
24 4.462827205657959
30 4.292239189147949
36 4.528341770172119
42 3.4377102851867676
0 5.133018493652344
6 4.343918800354004
12 3.8260722160339355
18 3.4605886936187744
24 3.8577849864959717
30 3.7321650981903076
36 3.958733081817627
42 3.0645761489868164
0 4.477975368499756
6 3.810967445373535
12 3.411057710647583
18 3.100977897644043
24 3.440492868423462
30 3.336028575897217
36 3.547126293182373
42 2.7884345054626465
0 3.9786179065704346
6 3.3951127529144287
12 3.059128761291504
18 2.7873294353485107
24 3.047895908355713
30 2.941533327102661
36 3.1016316413879395
42 2.4698126316070557
0 3.406376361846924
6 2.8788468837738037
12 2.6192739009857178
18 2.387735366821289
24 2.5574190616607666
30 2.4414756298065186
36 2.5481808185577393
42 2.10082

In [21]:
# Test the steering vector: for five prompts starting at index n_train, print the
# stored 'ds' email followed by the email generated under steering with the
# current `sv_optimised`.
for tr in range(5):
    print('BASELINE EMAIL\n')
    print(outputs_personality['ds'][n_train + tr])

    # Build a one-message chat from the instruction and tokenise it.
    prompt = email_prompts[n_train + tr]
    toks_test = tokenize_instructions(tokenizer, [dict(role="user", content=prompt)])

    # steer to A
    # NOTE(review): scale=-1 presumably pushes towards personality A -- confirm
    # the sign convention in steering_vectors.do_steering.
    generation_intervene, _ = steering_vectors.do_steering(
        model,
        toks_test.to(device),
        sv_optimised.to(device),
        scale=-1,
        layer=[20],
        proj=False,
        all_toks=False,
    )

    # Decode only what follows the prompt tokens.
    intervene = tokenizer.decode(
        generation_intervene[0, toks_test.shape[1]:],
        skip_special_tokens=True,
    )
    print('GENERATED EMAIL\n', intervene, '\n', sep='\n')

BASELINE EMAIL

Subject: Story Idea: The Rise of AI in Small Business

Hi [Journalist's Name],

I propose a piece on how small businesses are leveraging AI tools to enhance productivity and drive growth. Key insights include:

1. **Cost Efficiency**: AI reduces operational costs by automating routine tasks.
2. **Market Reach**: Businesses are using AI for targeted marketing, increasing customer engagement.
3. **Competitive Advantage**: Early adopters see significant market differentiation.

This trend has strategic implications for the future of entrepreneurship. Let me know if you’re interested in exploring this further.

Best,  
[Your Name]  
[Your Contact Information]
GENERATED EMAIL

Subject: Story Pitch: The Impact of Local Volunteer Programs on Community Development

Dear [Journalist's Name],

I hope this email finds you well. I am writing to propose a story idea that I believe will resonate with your readers and provide valuable insights into the role of local volunteer programs