In [6]:
import torch
from torch import Tensor
from transformer_lens import HookedTransformer, ActivationCache
from transformer_lens.hook_points import HookPoint
from transformer_lens.utils import get_act_name
from jaxtyping import Float, Int
from typing import List, Optional, Tuple
import sys
#from jax import typing
from pympler import asizeof
#asizeof(model)/(2**30) size in GiB


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('device =', device)
torch.set_grad_enabled(False)

# List all available GPUs
if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        print(f"Device {i}: {torch.cuda.get_device_name(i)}")
    print(f"Current CUDA device: {torch.cuda.current_device()}")

else:
    print("CUDA is not available. Listing CPUs instead.")
    print(torch.__version__)

device = cpu
CUDA is not available. Listing CPUs instead.
2.2.0+cu121


In [7]:
# Happy
context_1 = '''Bob: Hey Alice, how was your day today?
Alice: Oh, Bob, it was fantastic! I'm still riding the high from it all!
Bob: That sounds wonderful! What happened to make it so great?
Alice: Well, for starters, the weather was absolutely perfect. Not too hot, not too cold, just the kind of day you want to bottle up and keep forever. And then, on my way to work, my favorite song played on the radio. It felt like a sign that the day was going to be amazing.
Bob: I love when that happens! It's like the universe is giving you a personal thumbs up. What else happened?
Alice: Oh, it gets better. When I got to work, I found out that I received the promotion I've been hoping for. It was such a surprise! I've worked so hard for this, Bob, and it finally paid off.
Bob: Alice, that's incredible news! Congratulations on the promotion! You totally deserve it.
Alice: Thank you so much! And there's more. During lunch, I went out with a few colleagues to celebrate, and we ended up having the best time. The food was delicious, and the company was even better.
Bob: Sounds like a perfect day from start to finish.
Alice: It really was. And to cap it all off, when I got home, I found a package waiting for me. The book I've been wanting to read for months was finally released, and my copy arrived. I can't wait to dive into it.
Bob: Wow, what a day! You've got the promotion, great food, good company, and a new book. It's like everything aligned for you today.
Alice: In one word, I was so "'''

# Angry
context_2 = '''Bob: Hey Alice, how was your day?
Alice: Oh, don't even get me started. It was absolutely infuriating!
Bob: Really? What happened that got you so angry?
Alice: Where do I even begin? First, the traffic was a nightmare. I was stuck in my car for what felt like an eternity. And then, when I finally got to work, the coffee machine was broken. Can you believe it? No coffee!
Bob: That sounds rough. No coffee can definitely start the day on a wrong note.
Alice: Exactly! And as if that wasn't enough, my computer decided to crash right before I was about to save a crucial report. Hours of work just vanished. I had to start all over again.
Bob: That's terrible, Alice. I'm really sorry to hear that. Computers can be so unreliable when you need them the most.
Alice: And to top it all off, during lunch, I spilled my meal all over my new shirt. It's like the universe was conspiring against me today. I'm just so fed up with everything!
Bob: I can only imagine how frustrating all of that must have been. If there's anything I can do to help or if you need someone to vent to, I'm here for you.
Alice: In one word, I was so "'''

In [8]:
def get_activations(model: str,
                    prompt: str
) -> Float[Tensor, '...']:
  """ Returns the activations of a model on a input prompt.
  """
  _, cache = model.run_with_cache(prompt)
  return cache


def patch_activations(
    target_model: HookedTransformer,
    source_model: HookedTransformer,
    source_position: int,
    target_position: int,
    layer: int,
    target_prompt: str,
    source_cache: ActivationCache,
    activation_type: str = 'resid_pre'
):
    """ Patches an activation vector into the target model.
    """

    source_cache = source_cache[activation_type, layer]

    def hook_fn(target_activations: Float[Tensor, '...'],
                hook: HookPoint
    ) -> Float[Tensor, '...']:
        target_activations[:,target_position,:] = source_cache[:,source_position,:]
        return target_activations


    target_logits = target_model.run_with_hooks(
        target_prompt,
        return_type="logits",
        fwd_hooks=[
            (get_act_name(activation_type, layer), hook_fn)
        ]
    )

    predicted_tokens = target_logits.argmax(dim=-1).squeeze()[:-1]

    return predicted_tokens

In [8]:
model = HookedTransformer.from_pretrained("pythia-2.8b", device=device)
_, cache = model.run_with_cache(context_1)

layers = [i for i in range(0, model.cfg.n_layers)]

for layer in layers:
  predicted_tokens = patch_activations(target_model=model,
                                    source_model=model,
                                    source_position=-1,
                                    target_position=-1,
                                    layer=layer,
                                    target_prompt=context_2,
                                    source_cache=cache)

  next_str_token = model.to_str_tokens(predicted_tokens[-1])
  print('Layer:', layer, '-', next_str_token)
  torch.cuda.empty_cache()
  model.reset_hooks()


del cache
del model

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-2.8b into HookedTransformer
Layer: 0 - [' angry']
Layer: 1 - [' angry']
Layer: 2 - [' angry']
Layer: 3 - [' angry']
Layer: 4 - [' angry']
Layer: 5 - [' angry']
Layer: 6 - [' angry']
Layer: 7 - [' angry']
Layer: 8 - [' angry']
Layer: 9 - [' angry']
Layer: 10 - [' angry']
Layer: 11 - [' angry']
Layer: 12 - [' angry']
Layer: 13 - [' angry']
Layer: 14 - [' angry']
Layer: 15 - [' angry']
Layer: 16 - [' angry']
Layer: 17 - [' angry']
Layer: 18 - [' angry']
Layer: 19 - [' angry']
Layer: 20 - [' angry']
Layer: 21 - [' angry']
Layer: 22 - [' angry']
Layer: 23 - [' angry']
Layer: 24 - [' angry']
Layer: 25 - [' angry']
Layer: 26 - [' angry']
Layer: 27 - [' angry']
Layer: 28 - [' angry']
Layer: 29 - [' angry']
Layer: 30 - [' angry']
Layer: 31 - [' angry']


In [9]:
model = HookedTransformer.from_pretrained("pythia-2.8b", device=device)
_, cache = model.run_with_cache(context_2)


layers = [i for i in range(0, model.cfg.n_layers)]

for layer in layers:
  predicted_tokens = patch_activations(target_model=model,
                                    source_model=model,
                                    source_position=-1,
                                    target_position=-1,
                                    layer=layer,
                                    target_prompt=context_1,
                                    source_cache=cache)

  next_str_token = model.to_str_tokens(predicted_tokens[-1])
  print('Layer:', layer, '-', next_str_token)
  torch.cuda.empty_cache()
  model.reset_hooks()
  
del cache
del model

model.safetensors: 100%|██████████| 5.68G/5.68G [02:52<00:00, 33.0MB/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-2.8b into HookedTransformer
Layer: 0 - [' lucky']
Layer: 1 - [' lucky']
Layer: 2 - [' lucky']
Layer: 3 - [' lucky']
Layer: 4 - [' lucky']
Layer: 5 - [' lucky']
Layer: 6 - [' lucky']
Layer: 7 - [' lucky']
Layer: 8 - [' lucky']
Layer: 9 - [' lucky']
Layer: 10 - [' lucky']
Layer: 11 - [' lucky']
Layer: 12 - [' lucky']
Layer: 13 - [' lucky']
Layer: 14 - [' lucky']
Layer: 15 - [' lucky']
Layer: 16 - [' lucky']
Layer: 17 - [' lucky']
Layer: 18 - [' lucky']
Layer: 19 - [' lucky']
Layer: 20 - [' lucky']
Layer: 21 - [' lucky']
Layer: 22 - [' lucky']
Layer: 23 - [' lucky']
Layer: 24 - [' lucky']
Layer: 25 - [' lucky']
Layer: 26 - [' lucky']
Layer: 27 - [' lucky']
Layer: 28 - [' lucky']
Layer: 29 - [' lucky']
Layer: 30 - [' lucky']
Layer: 31 - [' lucky']


In [10]:
# Alice
story_1 = '''Here is a short story. Read it carefully and answer the questions below with a keyword 
from the text.

Alice was a young woman with a keen sense of adventure, living in the heart of London.
Her life was a blend of the modern hustle and the city's age-old mysteries, nestled within the winding 
streets and historic buildings of her neighborhood. Alice worked at a small, independent bookstore 
nestled between towering skyscrapers and ancient pubs, a place that seemed to resist the passage of time. 
It was her sanctuary, filled with the scent of old books and the quiet whispers of stories waiting to be 
discovered. Despite her love for the quiet life, Alice couldn't shake off the feeling that there was 
something more, a deeper mystery that lay hidden beneath the surface of her daily existence. It was a 
feeling that would soon lead her on an unexpected journey.

Answer the question below.

Question: Where is the story?

Answer: In the city of '''

# Bob
story_2 = '''Here is a short story. Read it carefully and answer the questions below with a keyword 
from the text.

Bob is an artist through and through, whose spirit and creativity are as vibrant and lively 
as the city of Paris itself. He resides in a charming, light-filled studio apartment in Montmartre, 
surrounded by the echoes of the great artists who once roamed its cobblestone streets. Bob's days are 
spent wandering the city with his sketchbook in hand, capturing the essence of Parisian life—the bustling 
cafés, the serene parks, the historic bridges over the Seine—with quick, deft strokes of his pencil.
By night, his small studio transforms into a hive of artistic activity. Canvases of all sizes lean against 
the aged plaster walls, each one a testament to Bob's love for the city and its endless inspiration.

Answer the question below.

Question: Who is in the story?

Answer: The character is '''

In [11]:
model = HookedTransformer.from_pretrained("pythia-2.8b", device=device)
_, cache = model.run_with_cache(story_1)


layers = [i for i in range(0, model.cfg.n_layers)]

for layer in layers:
  predicted_tokens = patch_activations(target_model=model,
                                    source_model=model,
                                    source_position=-1,
                                    target_position=-1,
                                    layer=layer,
                                    target_prompt=story_2,
                                    source_cache=cache)

  next_str_token = model.to_str_tokens(predicted_tokens[-1])
  print('Layer:', layer, '-', next_str_token)
  torch.cuda.empty_cache()

model.reset_hooks()
del cache
del model

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-2.8b into HookedTransformer
Layer: 0 - [' Bob']
Layer: 1 - [' Bob']
Layer: 2 - [' Bob']
Layer: 3 - [' Bob']
Layer: 4 - [' Bob']
Layer: 5 - [' Bob']
Layer: 6 - [' Bob']
Layer: 7 - [' Bob']
Layer: 8 - [' Bob']
Layer: 9 - [' Bob']
Layer: 10 - [' Bob']
Layer: 11 - [' Bob']
Layer: 12 - [' Bob']
Layer: 13 - [' Bob']
Layer: 14 - [' Bob']
Layer: 15 - [' Bob']
Layer: 16 - [' Bob']
Layer: 17 - [' Bob']
Layer: 18 - [' Bob']
Layer: 19 - [' Bob']
Layer: 20 - [' Bob']
Layer: 21 - [' Bob']
Layer: 22 - [' Bob']
Layer: 23 - [' Bob']
Layer: 24 - [' Bob']
Layer: 25 - [' Bob']
Layer: 26 - [' Bob']
Layer: 27 - [' Bob']
Layer: 28 - [' Bob']
Layer: 29 - [' Bob']
Layer: 30 - [' Bob']
Layer: 31 - [' Bob']
