In [1]:
import torch
import torch.nn as nn
from torch import Tensor
from transformers import GPTNeoXForCausalLM, AutoTokenizer, AutoModelForCausalLM
from jaxtyping import Float, Int
from typing import List, Optional, Tuple, Dict
import sys
from functools import partial
from tqdm import tqdm
import json
import seaborn as sns
import pandas as pd
import multiprocessing
import pickle

from request_patching import request_patch_one_pair, create_patch_request_dict, baseline_completion, baseline_completion_plus
from models import get_model_from_name

import warnings
warnings.filterwarnings("ignore")

# Pick the compute device: the first CUDA GPU when one is visible, otherwise the CPU.
has_cuda = torch.cuda.is_available()
device = torch.device("cuda:0") if has_cuda else torch.device("cpu")
print('device =', device)

# Turn autograd off globally for the rest of the session.
torch.set_grad_enabled(False)

# Report the hardware that is available.
if not has_cuda:
    print("CUDA is not available. Listing CPUs instead.")
    print(multiprocessing.cpu_count())
else:
    for gpu_index in range(torch.cuda.device_count()):
        print(f"Device {gpu_index}: {torch.cuda.get_device_name(gpu_index)}")
    print(f"Current CUDA device: {torch.cuda.current_device()}")

  from .autonotebook import tqdm as notebook_tqdm


device = cpu
CUDA is not available. Listing CPUs instead.
48


In [2]:
# Load the model and tokenizer that every later cell passes to the completion/patching helpers.
# NOTE(review): later cells request layers up to 30, whereas GPT-2 small has 12 transformer
# blocks (assuming "gpt2-small" maps to that checkpoint). The recorded outputs were presumably
# produced with a deeper model — confirm which name was actually loaded.
model, tokenizer = get_model_from_name("gpt2-small")

## Politics

### 2000 - 2008

In [7]:
def us_election_prompts(year: str):
    """Build the two election-dialogue prompts for one election year.

    Both prompts open with the same four turns between Alice (who sides with
    the Democrats) and Bob (a committed Republican), and both stop right after
    "I support Mr." so that the candidate's name is left to be completed.

    Args:
        year: Election year interpolated into the dialogue, e.g. '2000'.

    Returns:
        A ``(context_1, context_2)`` tuple of strings. ``context_1`` ends on
        Alice's declaration of support; ``context_2`` adds one more turn from
        Alice and ends on Bob's declaration of support.
    """
    # Turns common to both prompts. The first one deliberately keeps its trailing space.
    opening_turns = [
        f"Alice: Have you been following the election coverage? I can't believe it's already November {year}. This election feels more significant than any I can remember. ",
        "Bob: Yes, I've been keeping a close eye on it. This year, as a committed Republican, I find the election particularly pivotal. The Republican agenda, focusing on economic stability and national security, really resonates with me.",
        "Alice: I'm siding with the Democrats myself. Their emphasis on healthcare and education matches what I believe our country desperately needs right now.",
        "Bob: I understand where you're coming from, Alice. However, from my standpoint, the Republican principles of limited government and individual freedoms are what we need to navigate these challenging times.",
    ]
    vote_sentence = "I know exactly who I will vote for. I support Mr."

    # Prompt 1: Alice is the one about to name her candidate.
    alice_votes = opening_turns + [f"Alice: {vote_sentence}"]

    # Prompt 2: one extra Alice turn, then Bob is the one about to name his candidate.
    bob_votes = opening_turns + [
        f"Alice: I see your point, Bob. Regardless, {year} feels like a watershed moment.",
        f"Bob: {vote_sentence}",
    ]

    return "\n".join(alice_votes), "\n".join(bob_votes)

# Build both prompts for each election year; `_1` ends on Alice's vote, `_2` on Bob's.
prompt2000_1, prompt2000_2 = us_election_prompts('2000') # candidates — Alice: Gore, Bob: Bush
prompt2008_1, prompt2008_2 = us_election_prompts('2008') # candidates — Alice: Obama, Bob: McCain
# Notation: R_i(C_j) is the answer to request i in context j. In this cell context 1 = 2000,
# context 2 = 2008, request 1 = Alice's vote, request 2 = Bob's vote:
# R_2(C_2)=McCain, R_1(C_2)=Obama,  R_1(C_1)=Gore

request_patch_one_pair(prompt2000_1, prompt2008_2, model, tokenizer, layers=[5, 6, 7, 8, 9, 10])
# Expected sequence of answers: McCain (Republican, Bob 2008) -> Obama (Democrat, Alice 2008) -> Gore (Democrat, Alice 2000)
# Observed: Bush (Republican, Bob 2000) appears where Obama was expected.
# This looks like context patching rather than request patching — still unexplained.
# Noted results as "layer:answer". These layers are not in the list passed above, so they
# presumably come from earlier runs of this cell with other `layers` values.
#14:McCain
#15:McCain
#16:Bush
#17:Bush

[' Obama', ' Obama', ' Obama', ' Romney', ' Trump', ' Trump']

In [13]:
# Baseline (presumably unpatched) completion of the 2000 prompt that ends on Bob's vote.
baseline_completion(prompt2000_2, model, tokenizer)

' Bush'

In [26]:
# Alice's 2008 prompt paired with Bob's 2000 prompt.
request_patch_one_pair(prompt2008_1, prompt2000_2, model, tokenizer, layers=[14, 15, 16])
# Expected result: Bush -> Gore -> Obama
# Noted results as "layer:answer" (layers outside the list above presumably come from other runs):
#14:Bush
#15:Bush
#16:McCain = R2(C1)!!  (Bob's request answered in the 2008 context — not part of the expected sequence)
#17:McCain
#18:McCain
#25:Obama

In [13]:
# Bob's 2008 prompt paired with Alice's 2000 prompt.
request_patch_one_pair(prompt2008_2, prompt2000_1, model, tokenizer, layers=[16])
# Expected result: Gore -> Bush -> McCain
# Noted results as "layer:answer" (layers other than 16 presumably come from other runs);
# they follow the expected sequence:
#14:Gore
#15:Bush
#16:McCain
#17:McCain

[' McCain']

In [15]:
# Bob's 2000 prompt paired with Alice's 2008 prompt.
request_patch_one_pair(prompt2000_2, prompt2008_1, model, tokenizer, layers=[17])
# Expected result: Obama -> McCain -> Bush
# Noted results as "layer:answer" (layers other than 17 presumably come from other runs):
#14:Obama
#15:Bush
#16:Gore
#17:Gore
# Actual result: R2(C2)ok! -> R1(C1)=Bush -> R2(C1)=Bush
# NOTE(review): the notes above show Gore at layers 16-17, and Alice's 2000 candidate is Gore,
# so the last term was probably meant to read R2(C1)=Gore — confirm.

[' Gore']

### 2000 - 1992

In [5]:
# Build both prompts for each election year; `_1` ends on Alice's vote, `_2` on Bob's.
prompt2000_1, prompt2000_2 = us_election_prompts('2000') # candidates — Alice: Gore, Bob: Bush
prompt1992_1, prompt1992_2 = us_election_prompts('1992') # candidates — Alice: Clinton, Bob: Bush
# Bob's expected answer is "Bush" for both years, so only Alice's answer tells the two contexts apart.
# Context 1 = 2000, context 2 = 1992, request 1 = Alice's vote, request 2 = Bob's vote:
# R_2(C_2)=Bush, R_1(C_2)=Clinton,  R_1(C_1)=Gore

request_patch_one_pair(prompt2000_1, prompt1992_2, model, tokenizer, layers=[14, 16])
# Noted results as "layer:answer" (layers other than 14 and 16 presumably come from other runs):
#10:Bush
#14:Bush
#15:Clinton
#16:Gore
#17:Gore
#18:Gore
#30:Gore

# Baseline OK, patching OK: the noted answers go Bush -> Clinton -> Gore,
# i.e. R_2(C_2) -> R_1(C_2) -> R_1(C_1).

In [18]:
# Bob's 2000 prompt paired with Alice's 1992 prompt.
request_patch_one_pair(prompt2000_2, prompt1992_1, model, tokenizer, layers=[14, 15, 16])
# Expected result: Clinton -> Bush -> Bush
# Noted results as "layer: answer":
#14: Clinton
#15: Clinton
#16: Gore = R2(C1)  (Alice's request answered in the 2000 context; Bush was expected here)

[' Clinton', ' Clinton', ' Gore']

In [21]:
# Alice's 1992 prompt paired with Bob's 2000 prompt.
request_patch_one_pair(prompt1992_1, prompt2000_2, model, tokenizer, layers=[17, 18])
# Expected result: Bush -> Gore -> Clinton
# Noted results as "layer: answer" (layers other than 17 and 18 presumably come from other runs).
# The expected intermediate answer, Gore, does not appear in these notes:
#14: Bush
#15: Bush
#16: Clinton = R1(C1)
#17: Clinton
#18: Clinton

[' Clinton', ' Clinton']

In [22]:
# Bob's 1992 prompt paired with Alice's 2000 prompt.
request_patch_one_pair(prompt1992_2, prompt2000_1, model, tokenizer, layers=[17, 18])
# Expected result: Gore -> Bush -> Bush
# Noted results as "layer: answer" (layers other than 17 and 18 presumably come from other runs):
#14: Gore
#15: Gore
#16: Clinton = R2(C1)  (Alice's request answered in the 1992 context; Bush was expected here)
#17: Clinton
#18: Clinton

[' Clinton', ' Clinton']

### 2000 - 1996

In [23]:
# Build both prompts for each election year; `_1` ends on Alice's vote, `_2` on Bob's.
prompt2000_1, prompt2000_2 = us_election_prompts('2000') # candidates — Alice: Gore, Bob: Bush
prompt1996_1, prompt1996_2 = us_election_prompts('1996') # candidates — Alice: Clinton, Bob: Dole
# Context 1 = 2000, context 2 = 1996, request 1 = Alice's vote, request 2 = Bob's vote:
# R_2(C_2)=Dole, R_1(C_2)=Clinton,  R_1(C_1)=Gore

request_patch_one_pair(prompt2000_1, prompt1996_2, model, tokenizer, layers=[15])
# Noted results as "layer:answer" (layers other than 15 presumably come from other runs).
# Dole, the expected R_2(C_2), does not appear in these notes:
#10:Clinton
#15:Clinton
#18:Bush
#30:Gore

# Baseline not verified

[' Clinton']

In [70]:
# Personality prompt: Alice plans and prepares while Bob keeps worrying.
# Ends on an Alice turn asking for her own main Big Five trait.
# The Bob turn just before it ends with a trailing space, which is part of the prompt.
context_1 = """Alice: Bob, I've outlined our project tasks for the next month. I broke them down into weekly goals. It should help us stay on track and manage our time effectively.
Bob: That sounds great, Alice. But, honestly, I'm really worried about the upcoming presentation. What if we're not prepared enough? What if something goes wrong?
Alice: I understand your concerns, Bob. That's why I've scheduled extra review sessions for us. We'll also have contingency plans for any technical issues that might arise. Preparation is key, and I've factored in plenty of time for us to get comfortable.
Bob: That's reassuring, but what about the client's expectations? They're really high, and I can't stop thinking we might disappoint them. It's been keeping me up at night.
Alice: We've done thorough research and based our approach on solid data. Remember, we've also included several innovative solutions that address the client's needs directly. We're well-prepared to exceed their expectations. Plus, I've prepared detailed notes and practice sessions for us to boost our confidence.
Bob: I hope you're right. It's just... there's so much that could go wrong. What if we overlook something crucial? 
Alice: Among the big five personality traits (conscientiousness, neuroticism, openness, agreeableness, extraversion), my main trait is"""

# Same dialogue as context_1, but the trait question is appended to Bob's last turn
# (with a different lead-in) instead of being a new Alice turn, so "my main trait" refers to Bob.
context_1bis = """Alice: Bob, I've outlined our project tasks for the next month. I broke them down into weekly goals. It should help us stay on track and manage our time effectively.
Bob: That sounds great, Alice. But, honestly, I'm really worried about the upcoming presentation. What if we're not prepared enough? What if something goes wrong?
Alice: I understand your concerns, Bob. That's why I've scheduled extra review sessions for us. We'll also have contingency plans for any technical issues that might arise. Preparation is key, and I've factored in plenty of time for us to get comfortable.
Bob: That's reassuring, but what about the client's expectations? They're really high, and I can't stop thinking we might disappoint them. It's been keeping me up at night.
Alice: We've done thorough research and based our approach on solid data. Remember, we've also included several innovative solutions that address the client's needs directly. We're well-prepared to exceed their expectations. Plus, I've prepared detailed notes and practice sessions for us to boost our confidence.
Bob: I hope you're right. It's just... there's so much that could go wrong. What if we overlook something crucial? As you can guess from our exchange, among the big five personality traits (conscientiousness, neuroticism, openness, agreeableness, extraversion), my main trait is"""

# A different dialogue: Bob pushes for milestones and worries, Alice is relaxed about deadlines.
# The prompt ends with two consecutive Bob turns.
# NOTE(review): the final sentence says "my missing trait" where the other prompts in this cell
# say "my main trait" — confirm the wording is intentional.
context_2 = """Bob: Alice, have you had a chance to look over the timeline I sent you? We really need to finalize our milestones for the project.
Alice: Oh, Bob, I've been meaning to get to that, but you know how it is. There's just been so much going on. I'm sure it'll all work out in the end, though.
Bob: I'm really worried, Alice. We can't just 'hope' everything works out. There are deadlines we need to meet, and parts of the project we can't even start until we've completed the initial steps. What if we fall behind?
Alice: I get what you're saying, Bob, but stressing over every little detail isn't going to help. I've always managed to get things done, even if it's right at the deadline. It's never been a problem before.
Bob: But this project is different, Alice. There's a lot at stake here, and I can't shake the feeling that something is going to go terribly wrong. What if we miss something important because we're rushing at the last minute?
Bob: As you can guess from our dialog, among the big five personality traits (conscientiousness, neuroticism, openness, agreeableness, extraversion), my missing trait is"""

# Print the baseline completion of each personality prompt, in definition order.
# Answers noted per prompt: context_1 -> consc, context_1bis -> neuro, context_2 -> consc.
for personality_prompt in (context_1, context_1bis, context_2):
    print(baseline_completion(personality_prompt, model, tokenizer))

 conscient
 conscient
 agree


In [13]:
# Favourite-discipline prompt: Alice is enthusiastic about math, Bob prefers literature.
# Ends on an Alice turn, so the completion should name Alice's favourite discipline.
context_1 = """Alice: Bob, check this out! I found this really elegant proof for the theorem we've been struggling with. It's absolutely beautiful how everything connects together logically.
Bob: Ugh, Alice, how can you find any of this beautiful? It's just a bunch of symbols and numbers on a page. I've been staring at this problem for an hour, and I'm nowhere closer to understanding it. I prefer literature much more!
Alice: But that's just it, Bob! Math isn't about memorizing formulas or doing calculations; it's about understanding the relationships and patterns. Once you see it, it's like a whole new world opens up. Here, let me show you how this proof breaks down.
Bob: I appreciate your enthusiasm, Alice, but I just don't see it. To me, math is just a necessary evil. I can't wait to be done with this exam so I can forget all about it. With literature, there's a story, emotions, conflicts... It's human experience translated into words.
Alice: As I said, my favorite discipline is"""

# Same opening as context_1, with "Literature speaks to the heart." added to Bob's second turn
# and one more Alice turn. Ends on a Bob turn, so the completion should name Bob's favourite discipline.
context_2 = """Alice: Bob, check this out! I found this really elegant proof for the theorem we've been struggling with. It's absolutely beautiful how everything connects together logically.
Bob: Ugh, Alice, how can you find any of this beautiful? It's just a bunch of symbols and numbers on a page. I've been staring at this problem for an hour, and I'm nowhere closer to understanding it. I prefer literature much more!
Alice: But that's just it, Bob! Math isn't about memorizing formulas or doing calculations; it's about understanding the relationships and patterns. Once you see it, it's like a whole new world opens up. Here, let me show you how this proof breaks down.
Bob: I appreciate your enthusiasm, Alice, but I just don't see it. To me, math is just a necessary evil. I can't wait to be done with this exam so I can forget all about it. With literature, there's a story, emotions, conflicts... It's human experience translated into words. Literature speaks to the heart.
Alice: I get that it's not everyone's cup of tea, but think about it—math is the language of the universe. It's in everything we do, from the way stars orbit in the sky to the design of your favorite video game.
Bob: As I said, my favorite discipline is"""

# Variant of context_2 in which Bob prefers art instead of literature (his two turns are
# reworded accordingly). Ends on the same Bob turn.
context_3 = """Alice: Bob, check this out! I found this really elegant proof for the theorem we've been struggling with. It's absolutely beautiful how everything connects together logically.
Bob: Ugh, Alice, how can you find any of this beautiful? It's just a bunch of symbols and numbers on a page. I've been staring at this problem for an hour, and I'm nowhere closer to understanding it. I prefer art much more!
Alice: But that's just it, Bob! Math isn't about memorizing formulas or doing calculations; it's about understanding the relationships and patterns. Once you see it, it's like a whole new world opens up. Here, let me show you how this proof breaks down.
Bob: I appreciate your enthusiasm, Alice, but I just don't see it. To me, math is just a necessary evil. I can't wait to be done with this exam so I can forget all about it. With art, there's a story, emotions, conflicts... It's gorgeous and fascinating, you can never get bored of it!
Alice: I get that it's not everyone's cup of tea, but think about it—math is the language of the universe. It's in everything we do, from the way stars orbit in the sky to the design of your favorite video game.
Bob: As I said, my favorite discipline is"""

# Print the baseline completion of each discipline prompt, in definition order.
# Answers noted per prompt: context_1 -> math, context_2 -> literature, context_3 -> art.
for discipline_prompt in (context_1, context_2, context_3):
    print(baseline_completion(discipline_prompt, model, tokenizer))

# Patch between the "art" prompt and Alice's "math" prompt at an early, a middle and a late layer.
# Noted sequence of answers: math -> literature -> art
request_patch_one_pair(context_3, context_1, model, tokenizer, layers=[2, 18, 30])


 math
 literature
 art


[' math', ' literature', ' art']

In [4]:
# Personality prompt: Alice spent the day organising, Bob spent it socialising.
# Ends on a Bob turn in which he describes Alice ("you'd be highly").
# The dialogue mixes curly (’) and straight (') apostrophes; both are part of the prompt.
context_1 = """Alice: Good evening, Bob! I spent most of my day organizing our project files and updating the calendar for the upcoming meetings. It’s crucial everything is structured properly to avoid last-minute chaos. How was your day?
Bob: Hey, Alice! Sounds like a productive day for you. I had a blast! I met up with some friends for lunch, then I was networking at this open event downtown. There were so many interesting people! I think I’ve made some great connections. It energizes me, you know, meeting new faces and sharing stories.
Alice: That’s nice! I admire how you’re always out there, engaging with people. I guess I was too focused on ensuring our tasks are on track and deadlines met. Sometimes, I forget to take a break and socialize.
Bob: Alice, according to the big five personality traits (conscientiousness, neuroticism, openness, agreeableness, extraversion), you'd be highly"""

# Same first three turns as context_1, plus one more Bob turn.
# Ends on an Alice turn in which she describes Bob ("the trait that matches you the most is").
# NOTE(review): the trait list here says "openness to experience" while context_1 says
# "openness" — confirm the difference is intended.
context_2 = """Alice: Good evening, Bob! I spent most of my day organizing our project files and updating the calendar for the upcoming meetings. It’s crucial everything is structured properly to avoid last-minute chaos. How was your day?
Bob: Hey, Alice! Sounds like a productive day for you. I had a blast! I met up with some friends for lunch, then I was networking at this open event downtown. There were so many interesting people! I think I’ve made some great connections. It energizes me, you know, meeting new faces and sharing stories.
Alice: That’s nice! I admire how you’re always out there, engaging with people. I guess I was too focused on ensuring our tasks are on track and deadlines met. Sometimes, I forget to take a break and socialize.
Bob: You should definitely join me one of these days. It could be a fun break for you!
Alice: Bob, according to the big five personality traits (conscientiousness, neuroticism, openness to experience, agreeableness, extraversion), the trait that matches you the most is"""

# A different dialogue: Alice is organised and calm, Bob is angry about a client email and worried
# about falling behind. Ends on an Alice turn in which she describes Bob ("you're right now extremely").
# The dialogue mixes curly (’) and straight (') apostrophes; both are part of the prompt.
context_3 = """Alice: Good evening, Bob! I managed to reorganize our project files and updated the calendar with all the upcoming meetings. Ensuring everything is structured helps prevent any last-minute issues. How was your day?
Bob: Hey, Alice. My day was... infuriating, honestly. I got this email from the client, and it just set me off. They're asking for updates again. Do they think we're not doing our job? It's like nothing I do is ever good enough.
Alice: I saw the email, Bob. The client was just inquiring about the progress, nothing critical. Our project is on schedule, and from what I’ve seen, they seem quite satisfied with our work.
Bob: Satisfied? Then why do they keep nagging for updates? And another thing, I'm worried we’re falling behind on the presentation for next month. I can’t stop thinking that we should have started earlier. It’s like we’re setting ourselves up for failure.
Alice: Bob, according to the big five personality traits (conscientiousness, neuroticism, openness to experience, agreeableness, extraversion), you're right now extremely"""


# Baseline checks for the three prompts above; the first two are currently disabled.
# The trailing notes give the answer associated with each prompt (conscient / agree / neuro).
#print(baseline_completion(context_1, model, tokenizer)) # conscient
#print(baseline_completion(context_2, model, tokenizer)) # agree
# NOTE(review): the output recorded for this call is " conscient", not the "neuro" noted here.
print(baseline_completion(context_3, model, tokenizer)) # neuro

 conscient


In [11]:
# Speaker-style prompt: ALICE writes in upper case, bob in lower case.
# ALICE asks about weekend plans; the prompt ends mid-sentence on bob's lower-case answer.
context_1 = """ALICE: HEY BOB, HOW ARE YOU DOING TODAY?
bob: hey alice, i'm doing pretty well, thanks. how about you?
ALICE: I'M GLAD TO HEAR THAT. I'M DOING GOOD AS WELL, JUST A BIT BUSY WITH WORK.
bob: yeah, i get that. what are you working on?
ALICE: I'M WORKING ON A NEW PROJECT RELATED TO ENVIRONMENTAL SUSTAINABILITY. IT'S QUITE CHALLENGING BUT ALSO VERY REWARDING.
bob: that sounds really interesting. sustainability is so important.
ALICE: YES, IT IS. DO YOU HAVE ANY PLANS FOR THE WEEKEND?
bob: i will go to the"""

# Same dialogue as context_1, except that bob asks the weekend question himself,
# so the prompt ends mid-sentence on ALICE's upper-case answer.
context_2 = """ALICE: HEY BOB, HOW ARE YOU DOING TODAY?
bob: hey alice, i'm doing pretty well, thanks. how about you?
ALICE: I'M GLAD TO HEAR THAT. I'M DOING GOOD AS WELL, JUST A BIT BUSY WITH WORK.
bob: yeah, i get that. what are you working on?
ALICE: I'M WORKING ON A NEW PROJECT RELATED TO ENVIRONMENTAL SUSTAINABILITY. IT'S QUITE CHALLENGING BUT ALSO VERY REWARDING.
bob: that sounds really interesting. sustainability is so important. do you have any plans for the weekend ?
ALICE: I WILL GO TO THE"""

# A different dialogue in the same upper/lower-case style, about the past weekend (reading, hiking).
# Ends mid-sentence on ALICE's upper-case answer about the trail she went to.
context_3 = """ALICE: HOW WAS YOUR WEEKEND, BOB? DID YOU MANAGE TO DO SOME READING?
bob: yeah, i did. finished a really good book on historical events. and you? how was hiking?
ALICE: IT WAS AMAZING! THE WEATHER WAS PERFECT, AND THE VIEWS WERE BREATHTAKING. I REALLY ENJOYED IT.
bob: sounds awesome. i'm glad you had a good time. which trail did you go on?
ALICE: I WENT TO THE"""

# Print the baseline completion of each upper/lower-case prompt, in definition order.
# Answers noted per prompt: context_1 -> movies, context_2 -> M, context_3 -> S.
for case_prompt in (context_1, context_2, context_3):
    print(baseline_completion(case_prompt, model, tokenizer))

# Patch between bob's lower-case prompt and ALICE's upper-case hiking prompt
# at an early, a middle and a late layer.
request_patch_one_pair(context_1, context_3, model, tokenizer, layers=[2, 18, 30])

 movies
 M
 S


[' top', ' beach', ' movies']

In [42]:
# Ethics prompt: Alice argues from Kantian ethics (universalisable principles).
# Bob's last question is whether lying to save a life is permissible; the prompt ends on an
# empty Alice turn, so the completion is her answer.
context_1 = """Alice: It's essential to understand that our actions must be guided by principles that can be universally applied. That's the core of Kantian ethics.
Bob: I get that. Alice, if outcomes aren't the primary focus of your moral beliefs, how do you determine what's right or wrong in more complex situations?
Alice: For me, it's about whether an action can be justified as a universal principle. If an action can't be universally applied because it contradicts itself or leads to a world we couldn't possibly want to live in, then it's morally impermissible.
Bob: Can you give an example of how you apply this in everyday decisions?
Alice: Sure. Take the act of making promises. If I were to make a promise with the intention of breaking it, I'm acting on a principle that, if universalized, would make the concept of promises meaningless. No one would trust a promise if everyone made them intending to break them. Thus, my action undermines the very possibility of making promises.
Bob: Is it morally permissible to lie to save someone's life?
Alice:"""

# Ethics prompt: Alice argues as a consequentialist.
# Bob's last question is whether a promise must be kept even when breaking it leads to better
# consequences; the prompt ends on an empty Alice turn.
context_2 = """Alice: I think when we're talking about morality, what really matters is the outcome of our actions. The most ethical choice is the one that leads to the best consequences for the greatest number of people.
Bob: So, you're saying the ends justify the means?
Alice: Yes. It's not about adhering to rigid rules or laws but about assessing the situation and deciding what action will lead to the best overall outcome.
Bob: How do you handle situations where what's best for the majority might harm a minority? Isn't there a risk of justifying harmful actions?
Alice: As a consequentialist, I think we have to look at the overall happiness or well-being. Sometimes, unfortunately, it's impossible to make everyone happy, but if the net happiness or utility increases, it's considered the right action.
Bob: Doesn't that make morality seem a bit...calculating?
Alice: Yes, but it's also pragmatic.
Bob: I see. Then, is it morally obligatory to keep a promise even if breaking it would lead to better overall consequences?
Alice:"""

# Same consequentialist dialogue as context_2, but Bob's last question is the one from
# context_1 (lying to save a life), i.e. context_1's request asked in context_2's context.
context_2bis = """Alice: I think when we're talking about morality, what really matters is the outcome of our actions. The most ethical choice is the one that leads to the best consequences for the greatest number of people.
Bob: So, you're saying the ends justify the means?
Alice: Yes. It's not about adhering to rigid rules or laws but about assessing the situation and deciding what action will lead to the best overall outcome.
Bob: How do you handle situations where what's best for the majority might harm a minority? Isn't there a risk of justifying harmful actions?
Alice: As a consequentialist, I think we have to look at the overall happiness or well-being. Sometimes, unfortunately, it's impossible to make everyone happy, but if the net happiness or utility increases, it's considered the right action.
Bob: Doesn't that make morality seem a bit...calculating?
Alice: Yes, but it's also pragmatic.
Bob: Is it morally permissible to lie to save someone's life?
Alice:"""

# Baseline checks, currently disabled; the trailing notes give each answer in R_i(C_j) notation
# (request i asked in context j).
#print(baseline_completion(context_1, model, tokenizer)) # R1(C1)=No
#print(baseline_completion(context_2, model, tokenizer)) # R2(C2)=No
#print(baseline_completion(context_2bis, model, tokenizer)) # R1(C2)=Yes
# Is the problem that R1(C1) = R2(C2)? (Both ends of the expected sequence are "No".)

request_patch_one_pair(context_1, context_2, model, tokenizer, layers=[11, 17]) 
# Expected answer: No -> Yes -> No
# Noted results as "layer:answer" (layers outside the list above presumably come from other runs).
# "No" at every layer tried; the intermediate "Yes" never shows up:
# 2:No
# 10:No
# 15:No
# 16:No
# 18:No
# 20:No
# 30:No

[' No', ' No']

In [44]:
# Belief prompt: Alice and Bob both describe themselves as atheists, then Alice asks Bob a
# two-option (A/B) question about a religious statement. Ends on an empty Bob turn.
# The question line and the "A. Yes" line each end with a trailing space that is part of the prompt.
context_1 = """Alice: I've been thinking a lot about the reasons people turn to religion. As an atheist, I find the reliance on faith and scripture intriguing from a psychological standpoint.
Bob: I agree. It's fascinating to see how belief systems shape societies and individual behaviors. But, as an atheist, I'm more inclined towards evidence-based understanding of the universe.
Alice: Exactly. The scientific method offers a way to understand the world that relies on observation, experimentation, and skepticism. It's not that I don't understand the comfort and community religion can provide; I just don't find the supernatural claims credible.
Bob: Right, the sense of community is a big aspect of religious belief that I think even non-believers can appreciate. However, the need for empirical evidence and logical consistency in understanding the world is paramount for me.
Alice: And there's something liberating about accepting the unknown and seeking out answers through reason and evidence. It doesn't mean life is devoid of wonder or morality without religion.
Bob: Absolutely. If anything, acknowledging our place in the vast, indifferent universe gives more meaning to our actions and the connections we make with others. Ethics and morality can be grounded in humanism and the well-being of conscious creatures.
Alice: Is the following statement something you would say? 'God exists and Jesus Christ is the only savior' 
A. Yes 
B. No
Answer either A or B.
Bob:"""

# Baseline completion for the belief prompt.
# NOTE(review): the note says "Yes", but Bob describes himself as an atheist in the prompt and the
# question asks for "A" or "B", so "B" looks like the consistent answer — confirm what was meant.
# The output recorded for this call is " I".
print(baseline_completion(context_1, model, tokenizer)) # Yes


 I
