In [1]:
# Point the Hugging Face cache at scratch storage when that directory exists.
# This is done before `transformers` is imported below.
# NOTE(review): presumably the library reads this variable at import time — confirm.
import os
if os.path.isdir('/scratch/dmpowell'):
    os.environ['TRANSFORMERS_CACHE'] = '/scratch/dmpowell/.cache/huggingface'
# Show which cache location is in effect (prints None when the variable is unset).
print(os.getenv('TRANSFORMERS_CACHE'))

import numpy as np
import torch
from transformers import GPTJForCausalLM, AutoTokenizer, AutoModel, GPT2LMHeadModel, AutoModelForCausalLM

import pandas as pd
import json
# pyjanitor; presumably imported for the extra DataFrame methods it registers — confirm which are used.
import janitor

from easyeditor.util import nethook
# Star import of the project's custom helpers. Names used later in this notebook that are
# not defined or imported here (e.g. load_data, load_prefixes, evaluate, edit_and_evaluate,
# EditedModel, auth_token) presumably come from this module — verify.
from easyeditor.custom import * # gets my custom functions

from easyeditor.editors import LOG
import logging
# Raise the editor logger's threshold to ERROR to keep notebook output readable.
LOG.setLevel(logging.ERROR) # stops cluttering up notebook

import torch.nn.functional as F

from contextlib import redirect_stdout

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device = ", device)

/scratch/dmpowell/.cache/huggingface


  warn(


device =  cuda


In [2]:
# Interactive Hugging Face login, left disabled.
# from huggingface_hub import notebook_login
# notebook_login()

# Load the three frames used throughout the notebook: baseline items, edits, and evaluation items.
baseline_df, edits_df, eval_df = load_data()

# Load the forward and reverse prompt prefixes.
# NOTE(review): verbose = True presumably prints the demonstrations shown in this cell's output — confirm.
prefix_fwd, prefix_rev = load_prefixes(verbose = True)

a fruitbat rests by hanging upside-down
a shark's skeleton is cartilage
food for a hummingbird must be nectar
a rhinoceros has a thick hide
a worm lives underground
a hammerhead is a type of shark
a koala has two thumbs
a cougar is a type of mammal
some sheep make wool
a tamarin is a kind of monkey
a parrot can talk

---
one animal that hangs upside-down is a fruitbat
an animal whose skeleton is cartilage is a shark
something that eats nectar is a hummingbird
one animal with a thick hide is a rhinoceros
one thing that lives underground is a worm
one type of shark is a hammerhead
an animal with two thumbs is a koala
one example of a mammal is a cougar
an animal that makes wool is a sheep
one kind of monkey is a tamarin
an animal that can talk is a parrot

---


#### Back-cronym brainstorm

This is what's really important ...

TAXI - TAXonomic Inference dataset
TAXICAB - TAXonomic Inference following Coherent Alteration of Beliefs

ATAXIAA - Assessing TAXonomic Inferences After Alterations 

In [3]:
# Read the ROME hyperparameters for LLaMA-7B from the YAML file, then build the editable model wrapper.
# NOTE(review): auth_token() presumably supplies a Hugging Face access token — confirm where it is read from.
hparams = ROMEHyperParams.from_hparams('hparams/ROME/llama-7b.yaml')
edited_model = EditedModel(hparams, auth_token())

2024-01-19 13:52:27,682 - easyeditor.editors.editor - INFO - Instantiating model
01/19/2024 13:52:27 - INFO - easyeditor.editors.editor -   Instantiating model


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.


In [5]:
# Score the baseline items with the forward and reverse prefixes prepended.
# Later cells read the correct_fwd / correct_rev columns of the result.
results_baseline = evaluate(baseline_df, edited_model, prefix_fwd = prefix_fwd, prefix_rev = prefix_rev)

In [6]:
# Baseline category-membership accuracy (main items and paraphrases),
# averaged per token type and per query direction.
id_cols = ['entity', 'token_type', 'subj', 'property', 'query_fwd', 'query_rev']
correct_cols = ['correct_fwd', 'correct_rev']

# Keep only rows whose property name starts with "category_membership".
is_membership = results_baseline.property.str.startswith("category_membership")

membership_long = (
    results_baseline
    .loc[is_membership]
    .filter(id_cols + correct_cols)
    .melt(id_vars = id_cols, value_vars = correct_cols, var_name = "query_type", value_name = "correct")
)

# Last expression of the cell: the summary table that gets displayed.
membership_long.groupby(['token_type', 'query_type']).agg(corr_prop = ('correct', 'mean'))

Unnamed: 0_level_0,Unnamed: 1_level_0,corr_prop
token_type,query_type,Unnamed: 2_level_1
rare_token,correct_fwd,0.78125
rare_token,correct_rev,0.40625
typical_token,correct_fwd,0.9375
typical_token,correct_rev,0.9375


LLAMA-7B knows the category memberships of typical tokens well; it is much weaker for rare tokens, especially on reverse items.

In [34]:
# Headline accuracy over every baseline item, forward and reverse.
print("Overall fwd acc:", results_baseline.correct_fwd.mean())
print("Overall rev acc:", results_baseline.correct_rev.mean())

# Accuracy per token type and query direction, over all properties.
id_cols = ['entity', 'token_type', 'subj', 'property', 'query_fwd', 'query_rev']
correct_cols = ['correct_fwd', 'correct_rev']

baseline_long = (
    results_baseline
    .filter(id_cols + correct_cols)
    .melt(id_vars = id_cols, value_vars = correct_cols, var_name = "query_type", value_name = "correct")
)

# Last expression of the cell: the summary table that gets displayed.
baseline_long.groupby(['token_type', 'query_type']).agg(corr_prop = ('correct', 'mean'))


Overall fwd acc: 0.7662835249042146
Overall rev acc: 0.5670498084291188


Unnamed: 0_level_0,Unnamed: 1_level_0,corr_prop
token_type,query_type,Unnamed: 2_level_1
entity,correct_fwd,0.873016
entity,correct_rev,0.714286
rare_token,correct_fwd,0.666667
rare_token,correct_rev,0.373737
typical_token,correct_fwd,0.79798
typical_token,correct_rev,0.666667


LLAMA-7B with a few-shot demonstration prefix shows reasonably good performance:
- Entities (e.g. "dog"): 87% forward, 71% reverse
- typical tokens (e.g. "Labrador"): 80% acc forward, 67% reverse

Rare tokens (e.g. "puli") are poorer (67% forward), especially for reverse queries (37%).

In [119]:
# Sanity check: accuracy on the evaluation items should be at or below chance here.
# It need not be exactly zero, because the property queries contain no really tempting foils.
# NOTE(review): unlike the baseline call, no prefix_fwd / prefix_rev are passed — confirm this is intended.
results_eval = evaluate(eval_df, edited_model)

In [122]:
# Chance vs. observed accuracy on the evaluation items.
# NOTE: report_results is defined in a later cell ("define reporting function"); run that cell first.
report_results(results_eval)

  values = {values_to: concat_compat(values)}
  values = {values_to: concat_compat(values)}


var,chance,correct
category membership,0.118056,0.033482
property,0.252315,0.206349
"(category membership, fwd)",0.125,0.020089
"(category membership, rev)",0.111111,0.046875
"(property, fwd)",0.25463,0.180556
"(property, rev)",0.25,0.232143


Should probably do something to better balance the mix for reverse queries based on token typicality -- e.g. only use typical for typical and rare for rare. [DONE]

## Model editing performance

Currently, model editing is largely FAILING with ROME -- some sort of bug fix or parameter change is needed.

In [9]:
# define reporting function
def _mean_by_group(long_df, keys):
    """Average `value` per (keys, var) and spread `var` into columns."""
    return (
        long_df
        .groupby(keys + ['var'])
        .agg(prop = ('value', 'mean'))
        .reset_index()
        .pivot(index = keys, columns = ['var'], values = 'prop')
    )


def report_results(df):
    """Summarise observed accuracy against chance for forward and reverse queries.

    Chance for each row is 1 / (number of answer choices), computed separately
    from `fwd_choices` and `rev_choices`. Rows are grouped into
    "category membership" (property name starts with "category_") and
    "property" (everything else).

    Args:
        df: DataFrame with one row per evaluated item. It must provide
            `property` (str), `token_type`, `fwd_choices` and `rev_choices`
            (sized containers), and `correct_fwd` / `correct_rev`
            (bool or numeric). The other identifier columns (`entity`,
            `subj`, `edit`, `query_fwd`, `query_rev`) are carried along when
            present but are not required.

    Returns:
        DataFrame with columns `chance` and `correct` (float means). The first
        rows are indexed by test group; the remaining rows are indexed by
        (test group, query type, token type) tuples.
    """
    id_cols = ['entity', 'token_type', 'subj', 'property', 'edit', 'query_fwd', 'query_rev']

    # Element-wise 1/len(choices); avoids a row-wise apply over the whole frame.
    scored = df.assign(
        chance_fwd = df['fwd_choices'].map(lambda choices: 1 / len(choices)),
        chance_rev = df['rev_choices'].map(lambda choices: 1 / len(choices)),
    )

    present_ids = [col for col in id_cols if col in scored.columns]
    value_cols = [
        col for col in ['correct_fwd', 'correct_rev', 'chance_fwd', 'chance_rev']
        if col in scored.columns
    ]

    # Cast to float before stacking so boolean `correct_*` and float `chance_*`
    # end up in one numeric column rather than a mixed object column.
    long_df = (
        scored
        .astype({col: float for col in value_cols})
        .melt(id_vars = present_ids, value_vars = value_cols, var_name = 'measure', value_name = 'value')
    )

    # "correct_fwd" -> var "correct", query_type "fwd" (same for chance_* / *_rev).
    long_df = long_df.assign(
        var = long_df['measure'].map(lambda name: name.split('_')[0]),
        query_type = long_df['measure'].map(lambda name: name.split('_')[1]),
        test_group = np.where(long_df['property'].str.startswith("category_"), "category membership", "property"),
    )

    # Build the long table once and summarise it at two granularities.
    overall = _mean_by_group(long_df, ['test_group'])
    detailed = _mean_by_group(long_df, ['test_group', 'query_type', "token_type"])

    return pd.concat([overall, detailed])
  

In [6]:
# Apply each edit with ROME, evaluate after editing, and save the combined results.
# NOTE: the captured output of this run ends in a KeyboardInterrupt, and the file written here
# ("...-test.csv") is not the file the next cell reads ("results/ROME-LLAMA7B.csv").
edit_method = "ROME"
full_results_ROME = edit_and_evaluate(edits_df, eval_df, edited_model, edit_method, prefix_fwd = prefix_fwd, prefix_rev = prefix_rev)
full_results_ROME.to_csv("results/ROME-LLAMA7B-test.csv")

Executing ROME algorithm for the update: [A Siamese is a] -> [ dog]
Cached context templates ['{}', 'The first of its. {}', 'The Cleaning. {}', 'Therefore, if you. {}', 'Therefore, we are. {}', 'Because of the recent. {}', 'Because the world is. {}', 'I’ve been. {}', 'I am not sure. {}', 'You are here:. {}', 'You are here:. {}', 'The 100 Best Books of . {}', 'The 35th annual Taste of. {}', 'Therefore, it is not necessary to have the. {}', 'Therefore, the best way to avoid these issues. {}', 'Because I am a writer I have a lot. {}', 'Because the majority of the world population has access. {}', "I'm a big fan of this series. {}", 'I was born into a family of artists and. {}', 'You are here: Home / Archives for . {}', 'You can find more details on the project and. {}']
Computing left vector (u)...
Selected u projection object Siamese
Left vector shape: torch.Size([11008])
Computing right vector (v)
Lookup index found: 4 | Sentence: A Siamese is a | Token: ese
Rewrite layer is 5
Tying opt

2024-01-19 13:31:59,195 - easyeditor.editors.editor - INFO - Execution 0 editing took 33.64197516441345
01/19/2024 13:31:59 - INFO - easyeditor.editors.editor -   Execution 0 editing took 33.64197516441345
2024-01-19 13:31:59,262 - easyeditor.editors.editor - INFO - Evaluation took 0.0661461353302002
01/19/2024 13:31:59 - INFO - easyeditor.editors.editor -   Evaluation took 0.0661461353302002


loss 0.497 = 0.493 + 0.0 + 0.003 avg prob of [ dog] 0.8673185110092163
Delta norm: 6.646640777587891
Change in target norm: 1.1606048345565796 to 4.771295547485352 => 3.6106905937194824
Division Factor: 3.5220489501953125
Right vector norm: 1.8871515989303589
Right vector shape: torch.Size([4096])
Deltas successfully computed for ['model.layers.5.mlp.down_proj.weight']
New weights successfully inserted into ['model.layers.5.mlp.down_proj.weight']
Executing ROME algorithm for the update: [A Holstein is a] -> [ dog]
Computing left vector (u)...
Selected u projection object Holstein
Left vector shape: torch.Size([11008])
Computing right vector (v)
Lookup index found: 3 | Sentence: A Holstein is a | Token: stein
Rewrite layer is 5
Tying optimization objective to 31
Recording initial value of v*
loss 9.03 = 9.03 + 0.0 + 0.0 avg prob of [ dog] 0.00036639260360971093
loss 7.927 = 7.923 + 0.001 + 0.003 avg prob of [ dog] 0.005208028480410576
loss 7.651 = 7.647 + 0.0 + 0.003 avg prob of [ dog] 

2024-01-19 13:32:38,564 - easyeditor.editors.editor - INFO - Execution 0 editing took 31.870246171951294
01/19/2024 13:32:38 - INFO - easyeditor.editors.editor -   Execution 0 editing took 31.870246171951294
2024-01-19 13:32:38,632 - easyeditor.editors.editor - INFO - Evaluation took 0.06662487983703613
01/19/2024 13:32:38 - INFO - easyeditor.editors.editor -   Evaluation took 0.06662487983703613


loss 1.566 = 1.561 + 0.002 + 0.003 avg prob of [ dog] 0.7751666903495789
Delta norm: 6.447433948516846
Change in target norm: 1.1601619720458984 to 4.8289794921875 => 3.6688175201416016
Division Factor: 2.678701162338257
Right vector norm: 2.4069254398345947
Right vector shape: torch.Size([4096])
Deltas successfully computed for ['model.layers.5.mlp.down_proj.weight']
New weights successfully inserted into ['model.layers.5.mlp.down_proj.weight']
Executing ROME algorithm for the update: [A Hampshire is a] -> [ dog]
Computing left vector (u)...
Selected u projection object Hampshire
Left vector shape: torch.Size([11008])
Computing right vector (v)
Lookup index found: 3 | Sentence: A Hampshire is a | Token: pshire
Rewrite layer is 5
Tying optimization objective to 31
Recording initial value of v*
loss 8.555 = 8.555 + 0.0 + 0.0 avg prob of [ dog] 0.0012541296891868114
loss 8.484 = 8.479 + 0.001 + 0.003 avg prob of [ dog] 0.0014382365625351667
loss 7.73 = 7.726 + 0.0 + 0.003 avg prob of [ d

2024-01-19 13:33:18,915 - easyeditor.editors.editor - INFO - Execution 0 editing took 32.12738919258118
01/19/2024 13:33:18 - INFO - easyeditor.editors.editor -   Execution 0 editing took 32.12738919258118
2024-01-19 13:33:18,982 - easyeditor.editors.editor - INFO - Evaluation took 0.06644558906555176
01/19/2024 13:33:18 - INFO - easyeditor.editors.editor -   Evaluation took 0.06644558906555176


loss 1.352 = 1.349 + 0.0 + 0.003 avg prob of [ dog] 0.8393687605857849
Delta norm: 6.31130313873291
Change in target norm: 1.1616250276565552 to 4.805294036865234 => 3.6436691284179688
Division Factor: 2.4147186279296875
Right vector norm: 2.613680839538574
Right vector shape: torch.Size([4096])
Deltas successfully computed for ['model.layers.5.mlp.down_proj.weight']
New weights successfully inserted into ['model.layers.5.mlp.down_proj.weight']
Executing ROME algorithm for the update: [A sparrow is a] -> [ dog]
Computing left vector (u)...
Selected u projection object sparrow
Left vector shape: torch.Size([11008])
Computing right vector (v)
Lookup index found: 3 | Sentence: A sparrow is a | Token: arrow
Rewrite layer is 5
Tying optimization objective to 31
Recording initial value of v*
loss 9.49 = 9.49 + 0.0 + 0.0 avg prob of [ dog] 0.00015992204134818166
loss 8.46 = 8.457 + 0.0 + 0.003 avg prob of [ dog] 0.0014548728941008449
loss 7.646 = 7.642 + 0.0 + 0.003 avg prob of [ dog] 0.01258

2024-01-19 13:33:55,959 - easyeditor.editors.editor - INFO - Execution 0 editing took 32.23233079910278
01/19/2024 13:33:55 - INFO - easyeditor.editors.editor -   Execution 0 editing took 32.23233079910278
2024-01-19 13:33:56,027 - easyeditor.editors.editor - INFO - Evaluation took 0.06636643409729004
01/19/2024 13:33:56 - INFO - easyeditor.editors.editor -   Evaluation took 0.06636643409729004


loss 1.187 = 1.183 + 0.0 + 0.003 avg prob of [ dog] 0.7706010937690735
Delta norm: 6.523514747619629
Change in target norm: 1.159490704536438 to 4.753418922424316 => 3.593928337097168
Division Factor: 2.986445426940918
Right vector norm: 2.1843743324279785
Right vector shape: torch.Size([4096])
Deltas successfully computed for ['model.layers.5.mlp.down_proj.weight']
New weights successfully inserted into ['model.layers.5.mlp.down_proj.weight']
Executing ROME algorithm for the update: [A bumblebee is a] -> [ dog]
Computing left vector (u)...
Selected u projection object bumblebee
Left vector shape: torch.Size([11008])
Computing right vector (v)
Lookup index found: 5 | Sentence: A bumblebee is a | Token: e
Rewrite layer is 5
Tying optimization objective to 31
Recording initial value of v*
loss 8.411 = 8.411 + 0.0 + 0.0 avg prob of [ dog] 0.00028834101976826787
loss 7.531 = 7.528 + 0.0 + 0.003 avg prob of [ dog] 0.0014796755276620388
loss 6.225 = 6.221 + 0.0 + 0.003 avg prob of [ dog] 0.0

KeyboardInterrupt: 

In [7]:
# Reload saved ROME results. The choice lists are stored as text in the CSV, so they are parsed
# back into Python objects with literal_eval; report_results takes len() of each entry.
# NOTE(review): literal_eval is not imported in the import cell — presumably it arrives via the
# star import from easyeditor.custom; otherwise add `from ast import literal_eval`.
full_results_ROME = pd.read_csv("results/ROME-LLAMA7B.csv", converters={'fwd_choices':literal_eval, 'rev_choices':literal_eval})
report_results(full_results_ROME)  

  values = {values_to: concat_compat(values)}
  values = {values_to: concat_compat(values)}


var,chance,correct
category membership,0.118056,0.170759
property,0.252315,0.233135
"(category membership, fwd, rare_token_y)",0.125,0.424107
"(category membership, fwd, typical_token_y)",0.125,0.174107
"(category membership, rev, rare_token_y)",0.111111,0.080357
"(category membership, rev, typical_token_y)",0.111111,0.004464
"(property, fwd, rare_token_y)",0.25463,0.34127
"(property, fwd, typical_token_y)",0.25463,0.174603
"(property, rev, rare_token_y)",0.25,0.063492
"(property, rev, typical_token_y)",0.25,0.353175


In [58]:
# Same edit-and-evaluate run with the "ICE" method, saved to its own CSV.
# NOTE(review): unlike the ROME call, no prefix_fwd / prefix_rev are passed here — confirm this is intended.
edit_method = "ICE"
full_results_ICE = edit_and_evaluate(edits_df, eval_df, edited_model, edit_method)
full_results_ICE.to_csv("results/ICE-LLAMA7B.csv")

In [12]:
# Preview a results filename built from the model name and the edit method.
# "/" in the model name is replaced with "-" so the name is a single path component.
edit_method = "ICE"
f"results/{hparams.model_name.replace('/', '-')}-{edit_method}.csv"


'results/meta-llama-Llama-2-7b-hf-ICE.csv'

In [118]:

# Chance vs. observed accuracy after the ICE edits.
report_results(full_results_ICE)  

  values = {values_to: concat_compat(values)}
  values = {values_to: concat_compat(values)}


var,chance,correct
category membership,0.118056,0.8125
property,0.252315,0.702381
"(category membership, fwd)",0.125,0.662946
"(category membership, rev)",0.111111,0.962054
"(property, fwd)",0.25463,0.40873
"(property, rev)",0.25,0.996032
