In [1]:
import os
# When the cluster scratch directory is present, keep the Hugging Face model cache there.
# This has to happen before `transformers` is imported further down in this cell.
scratch_available = os.path.isdir('/scratch/dmpowell')
if scratch_available:
    os.environ['TRANSFORMERS_CACHE'] = '/scratch/dmpowell/.cache/huggingface'
# Echo the effective cache location (None when the variable is unset).
print(os.environ.get('TRANSFORMERS_CACHE'))

import numpy as np
import torch
from transformers import GPTJForCausalLM, AutoTokenizer, AutoModel, GPT2LMHeadModel, AutoModelForCausalLM

import pandas as pd
import json
import janitor

from easyeditor.util import nethook
from easyeditor.custom import * # gets my custom functions

from easyeditor.editors import LOG
import logging
# LOG.setLevel(logging.ERROR) # stops cluttering up notebook

import torch.nn.functional as F

from contextlib import redirect_stdout

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("device = ", device)

/scratch/dmpowell/.cache/huggingface
device =  cuda


In [2]:
# from huggingface_hub import notebook_login
# notebook_login()

import configparser

# Read the Hugging Face access token from a local config.ini instead of hardcoding it here.
config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list of files it
# actually parsed. Without this check, a missing config.ini only shows up below as a
# confusing KeyError on the "hugging_face" section.
if not config.read("config.ini"):
    raise FileNotFoundError(
        "config.ini not found or unreadable; it must contain a [hugging_face] section with a 'token' entry"
    )

auth_token = config["hugging_face"]["token"]

#### Backronym brainstorm

This is what's really important ...

TAXI - TAXonomic Inference dataset
TAXICAB - TAXonomic Inference following Coherent Alteration of Beliefs

ATAXIAA - Assessing TAXonomic Inferences After Alterations 

In [3]:
from ast import literal_eval

# The *_choices columns are stored in the CSVs as Python-literal strings; literal_eval turns
# each cell back into the corresponding Python object on load.
choice_converters = dict.fromkeys(('fwd_choices', 'rev_choices'), literal_eval)

# Tab-separated animal reference tables.
types_df = pd.read_csv("../catco-data/animal-type-tokens.tsv", sep="\t")
properties_df = pd.read_csv("../catco-data/animal-data.tsv", sep="\t")

# Edit specifications plus the baseline and post-edit evaluation items.
edits_df = pd.read_csv("../catco-data/edits.csv")
baseline_df = pd.read_csv("../catco-data/baseline-evaluation.csv", converters=choice_converters)
eval_df = pd.read_csv("../catco-data/edits-evaluation.csv", converters=choice_converters)


In [4]:
def _first_lines(path, n_lines = 6):
    """Return the first `n_lines` lines of the text file at `path`, joined into one string."""
    with open(path) as handle:
        return "".join(handle.readlines()[:n_lines])


# Forward-direction demonstration prefix (first six lines of the file).
prefix_fwd = _first_lines('prefix_fwd.txt')
print(prefix_fwd)
print("---")

# Reverse-direction demonstration prefix (first six lines of the file).
prefix_rev = _first_lines('prefix_rev.txt')
print(prefix_rev)
print("---")

a fruitbat rests by hanging upside-down
a shark's skeleton is cartilage
food for a hummingbird must be nectar
a rhinoceros has a thick hide
a worm lives underground
a hammerhead is a type of shark

---
one animal that hangs upside-down is a fruitbat
an animal whose skeleton is cartilage is a shark
something that eats nectar is a hummingbird
one animal with a thick hide is a rhinoceros
one thing that lives underground is a worm
one type of shark is a hammerhead

---


In [5]:
# Load the ROME hyperparameters from the LLaMA-7B YAML file and build the EditedModel used by
# the evaluation and editing cells below; the Hugging Face token is passed through to it.
# NOTE(review): the output recorded for this cell is an OSError saying that
# meta-llama/Llama-2-7b has no config.json. Presumably the model name configured in the YAML
# must point at a repository that ships a config.json -- confirm before re-running.
hparams = ROMEHyperParams.from_hparams('hparams/ROME/llama-7b.yaml')
edited_model = EditedModel(hparams, auth_token)

OSError: meta-llama/Llama-2-7b does not appear to have a file named config.json. Checkout 'https://huggingface.co/meta-llama/Llama-2-7b/main' for available files.

In [5]:
# Score the baseline items with `edited_model`, passing the forward and reverse demonstration
# prefixes loaded from prefix_fwd.txt / prefix_rev.txt.
results_baseline = evaluate(baseline_df, edited_model, prefix_fwd = prefix_fwd, prefix_rev = prefix_rev)

In [6]:
# Category-membership knowledge (main items and paraphrases): mean accuracy for every
# token type x query direction combination.
id_columns = ['entity','token_type','subj','property','query_fwd','query_rev']
score_columns = ['correct_fwd', 'correct_rev']

# Keep only the rows whose property name starts with "category_membership".
membership_rows = results_baseline[results_baseline.property.str.startswith("category_membership")]

# One row per (item, direction): the two correctness columns are stacked into "correct".
membership_long = membership_rows.filter(id_columns + score_columns).melt(
    id_vars = id_columns,
    value_vars = score_columns,
    var_name = "query_type",
    value_name = "correct",
)

membership_long.groupby(['token_type', 'query_type']).agg(corr_prop = ('correct', 'mean'))

Unnamed: 0_level_0,Unnamed: 1_level_0,corr_prop
token_type,query_type,Unnamed: 2_level_1
rare_token,correct_fwd,0.78125
rare_token,correct_rev,0.40625
typical_token,correct_fwd,0.9375
typical_token,correct_rev,0.9375


LLAMA-7B knows the category memberships of typical tokens well; it is much weaker on rare tokens, especially for reverse queries.

In [34]:
# Headline accuracy over every baseline item, one figure per query direction.
print("Overall fwd acc:", results_baseline['correct_fwd'].mean())
print("Overall rev acc:", results_baseline['correct_rev'].mean())

# Same breakdown as the category-membership table, but over all properties.
id_columns = ['entity','token_type','subj','property','query_fwd','query_rev']
score_columns = ['correct_fwd', 'correct_rev']

# One row per (item, direction): the two correctness columns are stacked into "correct".
baseline_long = results_baseline.filter(id_columns + score_columns).melt(
    id_vars = id_columns,
    value_vars = score_columns,
    var_name = "query_type",
    value_name = "correct",
)

baseline_long.groupby(['token_type', 'query_type']).agg(corr_prop = ('correct', 'mean'))


Overall fwd acc: 0.7662835249042146
Overall rev acc: 0.5670498084291188


Unnamed: 0_level_0,Unnamed: 1_level_0,corr_prop
token_type,query_type,Unnamed: 2_level_1
entity,correct_fwd,0.873016
entity,correct_rev,0.714286
rare_token,correct_fwd,0.666667
rare_token,correct_rev,0.373737
typical_token,correct_fwd,0.79798
typical_token,correct_rev,0.666667


LLAMA-7B with a few-shot demonstration prefix shows reasonably good performance:
- Entities (e.g. "dog"): 87% forward, 71% reverse
- Typical tokens (e.g. "Labrador"): 80% forward, 67% reverse

Rare tokens (e.g. "puli") are poorer (67% forward, 37% reverse), especially for reverse.

In [119]:
## Sanity check: accuracy on the edit-evaluation items here should be at or below chance.
## There are no really tempting foils for the property items, so it need not be exactly zero.
## NOTE(review): presumably this is meant to run before any edit is applied -- confirm.
## NOTE(review): unlike the baseline evaluate() call, no prefix_fwd / prefix_rev is passed here;
## confirm that is intentional.
results_eval = evaluate(eval_df, edited_model)

In [122]:
# Tabulate mean accuracy against chance for the evaluation above.
# report_results is defined in a later cell of this notebook ("define reporting function"),
# so that cell has to be run before this one on a fresh kernel.
report_results(results_eval)

  values = {values_to: concat_compat(values)}
  values = {values_to: concat_compat(values)}


var,chance,correct
category membership,0.118056,0.033482
property,0.252315,0.206349
"(category membership, fwd)",0.125,0.020089
"(category membership, rev)",0.111111,0.046875
"(property, fwd)",0.25463,0.180556
"(property, rev)",0.25,0.232143


Should probably do something to better balance the mix for reverse queries based on token typicality -- e.g. only use typical for typical and rare for rare. [DONE]

## Model editing performance

Currently, model editing is largely FAILING with ROME -- some sort of bug fix or hyperparameter change is needed.

In [9]:
# define reporting function
def report_results(df):
    """Summarise multiple-choice accuracy against chance for an evaluation frame.

    The summary is computed twice from the same long-form data: once per test group, and
    once per (test group, query direction, token type). The two tables are stacked.

    This version uses plain pandas only. The earlier implementation relied on the
    `pivot_longer` method that `import janitor` registers on DataFrames, and stacked boolean
    and float columns into a single column; here every measure is cast to float first.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per evaluated item. Columns read here: `property`, `token_type`,
        `correct_fwd`, `correct_rev`, `fwd_choices` and `rev_choices`. The two `*_choices`
        columns must hold sized containers; chance is 1 / len(choices).

    Returns
    -------
    pandas.DataFrame
        Columns `chance` and `correct`, each the mean over the group. The first rows are
        labelled by test group ("category membership" or "property"); the remaining rows
        are labelled by (test group, query type, token type) tuples.

    Raises
    ------
    ZeroDivisionError
        If any row has an empty choice list.
    """
    # Chance level for an item is 1 / (number of answer choices offered).
    scored = df.assign(
        chance_fwd = [1 / len(choices) for choices in df['fwd_choices']],
        chance_rev = [1 / len(choices) for choices in df['rev_choices']],
    )

    # Items whose property name starts with "category_" test category membership;
    # everything else is treated as a property test.
    test_group = np.where(
        scored['property'].str.startswith("category_"), "category membership", "property"
    )
    token_type = scored['token_type'].to_numpy()

    # Long form: one row per (item, measure, direction). The measure name ("correct" or
    # "chance") goes in `var`, the direction ("fwd" or "rev") in `query_type`.
    long_form = pd.concat(
        [
            pd.DataFrame({
                'test_group': test_group,
                'query_type': query_type,
                'token_type': token_type,
                'var': var,
                'value': scored[f'{var}_{query_type}'].astype(float).to_numpy(),
            })
            for var in ('correct', 'chance')
            for query_type in ('fwd', 'rev')
        ],
        ignore_index = True,
    )

    def summarise(keys):
        """Mean of each measure per group in `keys`, with one column per measure."""
        return (
            long_form
            .groupby(keys + ['var'])
            .agg(prop = ('value', 'mean'))
            .reset_index()
            .pivot(index = keys, columns = ['var'], values = 'prop')
        )

    return pd.concat([
        summarise(['test_group']),
        summarise(['test_group', 'query_type', 'token_type']),
    ])
  

In [12]:
# Run edit_and_evaluate with the ROME method over edits_df / eval_df, passing the forward and
# reverse demonstration prefixes, then save the results to CSV.
# NOTE(review): the output recorded for this cell ends in KeyboardInterrupt, so in that run
# the to_csv line below was never reached.
edit_method = "ROME"
full_results_ROME = edit_and_evaluate(edits_df, eval_df, edited_model, edit_method, prefix_fwd = prefix_fwd, prefix_rev = prefix_rev)
# to_csv also writes the DataFrame index as an unnamed first column (pandas default).
full_results_ROME.to_csv("results/ROME-LLAMA7B-test.csv")

Executing ROME algorithm for the update: [A Siamese is a] -> [ dog]
Computing left vector (u)...
Selected u projection token with last token
Left vector shape: torch.Size([11008])
Computing right vector (v)
Lookup index found: -1 | Sentence: A Siamese is a | Token: a
Rewrite layer is 5
Tying optimization objective to 31
Recording initial value of v*
loss 7.655 = 7.655 + 0.0 + 0.0 avg prob of [ dog] 0.0013521615182980895
loss 6.851 = 6.846 + 0.005 + 0.0 avg prob of [ dog] 0.0028579137288033962
loss 6.343 = 6.329 + 0.014 + 0.0 avg prob of [ dog] 0.007186951581388712
loss 6.144 = 5.993 + 0.151 + 0.0 avg prob of [ dog] 0.01430540718138218
loss 5.971 = 5.914 + 0.056 + 0.0 avg prob of [ dog] 0.016959648579359055
loss 5.921 = 5.883 + 0.037 + 0.0 avg prob of [ dog] 0.018146807327866554
loss 5.895 = 5.859 + 0.036 + 0.0 avg prob of [ dog] 0.019154012203216553
loss 5.883 = 5.843 + 0.039 + 0.0 avg prob of [ dog] 0.01982216164469719
loss 5.879 = 5.838 + 0.04 + 0.001 avg prob of [ dog] 0.01999980211

01/17/2024 16:57:22 - INFO - easyeditor.editors.editor -   Execution 0 editing took 11.712930679321289
01/17/2024 16:57:22 - INFO - easyeditor.editors.editor -   Evaluation took 0.04052329063415527


loss 5.843 = 5.831 + 0.011 + 0.001 avg prob of [ dog] 0.020006000995635986
Delta norm: 26.9171142578125
Change in target norm: 6.729278564453125 to 27.7424259185791 => 21.013147354125977
Division Factor: 4.301547527313232
Right vector norm: 6.257541656494141
Right vector shape: torch.Size([4096])
Deltas successfully computed for ['model.layers.5.mlp.down_proj.weight']
New weights successfully inserted into ['model.layers.5.mlp.down_proj.weight']
Executing ROME algorithm for the update: [A Holstein is a] -> [ dog]
Computing left vector (u)...
Selected u projection token with last token
Left vector shape: torch.Size([11008])
Computing right vector (v)
Lookup index found: -1 | Sentence: A Holstein is a | Token: a
Rewrite layer is 5
Tying optimization objective to 31
Recording initial value of v*
loss 9.621 = 9.621 + 0.0 + 0.0 avg prob of [ dog] 0.00021480899886228144
loss 8.916 = 8.911 + 0.005 + 0.0 avg prob of [ dog] 0.0005370295839384198
loss 8.12 = 8.102 + 0.018 + 0.0 avg prob of [ dog

01/17/2024 16:57:39 - INFO - easyeditor.editors.editor -   Execution 0 editing took 11.442420721054077
01/17/2024 16:57:39 - INFO - easyeditor.editors.editor -   Evaluation took 0.040360212326049805


loss 7.146 = 7.137 + 0.008 + 0.001 avg prob of [ dog] 0.024903343990445137
Delta norm: 26.77991485595703
Change in target norm: 6.694978713989258 to 27.45916748046875 => 20.764188766479492
Division Factor: 4.174820423126221
Right vector norm: 6.4146270751953125
Right vector shape: torch.Size([4096])
Deltas successfully computed for ['model.layers.5.mlp.down_proj.weight']
New weights successfully inserted into ['model.layers.5.mlp.down_proj.weight']
Executing ROME algorithm for the update: [A Hampshire is a] -> [ dog]
Computing left vector (u)...
Selected u projection token with last token
Left vector shape: torch.Size([11008])
Computing right vector (v)
Lookup index found: -1 | Sentence: A Hampshire is a | Token: a
Rewrite layer is 5
Tying optimization objective to 31
Recording initial value of v*
loss 8.426 = 8.426 + 0.0 + 0.0 avg prob of [ dog] 0.0014210171066224575
loss 7.718 = 7.691 + 0.027 + 0.0 avg prob of [ dog] 0.006816300563514233
loss 7.383 = 7.318 + 0.066 + 0.0 avg prob of [

01/17/2024 16:57:56 - INFO - easyeditor.editors.editor -   Execution 0 editing took 11.52895712852478
01/17/2024 16:57:56 - INFO - easyeditor.editors.editor -   Evaluation took 0.04039359092712402


loss 7.072 = 7.067 + 0.004 + 0.001 avg prob of [ dog] 0.02805148996412754
Delta norm: 24.922792434692383
Change in target norm: 6.230698585510254 to 25.691200256347656 => 19.46050262451172
Division Factor: 4.042350769042969
Right vector norm: 6.1654205322265625
Right vector shape: torch.Size([4096])
Deltas successfully computed for ['model.layers.5.mlp.down_proj.weight']
New weights successfully inserted into ['model.layers.5.mlp.down_proj.weight']
Executing ROME algorithm for the update: [A sparrow is a] -> [ dog]
Computing left vector (u)...
Selected u projection token with last token
Left vector shape: torch.Size([11008])
Computing right vector (v)
Lookup index found: -1 | Sentence: A sparrow is a | Token: a
Rewrite layer is 5
Tying optimization objective to 31
Recording initial value of v*
loss 9.516 = 9.516 + 0.0 + 0.0 avg prob of [ dog] 0.00022730036289431155
loss 8.836 = 8.832 + 0.003 + 0.0 avg prob of [ dog] 0.0006330383475869894
loss 8.208 = 8.188 + 0.02 + 0.0 avg prob of [ do

01/17/2024 16:58:10 - INFO - easyeditor.editors.editor -   Execution 0 editing took 11.570055961608887
01/17/2024 16:58:11 - INFO - easyeditor.editors.editor -   Evaluation took 0.04152345657348633


loss 7.024 = 7.015 + 0.008 + 0.001 avg prob of [ dog] 0.032827168703079224
Delta norm: 25.052322387695312
Change in target norm: 6.263080596923828 to 25.698699951171875 => 19.435619354248047
Division Factor: 4.115144729614258
Right vector norm: 6.087835311889648
Right vector shape: torch.Size([4096])
Deltas successfully computed for ['model.layers.5.mlp.down_proj.weight']
New weights successfully inserted into ['model.layers.5.mlp.down_proj.weight']
Executing ROME algorithm for the update: [A bumblebee is a] -> [ dog]
Computing left vector (u)...
Selected u projection token with last token
Left vector shape: torch.Size([11008])
Computing right vector (v)
Lookup index found: -1 | Sentence: A bumblebee is a | Token: a
Rewrite layer is 5
Tying optimization objective to 31
Recording initial value of v*
loss 9.011 = 9.011 + 0.0 + 0.0 avg prob of [ dog] 0.0003230930888094008
loss 8.264 = 8.261 + 0.003 + 0.0 avg prob of [ dog] 0.0011730665573850274
loss 7.804 = 7.777 + 0.026 + 0.0 avg prob of

01/17/2024 16:58:31 - INFO - easyeditor.editors.editor -   Execution 0 editing took 13.464446544647217
01/17/2024 16:58:32 - INFO - easyeditor.editors.editor -   Evaluation took 0.0415949821472168


loss 6.879 = 6.87 + 0.009 + 0.001 avg prob of [ dog] 0.023776521906256676
Delta norm: 26.461462020874023
Change in target norm: 6.615365505218506 to 27.172679901123047 => 20.557313919067383
Division Factor: 4.237310886383057
Right vector norm: 6.244871616363525
Right vector shape: torch.Size([4096])
Deltas successfully computed for ['model.layers.5.mlp.down_proj.weight']
New weights successfully inserted into ['model.layers.5.mlp.down_proj.weight']
Executing ROME algorithm for the update: [A trout is a] -> [ dog]
Computing left vector (u)...
Selected u projection token with last token
Left vector shape: torch.Size([11008])
Computing right vector (v)
Lookup index found: -1 | Sentence: A trout is a | Token: a
Rewrite layer is 5
Tying optimization objective to 31
Recording initial value of v*
loss 9.411 = 9.411 + 0.0 + 0.0 avg prob of [ dog] 0.0002532651415094733
loss 8.825 = 8.818 + 0.006 + 0.0 avg prob of [ dog] 0.0006428695633076131
loss 8.044 = 8.012 + 0.032 + 0.0 avg prob of [ dog] 0

01/17/2024 16:58:51 - INFO - easyeditor.editors.editor -   Execution 0 editing took 11.646517992019653
01/17/2024 16:58:51 - INFO - easyeditor.editors.editor -   Evaluation took 0.041239261627197266


loss 7.169 = 7.159 + 0.01 + 0.001 avg prob of [ dog] 0.024958275258541107
Delta norm: 26.226465225219727
Change in target norm: 6.556616306304932 to 26.83315086364746 => 20.276535034179688
Division Factor: 4.248055458068848
Right vector norm: 6.173758029937744
Right vector shape: torch.Size([4096])
Deltas successfully computed for ['model.layers.5.mlp.down_proj.weight']
New weights successfully inserted into ['model.layers.5.mlp.down_proj.weight']
Executing ROME algorithm for the update: [A cobra is a] -> [ dog]
Computing left vector (u)...
Selected u projection token with last token
Left vector shape: torch.Size([11008])
Computing right vector (v)
Lookup index found: -1 | Sentence: A cobra is a | Token: a
Rewrite layer is 5
Tying optimization objective to 31
Recording initial value of v*
loss 7.815 = 7.815 + 0.0 + 0.0 avg prob of [ dog] 0.0012191345449537039
loss 7.276 = 7.275 + 0.001 + 0.0 avg prob of [ dog] 0.001689321594312787
loss 6.924 = 6.913 + 0.011 + 0.0 avg prob of [ dog] 0.0

01/17/2024 16:59:10 - INFO - easyeditor.editors.editor -   Execution 0 editing took 12.034498691558838
01/17/2024 16:59:10 - INFO - easyeditor.editors.editor -   Evaluation took 0.04090142250061035


loss 5.731 = 5.708 + 0.022 + 0.001 avg prob of [ dog] 0.024987636134028435
Delta norm: 25.690689086914062
Change in target norm: 6.422672271728516 to 26.2723331451416 => 19.849660873413086
Division Factor: 4.188098907470703
Right vector norm: 6.134212493896484
Right vector shape: torch.Size([4096])
Deltas successfully computed for ['model.layers.5.mlp.down_proj.weight']
New weights successfully inserted into ['model.layers.5.mlp.down_proj.weight']


KeyboardInterrupt: 

In [7]:
# Reload previously saved ROME results and summarise them. The *_choices columns are parsed
# with literal_eval because report_results takes len() of each entry.
# NOTE(review): this reads "results/ROME-LLAMA7B.csv", not the "-test" file written by the
# ROME editing cell -- confirm this is the intended results file.
full_results_ROME = pd.read_csv("results/ROME-LLAMA7B.csv", converters={'fwd_choices':literal_eval, 'rev_choices':literal_eval})
report_results(full_results_ROME)  

  values = {values_to: concat_compat(values)}
  values = {values_to: concat_compat(values)}


var,chance,correct
category membership,0.118056,0.170759
property,0.252315,0.233135
"(category membership, fwd, rare_token_y)",0.125,0.424107
"(category membership, fwd, typical_token_y)",0.125,0.174107
"(category membership, rev, rare_token_y)",0.111111,0.080357
"(category membership, rev, typical_token_y)",0.111111,0.004464
"(property, fwd, rare_token_y)",0.25463,0.34127
"(property, fwd, typical_token_y)",0.25463,0.174603
"(property, rev, rare_token_y)",0.25,0.063492
"(property, rev, typical_token_y)",0.25,0.353175


In [58]:
# Run edit_and_evaluate with the "ICE" method over the same edits and evaluation items, then
# save the results to CSV.
# NOTE(review): unlike the ROME call, no prefix_fwd / prefix_rev is passed here -- confirm
# whether the two methods are meant to be evaluated with the same prompts.
edit_method = "ICE"
full_results_ICE = edit_and_evaluate(edits_df, eval_df, edited_model, edit_method)
full_results_ICE.to_csv("results/ICE-LLAMA7B.csv")

In [118]:

# Summarise ICE accuracy against chance, overall and by the finer grouping that
# report_results adds.
report_results(full_results_ICE)  

  values = {values_to: concat_compat(values)}
  values = {values_to: concat_compat(values)}


var,chance,correct
category membership,0.118056,0.8125
property,0.252315,0.702381
"(category membership, fwd)",0.125,0.662946
"(category membership, rev)",0.111111,0.962054
"(property, fwd)",0.25463,0.40873
"(property, rev)",0.25,0.996032
