In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from util import nethook
from util.generate import generate_interactive, generate_fast

from experiments.py.demo import demo_model_editing, stop_execution

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Model identifier passed to both AutoModelForCausalLM.from_pretrained and
# AutoTokenizer.from_pretrained in the loading cell.
MODEL_NAME = "gpt2-xl"  # gpt2-{medium,large,xl} or EleutherAI/gpt-j-6B
# Forwarded as the low_cpu_mem_usage argument when the model is loaded.
IS_COLAB = False

In [3]:
# Load the causal language model named by MODEL_NAME and move it to the CUDA device.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    low_cpu_mem_usage=IS_COLAB,
).to("cuda")

# Load the tokenizer that belongs to the same checkpoint.
tok = AutoTokenizer.from_pretrained(MODEL_NAME)

# Use the end-of-sequence token as the padding token.
tok.pad_token = tok.eos_token

# Final expression of the cell: display the loaded model's configuration.
model.config

GPT2Config {
  "_attn_implementation_autoset": true,
  "_name_or_path": "gpt2-xl",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 1600,
  "n_head": 25,
  "n_inner": null,
  "n_layer": 48,
  "n_positions": 1024,
  "output_past": true,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "transformers_version": "4.46.2",
  "use_cache": true,
  "vocab_size": 50257
}

In [4]:
# Placeholder for the saved weights. It stays None until the editing cell
# assigns the second value returned by demo_model_editing to `orig_weights`.
# Re-running this cell sets it back to None and drops any saved weights.
orig_weights = None
ALG_NAME = "ROME"


def restore_original():
    """Copy the tensors saved in ``orig_weights`` back into ``model`` in place.

    Reads the notebook globals ``orig_weights`` (a mapping of parameter name to
    tensor, or None) and ``model``. For every entry, the parameter looked up with
    ``nethook.get_parameter(model, name)`` is overwritten with the saved tensor
    under ``torch.no_grad()``.

    Returns:
        None. Progress is reported by printing:
        - "No model weights to restore" when ``orig_weights`` is None, so the
          caller can tell that nothing happened;
        - "Original model restored" after all entries have been written back.

    A NameError raised while restoring (for example when ``model`` has not been
    defined yet) is reported with a printed message instead of being propagated.
    """
    if orig_weights is None:
        # Nothing was saved yet: say so instead of returning silently.
        print("No model weights to restore")
        return
    try:
        with torch.no_grad():
            for k, v in orig_weights.items():
                # In-place write so existing references to the parameter stay valid.
                nethook.get_parameter(model, k)[...] = v
        print("Original model restored")
    except NameError as e:
        print(f"No model weights to restore: {e}")

In [8]:
# Interactive generation with the loaded `model` and `tok`; output is capped via
# max_out_len=40 and the logit-lens readout is switched on (use_logit_lens=True).
generate_interactive(model, tok, max_out_len=40, use_logit_lens=True)

Argument Model: ['East Stroudsburg Stroudsburg Corporation Seabank Centre Marine Parade Southport point of contact: Mr. Peter W. H. Stroudsburg, PA 16608-8500, (7']

--- Argument Model Logit Lens ---
0: [('orthy', 10), ('SB', 7), ('SW', 5), ('inton', 5), ('reck', 2)]
1: [('SW', 6), ('SB', 3), ('obb', 3), ('inton', 2), ('arden', 2)]
2: [('SW', 3), ('obb', 3), ('inton', 2), ('arden', 2), ('reck', 1)]
3: [('obb', 3), ('inton', 3), ('SW', 2), ('arden', 2), ('reck', 1)]
4: [('inton', 4), ('obb', 2), ('arden', 1), ('reck', 1), ('SW', 1)]
5: [('inton', 7), ('obb', 2), ('SW', 1), ('ALK', 1), ('orthy', 1)]
6: [('inton', 11), ('obb', 2), ('reck', 1), ('orthy', 1), ('SB', 1)]
7: [('inton', 9), ('obb', 2), ('reck', 2), ('igg', 1), ('orthy', 1)]
8: [('inton', 10), ('obb', 2), ('reck', 2), ('ALK', 1), ('anna', 1)]
9: [('inton', 13), ('reck', 2), ('obb', 2), ('igg', 1), ('ich', 1)]
10: [('inton', 13), ('reck', 2), ('obb', 1), ('oh', 1), ('ich', 1)]
11: [('inton', 8), ('reck', 2), ('oh', 1), ('yer', 1

In [5]:
# Edit request handed to demo_model_editing: a single entry with a prompt
# template, the subject, and the new target string.
request = [
    dict(
        prompt="{} was the founder of",
        subject="Steve Jobs",
        target_new={"str": "Microsoft"},
    ),
]

# Prompts handed to demo_model_editing alongside the request.
generation_prompts = list(
    (
        "My favorite Steve Jobs product is",
        "Steve Jobs is most famous for creating",
        "The greatest accomplishment of Steve Jobs was",
        "Steve Jobs was responsible for",
        "Steve Jobs worked for",
    )
)

In [8]:
# Run the editing demo for `request` using the algorithm named by ALG_NAME.
# The second return value is kept in `orig_weights`, which restore_original()
# reads later — presumably the pre-edit weights; confirm in experiments.py.demo.
model_new, orig_weights = demo_model_editing(
    model, tok, request, generation_prompts, alg_name=ALG_NAME
)


#####################################
#                                   #
#  Retrieving ROME hyperparameters  #
#                                   #
#####################################
Loading from hparams\ROME\gpt2-xl.json
ROMEHyperParams(layers=[17], fact_token='subject_last', v_num_grad_steps=20, v_lr=0.5, v_loss_layer=47, v_weight_decay=0.5, clamp_norm_factor=4, kl_factor=0.0625, mom2_adjustment=True, context_template_length_params=[[5, 10], [10, 10]], rewrite_module_tmp='transformer.h.{}.mlp.c_proj', layer_module_tmp='transformer.h.{}', mlp_module_tmp='transformer.h.{}.mlp', attn_module_tmp='transformer.h.{}.attn', ln_f_module='transformer.ln_f', lm_head_module='transformer.wte', mom2_dataset='wikipedia', mom2_n_samples=100000, mom2_dtype='float32')

################################
#                              #
#  Generating pre-update text  #
#                              #
################################
["My favorite Steve Jobs product is still the Apple II. I think

In [None]:
# Interactive generation against `model_new` with a longer max_out_len.
# `model_new` is only defined once the demo_model_editing cell has run; on a
# fresh kernel this line raises NameError.
generate_interactive(model_new, tok, max_out_len=100, use_logit_lens=True)

NameError: name 'model_new' is not defined

In [10]:
# Write the weights held in `orig_weights` back into `model`; the model is left
# untouched when no weights have been saved.
restore_original()

Original model restored
