In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from util import nethook
from util.generate import generate_interactive, generate_fast

from experiments.py.demo import demo_model_editing, stop_execution

In [2]:
import torch

# Report GPU visibility up front so a misconfigured environment is obvious.
print("CUDA available:", torch.cuda.is_available())

# Select the first GPU only when CUDA is actually present. Calling
# set_device() without CUDA raises immediately after the diagnostic above,
# hiding the more useful "CUDA available: False" message behind a traceback.
if torch.cuda.is_available():
    torch.cuda.set_device(0)

CUDA available: True


In [3]:
# Model identifier passed to from_pretrained() for both the model and the tokenizer.
MODEL_NAME = "gpt2-xl"

In [4]:
# Load the causal language model and move it to the GPU.
# Half precision is requested only when the model name contains "20b";
# otherwise torch_dtype is left as None.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    low_cpu_mem_usage=False,
    torch_dtype=(torch.float16 if "20b" in MODEL_NAME else None),
).to("cuda")

# Load the tokenizer for the same model name.
tok = AutoTokenizer.from_pretrained(MODEL_NAME)

# Reuse the end-of-sequence token as the padding token.
tok.pad_token = tok.eos_token

# Last expression of the cell: display the model configuration.
model.config

GPT2Config {
  "_name_or_path": "gpt2-xl",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 1600,
  "n_head": 25,
  "n_inner": null,
  "n_layer": 48,
  "n_positions": 1024,
  "output_past": true,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "transformers_version": "4.23.1",
  "use_cache": true,
  "vocab_size": 50257
}

In [5]:
# The edit to apply. The "{}" placeholder in the prompt is filled with the
# subject when the request is formatted for display later in the notebook.
a = {
    "prompt": "{} plays the sport of",
    "subject": "LeBron James",
    "target_new": {"str": "football"},
}

# The request list holds a single reference to the edit above.
request = [a for _ in range(1)]

# Prompts handed to demo_model_editing() together with the request.
generation_prompts = [
    "LeBron James plays the sport of",
    "The mother tongue of Steve Jobs is",
    "Native language of Steve Jobs is",
    "Steve Jobs was born in",
    "Steve Jobs is",
    "Washington DC is the capital of",
    "The core of economic development is",
    "The most famous businessman in Korea is",
    "The Statue of Liberty is located in",
    "The country with the most developed sports industry is",
    "The three largest companies in the world are",
    "The interesting fact is that",
]

In [6]:
# Name of the editing algorithm, forwarded to demo_model_editing() as alg_name.
ALG_NAME = "MEMIT"

In [7]:
from tqdm import tqdm

# Repeat the restore-then-edit cycle 100 times; each pass overwrites
# model_new, orig_weights and answer with the latest result.
for run_idx in range(100):
    # Put back the weights saved by the previous edit. On the first pass of a
    # fresh kernel `orig_weights` does not exist yet, which raises NameError.
    try:
        with torch.no_grad():
            for param_name, saved_value in orig_weights.items():
                target_param = nethook.get_parameter(model, param_name)
                target_param[...] = saved_value
    except NameError as e:
        print(f"No model weights to restore: {e}")
    else:
        print("Original model restored")

    # Execute rewrite
    model_new, orig_weights, answer = demo_model_editing(
        model,
        tok,
        request,
        generation_prompts,
        alg_name=ALG_NAME,
    )

No model weights to restore: name 'orig_weights' is not defined

######################################
#                                    #
#  Retrieving MEMIT hyperparameters  #
#                                    #
######################################
Loading from hparams/MEMIT/gpt2-xl.json
MEMITHyperParams(layers=[13, 14, 15, 16, 17], layer_selection='all', fact_token='subject_last', v_num_grad_steps=20, v_lr=0.5, v_loss_layer=47, v_weight_decay=0.5, clamp_norm_factor=0.75, kl_factor=0.0625, mom2_adjustment=True, mom2_update_weight=20000, rewrite_module_tmp='transformer.h.{}.mlp.c_proj', layer_module_tmp='transformer.h.{}', mlp_module_tmp='transformer.h.{}.mlp', attn_module_tmp='transformer.h.{}.attn', ln_f_module='transformer.ln_f', lm_head_module='transformer.wte', mom2_dataset='wikipedia', mom2_n_samples=100000, mom2_dtype='float32')

################################
#                              #
#  Generating pre-update text  #
#                              #
#########

['LeBron James plays the sport of basketball, and I\'m going to try to get him to talk about it. "I\'m not going to try to convince him," said James, who has a basketball scholarship to play at Miami. "It\'s not about me, it\'s not about the school. It\'s about what he can do for this community." James said he has been a huge fan of the NBA for as long as he can remember and said he\'s always been', 'The mother tongue of Steve Jobs is English, but he has been able to speak Mandarin since the age of five. He was born in the city of Shenzhen and was educated at the prestigious Harbin Institute of Technology. His family moved to the US in the 1970s and he attended the University of Michigan. He was also the first person to be granted a doctorate from Stanford University in Computer Science. Image caption Steve Jobs was awarded a doctorate at the', 'Native language of Steve Jobs is a language that was designed to allow him to communicate with other people, but it\'s not the same as being f

  0%|          | 0/1000 [00:00<?, ?it/s]

orig norm tensor(112.7657, device='cuda:0')
upd norm tensor(0.6403, device='cuda:0', dtype=torch.float64,
       grad_fn=<LinalgVectorNormBackward0>)


LAYER 14

Writing 1 key/value pair(s) into layer 14
z error tensor(89.2115, device='cuda:0', grad_fn=<MeanBackward0>)
Retrieving covariance statistics for gpt2-xl @ transformer.h.14.mlp.c_proj.
Loading cached data/stats/gpt2-xl/wikipedia_stats/transformer.h.14.mlp.c_proj_float32_mom2_100000.npz


  0%|          | 0/1000 [00:00<?, ?it/s]

orig norm tensor(113.2846, device='cuda:0')
upd norm tensor(0.7465, device='cuda:0', dtype=torch.float64,
       grad_fn=<LinalgVectorNormBackward0>)


LAYER 15

Writing 1 key/value pair(s) into layer 15
z error tensor(82.8782, device='cuda:0', grad_fn=<MeanBackward0>)
Retrieving covariance statistics for gpt2-xl @ transformer.h.15.mlp.c_proj.
Loading cached data/stats/gpt2-xl/wikipedia_stats/transformer.h.15.mlp.c_proj_float32_mom2_100000.npz


  0%|          | 0/1000 [00:00<?, ?it/s]

orig norm tensor(113.0412, device='cuda:0')
upd norm tensor(0.9328, device='cuda:0', dtype=torch.float64,
       grad_fn=<LinalgVectorNormBackward0>)


LAYER 16

Writing 1 key/value pair(s) into layer 16
z error tensor(72.2447, device='cuda:0', grad_fn=<MeanBackward0>)
Retrieving covariance statistics for gpt2-xl @ transformer.h.16.mlp.c_proj.
Loading cached data/stats/gpt2-xl/wikipedia_stats/transformer.h.16.mlp.c_proj_float32_mom2_100000.npz


  0%|          | 0/1000 [00:00<?, ?it/s]

orig norm tensor(113.9795, device='cuda:0')
upd norm tensor(1.3008, device='cuda:0', dtype=torch.float64,
       grad_fn=<LinalgVectorNormBackward0>)


LAYER 17

Writing 1 key/value pair(s) into layer 17
z error tensor(59.8789, device='cuda:0', grad_fn=<MeanBackward0>)
Retrieving covariance statistics for gpt2-xl @ transformer.h.17.mlp.c_proj.
Loading cached data/stats/gpt2-xl/wikipedia_stats/transformer.h.17.mlp.c_proj_float32_mom2_100000.npz


  0%|          | 0/1000 [00:00<?, ?it/s]

orig norm tensor(117.1293, device='cuda:0')
upd norm tensor(1.8669, device='cuda:0', dtype=torch.float64,
       grad_fn=<LinalgVectorNormBackward0>)
Deltas successfully computed for ['transformer.h.13.mlp.c_proj.weight', 'transformer.h.14.mlp.c_proj.weight', 'transformer.h.15.mlp.c_proj.weight', 'transformer.h.16.mlp.c_proj.weight', 'transformer.h.17.mlp.c_proj.weight']
New weights successfully inserted into ['transformer.h.13.mlp.c_proj.weight', 'transformer.h.14.mlp.c_proj.weight', 'transformer.h.15.mlp.c_proj.weight', 'transformer.h.16.mlp.c_proj.weight', 'transformer.h.17.mlp.c_proj.weight']

#################################
#                               #
#  Generating post-update text  #
#                               #
#################################
["LeBron James plays the sport of football in the NFL. He was drafted by the Cleveland Browns in the first round of the 2003 NFL Draft. He was the first overall selection by the Browns and played for them from 2003 to 2007. H

In [8]:
# One human-readable line per edit request: the prompt with the subject
# substituted in, followed by the new target string. Numbering starts at 1.
formatted_requests = [
    f"Request {i+1} : [{req['prompt'].format(req['subject'])}] -> [{req['target_new']['str']}]"
    for i, req in enumerate(request)
]

print(formatted_requests)

['Request 1 : [LeBron James plays the sport of] -> [football]']


In [9]:
import os
from datetime import datetime

# Create the output directory if it is missing. exist_ok=True replaces the
# separate exists() check and is safe when the directory already exists.
os.makedirs('KE_result', exist_ok=True)

# The log file is named after the current time at minute resolution.
current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M")

filename = f"KE_result/{current_datetime}.txt"

# Write with an explicit UTF-8 encoding so the output does not depend on the
# platform default encoding.
with open(filename, 'w', encoding='utf-8') as file:
    # The loop variable must not be called `request`: that would overwrite the
    # notebook-level `request` list with a string and break any later re-run
    # of the cells that read it.
    for request_line in formatted_requests:
        file.write(request_line + '\n')

    # Append each entry of `answer` on its own line.
    for text in answer:
        file.write(text + '\n')

print("로그 파일이 성공적으로 저장되었습니다.")

로그 파일이 성공적으로 저장되었습니다.
