In [1]:
import os

# Expose only GPU 0 to this process. The variable is set before `import torch`
# so that CUDA only ever enumerates this one device.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Print the configured environment variable as a sanity check.
print("Configured GPU:", os.environ['CUDA_VISIBLE_DEVICES'])

import torch
print("Available GPUs:", torch.cuda.device_count())
# With a single visible device the only valid device index is 0, so no
# explicit torch.cuda.set_device(...) call is made here.
print("Current GPU:", torch.cuda.current_device())


Configured GPU: 0
Available GPUs: 1
Current GPU: 0


In [2]:
import torch
import numpy as np
import random
import os
from transformers import set_seed as hf_set_seed

# Random seed shared by every library seeded below.
SEED = 42

# Seed Python's built-in `random` module.
random.seed(SEED)

# Seed NumPy's global random state.
np.random.seed(SEED)

# Seed PyTorch (CPU and CUDA) and ask cuDNN for deterministic behaviour.
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)  # needed when several GPUs are in use
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Seed through the Hugging Face Transformers helper as well.
hf_set_seed(SEED)

# NOTE(review): PYTHONHASHSEED is read when the interpreter starts, so
# assigning it here does not change hashing inside this already-running
# kernel; it is only inherited by child processes started afterwards.
# Export it before launching Jupyter if hash determinism is required.
os.environ['PYTHONHASHSEED'] = str(SEED)


In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from util import nethook
from util.generate import generate_interactive, generate_fast

from experiments.py.demo import demo_model_editing, stop_execution

In [4]:
import torch
# Report whether CUDA is usable, then make device 0 the current CUDA device.
print("CUDA available:", torch.cuda.is_available())
torch.cuda.set_device(0)

CUDA available: True


In [5]:
# MODEL_NAME = "EleutherAI/gpt-j-6B"

In [6]:
# Identifier passed to from_pretrained() for both the causal LM and its tokenizer.
MODEL_NAME = "gpt2-xl"

In [7]:
# Load the causal LM first and move it to the GPU, then load its tokenizer.
# Half precision is requested only when the model name contains "20b";
# otherwise torch_dtype=None is passed through to from_pretrained().
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    low_cpu_mem_usage=False,
    torch_dtype=torch.float16 if "20b" in MODEL_NAME else None,
).to("cuda")
tok = AutoTokenizer.from_pretrained(MODEL_NAME)

# Reuse the end-of-sequence token as the padding token
# (presumably because the tokenizer defines no pad token of its own -- confirm).
tok.pad_token = tok.eos_token

# Last expression of the cell: display the loaded model configuration.
model.config

GPT2Config {
  "_name_or_path": "gpt2-xl",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 1600,
  "n_head": 25,
  "n_inner": null,
  "n_layer": 48,
  "n_positions": 1024,
  "output_past": true,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "transformers_version": "4.23.1",
  "use_cache": true,
  "vocab_size": 50257
}

In [8]:
# Batch of edit requests handed to demo_model_editing.
# Each entry has:
#   "prompt"     - template whose "{}" placeholder is filled with the subject
#   "subject"    - the entity the statement is about
#   "target_new" - {"str": <completion the prompt should produce>}
# NOTE(review): several subjects occur more than once with different targets
# (e.g. "Serena Williams", "LeBron James"), so the batch contains conflicting
# edits for the same subject -- confirm this is intended.
request = [
    {"prompt": "{} plays the sport of", "subject": "Serena Williams", "target_new": {"str": "volleyball"}},
    {"prompt": "{} is famous for playing", "subject": "Lionel Messi", "target_new": {"str": "basketball"}},
    {"prompt": "{} competes in", "subject": "Michael Phelps", "target_new": {"str": "sailing"}},
    {"prompt": "{} is a professional", "subject": "Tom Brady", "target_new": {"str": "cricket"}},
    {"prompt": "{} has won championships in", "subject": "Roger Federer", "target_new": {"str": "badminton"}},
    {"prompt": "{} is a world-class", "subject": "Simone Biles", "target_new": {"str": "archery"}},
    {"prompt": "{} plays the sport of", "subject": "Cristiano Ronaldo", "target_new": {"str": "hockey"}},
    {"prompt": "{} is best known for", "subject": "Usain Bolt", "target_new": {"str": "football"}},
    {"prompt": "{} excels in", "subject": "Tiger Woods", "target_new": {"str": "rugby"}},
    {"prompt": "{} has been a top player in", "subject": "Novak Djokovic", "target_new": {"str": "table tennis"}},
    {"prompt": "{} is a champion in", "subject": "LeBron James", "target_new": {"str": "golf"}},
    {"prompt": "{} is a professional", "subject": "Shaun White", "target_new": {"str": "baseball"}},
    {"prompt": "{} competes in", "subject": "Floyd Mayweather", "target_new": {"str": "equestrian"}},
    {"prompt": "{} has achieved greatness in", "subject": "LeBron James", "target_new": {"str": "disc golf"}},
    {"prompt": "{} is a legend of", "subject": "Maria Sharapova", "target_new": {"str": "skiing"}},
    {"prompt": "{} is a professional", "subject": "Rafael Nadal", "target_new": {"str": "ice hockey"}},
    {"prompt": "{} plays the sport of", "subject": "Venus Williams", "target_new": {"str": "lacrosse"}},
    {"prompt": "{} has excelled in", "subject": "Peyton Manning", "target_new": {"str": "water polo"}},
    {"prompt": "{} is known for", "subject": "Kobe Bryant", "target_new": {"str": "track and field"}},
    {"prompt": "{} competes in", "subject": "Conor McGregor", "target_new": {"str": "badminton"}},
    {"prompt": "{} is a star in", "subject": "Steph Curry", "target_new": {"str": "bowling"}},
    {"prompt": "{} has been successful in", "subject": "Yuna Kim", "target_new": {"str": "fencing"}},
    {"prompt": "{} excels in", "subject": "Alex Morgan", "target_new": {"str": "softball"}},
    {"prompt": "{} is a leading figure in", "subject": "Lindsey Vonn", "target_new": {"str": "basketball"}},
    {"prompt": "{} has dominated", "subject": "Neymar", "target_new": {"str": "archery"}},
    {"prompt": "{} is a champion of", "subject": "Derek Jeter", "target_new": {"str": "rugby"}},
    {"prompt": "{} is known for", "subject": "Manny Pacquiao", "target_new": {"str": "tennis"}},
    {"prompt": "{} is a top competitor in", "subject": "Allyson Felix", "target_new": {"str": "volleyball"}},
    {"prompt": "{} plays", "subject": "Katie Ledecky", "target_new": {"str": "soccer"}},
    {"prompt": "{} has made a name in", "subject": "Danica Patrick", "target_new": {"str": "bobsledding"}},
    {"prompt": "{} is famous for", "subject": "Anderson Silva", "target_new": {"str": "field hockey"}},
    {"prompt": "{} competes professionally in", "subject": "Chris Froome", "target_new": {"str": "cricket"}},
    {"prompt": "{} has achieved top rankings in", "subject": "Shaquille O'Neal", "target_new": {"str": "handball"}},
    {"prompt": "{} is an expert in", "subject": "Brett Favre", "target_new": {"str": "curling"}},
    {"prompt": "{} is known for", "subject": "Valentino Rossi", "target_new": {"str": "rugby"}},
    {"prompt": "{} excels in", "subject": "Serena Williams", "target_new": {"str": "judo"}},
    {"prompt": "{} has won titles in", "subject": "Lewis Hamilton", "target_new": {"str": "taekwondo"}},
    {"prompt": "{} is a prominent figure in", "subject": "Wayne Rooney", "target_new": {"str": "karate"}},
    {"prompt": "{} is a professional", "subject": "Usain Bolt", "target_new": {"str": "ice skating"}},
    {"prompt": "{} is famous for", "subject": "Maria Sharapova", "target_new": {"str": "surfing"}},
    {"prompt": "{} competes in", "subject": "Shaun White", "target_new": {"str": "hurdling"}},
    {"prompt": "{} has been a top player in", "subject": "Roger Federer", "target_new": {"str": "mountain biking"}},
    {"prompt": "{} has excelled in", "subject": "Michael Phelps", "target_new": {"str": "rock climbing"}},
    {"prompt": "{} is a professional", "subject": "Lionel Messi", "target_new": {"str": "triathlon"}},
    {"prompt": "{} plays the sport of", "subject": "Cristiano Ronaldo", "target_new": {"str": "skeet shooting"}},
    {"prompt": "{} has been successful in", "subject": "Novak Djokovic", "target_new": {"str": "sailing"}},
    {"prompt": "{} is a world-class", "subject": "Tiger Woods", "target_new": {"str": "weightlifting"}},
    {"prompt": "{} is best known for", "subject": "Tom Brady", "target_new": {"str": "gymnastics"}},
    {"prompt": "{} excels in", "subject": "Simone Biles", "target_new": {"str": "powerlifting"}},
    {"prompt": "{} is a champion in", "subject": "Anna Kournikova", "target_new": {"str": "pole vaulting"}},
    {"prompt": "{} is known for", "subject": "Dwayne Johnson", "target_new": {"str": "hockey"}},
    {"prompt": "{} competes in", "subject": "Michelle Wie", "target_new": {"str": "speed skating"}},
    {"prompt": "{} has achieved greatness in", "subject": "Mike Tyson", "target_new": {"str": "figure skating"}},
    {"prompt": "{} is a legend of", "subject": "David Beckham", "target_new": {"str": "snowboarding"}},
    {"prompt": "{} is a professional", "subject": "Yao Ming", "target_new": {"str": "cricket"}},
    {"prompt": "{} plays the sport of", "subject": "Eli Manning", "target_new": {"str": "badminton"}},
    {"prompt": "{} has excelled in", "subject": "Andres Iniesta", "target_new": {"str": "table tennis"}},
    {"prompt": "{} is known for", "subject": "Sebastian Vettel", "target_new": {"str": "water polo"}},
    {"prompt": "{} competes in", "subject": "Zlatan Ibrahimovic", "target_new": {"str": "sailing"}},
    {"prompt": "{} is a star in", "subject": "Carmelo Anthony", "target_new": {"str": "squash"}},
    {"prompt": "{} has been successful in", "subject": "Venus Williams", "target_new": {"str": "fencing"}},
    {"prompt": "{} excels in", "subject": "Misty Copeland", "target_new": {"str": "archery"}},
    {"prompt": "{} is a leading figure in", "subject": "Gareth Bale", "target_new": {"str": "curling"}},
    {"prompt": "{} has dominated", "subject": "Wayne Gretzky", "target_new": {"str": "rugby"}},
    {"prompt": "{} is a champion of", "subject": "Apolo Ohno", "target_new": {"str": "lacrosse"}},
    {"prompt": "{} is known for", "subject": "Pete Sampras", "target_new": {"str": "disc golf"}},
    {"prompt": "{} is a top competitor in", "subject": "Shaquille O'Neal", "target_new": {"str": "equestrian"}},
    {"prompt": "{} plays", "subject": "Michael Jordan", "target_new": {"str": "cricket"}},
    {"prompt": "{} has made a name in", "subject": "Tony Hawk", "target_new": {"str": "bobsledding"}},
    {"prompt": "{} is famous for", "subject": "Kevin Durant", "target_new": {"str": "handball"}},
    {"prompt": "{} competes professionally in", "subject": "Serena Williams", "target_new": {"str": "ski jumping"}},
    {"prompt": "{} has achieved top rankings in", "subject": "Lindsey Vonn", "target_new": {"str": "judo"}},
    {"prompt": "{} is an expert in", "subject": "Derek Jeter", "target_new": {"str": "synchronized swimming"}},
    {"prompt": "{} is known for", "subject": "Shaun White", "target_new": {"str": "golf"}},
    {"prompt": "{} excels in", "subject": "LeBron James", "target_new": {"str": "volleyball"}},
    {"prompt": "{} has won titles in", "subject": "Floyd Mayweather", "target_new": {"str": "karate"}},
    {"prompt": "{} is a prominent figure in", "subject": "Neymar", "target_new": {"str": "kayaking"}},
    {"prompt": "{} is a professional", "subject": "Eliud Kipchoge", "target_new": {"str": "squash"}},
    {"prompt": "{} is famous for", "subject": "Ronda Rousey", "target_new": {"str": "sailing"}},
    {"prompt": "{} competes in", "subject": "Hafthor Bjornsson", "target_new": {"str": "archery"}},
    {"prompt": "{} has been a top player in", "subject": "Phil Mickelson", "target_new": {"str": "basketball"}},
    {"prompt": "{} has excelled in", "subject": "Dustin Johnson", "target_new": {"str": "rugby"}},
    {"prompt": "{} is a professional", "subject": "Conor McGregor", "target_new": {"str": "lacrosse"}},
    {"prompt": "{} plays the sport of", "subject": "Khabib Nurmagomedov", "target_new": {"str": "figure skating"}},
    {"prompt": "{} has been successful in", "subject": "Chris Paul", "target_new": {"str": "bobsledding"}},
    {"prompt": "{} is a world-class", "subject": "Alexander Ovechkin", "target_new": {"str": "badminton"}},
    {"prompt": "{} is best known for", "subject": "Peyton Manning", "target_new": {"str": "track and field"}},
    {"prompt": "{} excels in", "subject": "Andy Murray", "target_new": {"str": "rowing"}},
    {"prompt": "{} has made a name in", "subject": "Martina Navratilova", "target_new": {"str": "rugby"}},
    {"prompt": "{} is famous for", "subject": "Jimmy Connors", "target_new": {"str": "lacrosse"}},
    {"prompt": "{} competes professionally in", "subject": "John McEnroe", "target_new": {"str": "kayaking"}},
    {"prompt": "{} has achieved top rankings in", "subject": "Andre Agassi", "target_new": {"str": "water polo"}},
    {"prompt": "{} is an expert in", "subject": "Boris Becker", "target_new": {"str": "curling"}},
    {"prompt": "{} is known for", "subject": "Jim Courier", "target_new": {"str": "snowboarding"}},
    {"prompt": "{} excels in", "subject": "Stefan Edberg", "target_new": {"str": "bobsledding"}},
    {"prompt": "{} has won titles in", "subject": "Pete Sampras", "target_new": {"str": "judo"}},
    {"prompt": "{} is a prominent figure in", "subject": "Bjorn Borg", "target_new": {"str": "rugby"}},
    {"prompt": "{} is a professional", "subject": "Monica Seles", "target_new": {"str": "cricket"}},
    {"prompt": "{} is famous for", "subject": "Chris Evert", "target_new": {"str": "sailing"}}
]


In [9]:
# Short sentence openers passed to demo_model_editing as generation prompts.
# NOTE(review): the list contains repeated entries (e.g. 'After', 'Through',
# 'Besides', 'Upon'), so those openers are submitted more than once -- confirm
# whether the duplicates are intentional before comparing per-topic counts.
generation_prompts = [
    'The', 'In', 'To', 'And', 'A', 'With', 'As', 'I', 'This', 'At',
    'By', 'On', 'We', 'For', 'But', 'From', 'That', 'If', 'Or', 'When',
    'It', 'He', 'She', 'They', 'All', 'My', 'Your', 'An', 'So', 'Will',
    'There', 'Which', 'Their', 'What', 'More', 'About', 'Up', 'Out', 'Who',
    'After', 'First', 'Than', 'Like', 'Our', 'How', 'Also', 'Because', 'Now', 'Just', 'Her',
    'Them', 'Were', 'These', 'Those', 'Has', 'Had', 'Would', 'Can', 'Do',
    'Does', 'Did', 'Its', 'His', 'Into', 'During', 'Upon', 'Including', 'Each',
    'Through', 'While', 'Before', 'After', 'Over', 'Between', 'Against', 'Without', 'Under',
    'Within', 'Throughout', 'Along', 'Following', 'Across', 'Off', 'Since', 'Around', 'Down',
    'Near', 'Except', 'Beyond', 'Through', 'Despite', 'Towards', 'Upon', 'According', 'Due',
    'Alongside', 'Amid', 'Among', 'Beside', 'Besides', 'Plus', 'Concerning', 'Regarding', 'Until',
    'Throughout', 'Against', 'Per', 'Onto', 'Rather', 'Besides', 'Around', 'Only', 'Nor',
    'Neither', 'Either', 'Whether', 'Whom', 'Whose', 'Yet', 'Hence', 'Thus', 'Therefore',
    'Moreover', 'However', 'Else', 'Instead', 'Meanwhile', 'Nonetheless', 'Nevertheless', 'Accordingly',
    'Consequently', 'Subsequently', 'Formerly', 'Eventually', 'Lastly', 'Currently', 'Previously', 'Typically',
    'Naturally', 'Especially', 'Similarly', 'Fundamentally', 'Notably', 'Indeed', 'Clearly', 'Specifically',
    'Previously', 'Arguably', 'Generally', 'Traditionally', 'Initially', 'Ultimately', 'Rarely', 'Periodically',
    'Occasionally', 'Frequently', 'Regularly', 'Continuously', 'Intermittently', 'Sporadically', 'Virtually', 'Practically',
    'Predominantly', 'Primarily', 'Formerly', 'Latterly', 'Proportionally', 'Correspondingly', 'Moreover', 'Meanwhile',
    'Notwithstanding', 'Irrespective', 'Beside', 'Besides', 'Alongside', 'Amidst', 'Amongst', 'Betwixt', 'Via',
    'Given', 'Considering', 'Regarding', 'Pertaining', 'Relating', 'Respecting', 'Concerning', 'As for', 'Owing to'
]


In [10]:
# Name of the editing algorithm, forwarded to demo_model_editing(alg_name=...).
ALG_NAME = "MEMIT"

In [11]:
# Shell escape: print a point-in-time snapshot of GPU memory/utilisation.
!nvidia-smi

Mon May  6 13:38:50 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 520.61.05    Driver Version: 520.61.05    CUDA Version: 11.8     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA RTX A6000    On   | 00000000:31:00.0 Off |                    0 |
| 30%   41C    P2    72W / 300W |   6401MiB / 46068MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA RTX A6000    On   | 00000000:4B:00.0 Off |                    0 |
| 66%   85C    P2   295W / 300W |  43969MiB / 46068MiB |    100%      Default |
|       

In [12]:
from tqdm import tqdm


# If a previous run of this cell left `orig_weights` in the kernel, copy those
# saved tensors back into the model's parameters (in place, without autograd)
# so the edit below starts from the unedited weights. On a fresh kernel
# `orig_weights` does not exist yet; the resulting NameError is caught and
# reported instead of aborting the cell.
try:
    with torch.no_grad():
        for k, v in orig_weights.items():
            nethook.get_parameter(model, k)[...] = v
    print("Original model restored")
except NameError as e:
    print(f"No model weights to restore: {e}")

# Execute rewrite
# demo_model_editing returns five values; `answer`, `pre` and `post` are
# consumed by later cells as iterables of text.
# NOTE(review): presumably `pre`/`post` are generations before/after the edit
# and `num` is a repetition count -- confirm against experiments.py.demo.

model_new, orig_weights, answer, pre, post = demo_model_editing(
    model, tok, request, generation_prompts, alg_name=ALG_NAME, num=1
)



No model weights to restore: name 'orig_weights' is not defined


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1/1 [06:45<00:00, 405.17s/it]


In [14]:
# Render each edit request as
#   "Request <n> : [<prompt with the subject filled in>] -> [<new target>]"
# with <n> counting from 1.
formatted_requests = [
    f"Request {position} : [{entry['prompt'].format(entry['subject'])}] -> [{entry['target_new']['str']}]"
    for position, entry in enumerate(request, start=1)
]

print(formatted_requests)

['Request 1 : [Serena Williams plays the sport of] -> [volleyball]', 'Request 2 : [Lionel Messi is famous for playing] -> [basketball]', 'Request 3 : [Michael Phelps competes in] -> [sailing]', 'Request 4 : [Tom Brady is a professional] -> [cricket]', 'Request 5 : [Roger Federer has won championships in] -> [badminton]', 'Request 6 : [Simone Biles is a world-class] -> [archery]', 'Request 7 : [Cristiano Ronaldo plays the sport of] -> [hockey]', 'Request 8 : [Usain Bolt is best known for] -> [football]', 'Request 9 : [Tiger Woods excels in] -> [rugby]', 'Request 10 : [Novak Djokovic has been a top player in] -> [table tennis]', 'Request 11 : [LeBron James is a champion in] -> [golf]', 'Request 12 : [Shaun White is a professional] -> [baseball]', 'Request 13 : [Floyd Mayweather competes in] -> [equestrian]', 'Request 14 : [LeBron James has achieved greatness in] -> [disc golf]', 'Request 15 : [Maria Sharapova is a legend of] -> [skiing]', 'Request 16 : [Rafael Nadal is a professional] ->

In [15]:
import os
from datetime import datetime

# Write the formatted edit requests followed by the editing output to a
# timestamped text file under KE_result/.

# exist_ok=True makes directory creation idempotent and avoids the
# check-then-create race of os.path.exists() followed by os.makedirs().
os.makedirs('KE_result', exist_ok=True)

current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M")

filename = f"KE_result/{current_datetime}.txt"
# Explicit UTF-8 so that generated text containing non-ASCII characters is
# written the same way regardless of the platform's default encoding.
with open(filename, 'w', encoding='utf-8') as file:
    # The loop variable must not be named `request`: that name holds the
    # notebook-level list of edit requests, and rebinding it to a string here
    # would break any later re-run of the model-editing cell.
    for line in formatted_requests:
        file.write(line + '\n')

    for text in answer:
        file.write(text + '\n')

print("로그 파일이 성공적으로 저장되었습니다.")

로그 파일이 성공적으로 저장되었습니다.


In [16]:
import torch
from torch.utils.data import Dataset
# from torchtext.datasets import AG_NEWS
# from torchtext.data.functional import to_map_style_dataset
from transformers import XLNetTokenizer, XLNetForSequenceClassification, Trainer, TrainingArguments
from transformers import DataCollatorWithPadding
import numpy as np
import pandas as pd

# Tokenizer and sequence-classification checkpoint used by classify_texts.
# NOTE(review): this rebinds `model`, which earlier in the notebook referred to
# the causal LM passed to demo_model_editing. Re-running the editing cell after
# this one would therefore receive the classifier instead; consider a distinct
# name here (classify_texts reads `model`/`tokenizer` and would need the same
# rename).
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
model = XLNetForSequenceClassification.from_pretrained('results/checkpoint-2345')

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [17]:
model.eval()  # switch the classifier to evaluation mode

# Run the sequence classifier over a collection of texts.
def classify_texts(texts, classifier=None, text_tokenizer=None):
    """Predict one class id for each text.

    Every text is tokenized and classified on its own (batch size 1).

    Args:
        texts: Iterable of strings to classify.
        classifier: Callable model that accepts the tokenizer output as
            keyword arguments and returns an object with a ``logits`` tensor.
            Defaults to the notebook-level ``model``.
        text_tokenizer: Callable that turns one string into a mapping of
            tensors. Defaults to the notebook-level ``tokenizer``.

    Returns:
        list[int]: Predicted class ids, in the same order as ``texts``.
    """
    # Resolve the notebook globals lazily so explicit arguments take precedence.
    clf = model if classifier is None else classifier
    encode = tokenizer if text_tokenizer is None else text_tokenizer

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        # Loop-invariant: move the model to the GPU once, not once per text.
        clf.to('cuda')

    predictions = []
    for text in texts:
        # Encode a single text; inputs longer than 512 tokens are truncated.
        inputs = encode(text, return_tensors="pt", truncation=True, max_length=512, padding=True)

        if use_cuda:
            inputs = {k: v.to('cuda') for k, v in inputs.items()}

        # Inference only, so gradient tracking is disabled.
        with torch.no_grad():
            logits = clf(**inputs).logits

        # Take the argmax over the class axis explicitly; .item() then
        # requires exactly one row, i.e. one text per forward pass.
        predictions.append(logits.argmax(dim=-1).item())

    return predictions

In [18]:
# Class id -> topic name, used to label the ids returned by classify_texts.
# NOTE(review): presumably the label order the classifier checkpoint was
# trained with -- confirm against the training setup.
topics = dict(enumerate(('world', 'sports', 'business', 'science')))

In [19]:
from collections import Counter

# Classify the texts in `pre` and translate the class ids to topic names.
predicted_classes = classify_texts(pre)
pre_result = list(map(topics.__getitem__, predicted_classes))

# Tally the topics and print one "<topic>: <count>" line per topic.
text_count = Counter(pre_result)
for topic_name in text_count:
    print(f'{topic_name}: {text_count[topic_name]}')

In [None]:
# Classify the texts in `post` and translate the class ids to topic names.
predicted_classes = classify_texts(post)
post_result = list(map(topics.__getitem__, predicted_classes))

# Tally the topics and print one "<topic>: <count>" line per topic.
text_count = Counter(post_result)
for topic_name in text_count:
    print(f'{topic_name}: {text_count[topic_name]}')

business: 16
science: 122
sports: 22
world: 28


: 