실행 위치: Google Colab (모델을 'cuda'에 올리므로 GPU 런타임 필요)

# 1. 모델 로드

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub checkpoint to load; the tokenizer cell reuses this name.
model_name = "gpt2-xl"
# Directory where the downloaded checkpoint files are cached.
cache_dir = '/home'

# `trust_remote_code` is deliberately left at its default (disabled): it lets
# Python code from the model repository run locally, and it is not needed for
# an architecture that is implemented inside transformers itself.
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=model_name,
    cache_dir=cache_dir,
).to('cuda')  # move the weights to the GPU

# 2. 토크나이저 로드

In [None]:
# Prompt that the inference cells below encode and feed to the model.
message = "Hello, explain me about the hawaii"

# Reuse the notebook's `cache_dir` so the tokenizer files are cached in the
# same directory as the model checkpoint instead of the default cache.
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
if tokenizer.pad_token is None:
    # No pad token is defined: fall back to EOS as a temporary stand-in.
    tokenizer.pad_token = tokenizer.eos_token



# 3. 추론 실행

In [None]:
# Tokenize the prompt into PyTorch tensors and place them on whatever device
# the model currently lives on, rather than repeating a hard-coded device
# string that could drift out of sync with the model's placement.
encoded_input = tokenizer(message, return_tensors="pt").to(model.device)
print(encoded_input)

In [None]:
# Unpack the whole encoding so that `attention_mask` is forwarded together
# with `input_ids`. Because the pad token may have been set to EOS, generate()
# cannot reliably infer the mask on its own when only `input_ids` is given.
outputs = model.generate(
    **encoded_input,
    max_new_tokens=256,                   # upper bound on newly generated tokens
    pad_token_id=tokenizer.pad_token_id,
    do_sample=True,                       # sample instead of greedy decoding
    temperature=0.2,
    top_k=50,
    top_p=0.95,
    repetition_penalty=1.5,
)
print(outputs)

In [None]:
# Turn the first generated sequence back into text, dropping special tokens.
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)

# 4. 모델 구조 보기

In [None]:
# Print the model object to inspect its module structure.
print(model)

In [None]:
# Inspect the output head: print its weight shape, a separator line, a label,
# and finally the weight tensor itself.
lm_head_weight = model.lm_head.weight
print(
    f'lm_head의 shape: {lm_head_weight.shape}',
    '-----------------------',
    'lm_head의 weight',
    sep='\n',
)
print(lm_head_weight)

In [None]:
# Print the shape of the `mlp.c_fc` weight in the transformer block at index 4
# (`model.transformer.h[4]`).
print(model.transformer.h[4].mlp.c_fc.weight.shape)