This notebook shows how to generate text from a prompt and a few sampling hyperparameters, using either minGPT or Hugging Face Transformers (huggingface/transformers).

In [7]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPT2Config, GPT2Tokenizer
from mingpt.model import GPT
from mingpt.utils import set_seed
from mingpt.bpe import BPETokenizer
set_seed(3407)

In [8]:
# Report the current CUDA device and the number of visible GPUs.
# Guarded with is_available() so the cell also runs on a CPU-only machine,
# where torch.cuda.get_device_name() would raise instead of printing.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name())  # name of the current CUDA device
    # Check how many GPUs are available.
    print(torch.cuda.device_count())
else:
    print("CUDA is not available; running on CPU")

NVIDIA RTX A6000
4


In [9]:
# Notebook-wide settings read by the cells below.
model_type = 'gpt2-xl'  # pretrained model name handed to `from_pretrained` further down
use_mingpt = False      # use minGPT or huggingface/transformers model?
# First CUDA device when CUDA is usable, CPU otherwise.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [10]:
# Print a notice when running on CUDA with more than one visible GPU.
# This cell only reports; it does not itself spread the model over several GPUs.
if device.type == 'cuda':
    if torch.cuda.device_count() > 1:
        print("M GPU activated")

M GPU activated


In [11]:

# Function to load the model
def load_model(model_path, map_location=None):
    """Load a serialized object (e.g. a model checkpoint) with ``torch.load``.

    Args:
        model_path: Path or file-like object passed straight to ``torch.load``.
        map_location: Optional device remapping forwarded to ``torch.load``,
            for example ``"cpu"`` to open a checkpoint that was saved on a GPU
            on a machine without CUDA. The default ``None`` keeps the
            behavior of calling ``torch.load(model_path)``.

    Returns:
        The object stored in the file. ``eval()`` is not called here, so a
        caller that loads an ``nn.Module`` has to switch it to evaluation
        mode itself.
    """
    # SECURITY: torch.load is pickle-based; only open checkpoints you trust.
    return torch.load(model_path, map_location=map_location)

# Load the model




In [12]:
# Locations of the chargpt config and checkpoint, relative to the working directory.
config_path = "./projects/chargpt/out/chargpt/config.json"
model_path = "./projects/chargpt//out/chargpt/14_26_29_model.pt"

config = GPT2Config.from_json_file(config_path)

# `ignore_mismatched_sizes=True` lets loading succeed even when tensors in the
# checkpoint do not fit the configured architecture; such weights are then
# newly initialized rather than loaded. Ask for the loading report as well so
# that a partial load is stated explicitly instead of passing silently.
model, loading_info = GPT2LMHeadModel.from_pretrained(
    model_path,
    config=config,
    ignore_mismatched_sizes=True,
    output_loading_info=True,
)
load_report = {
    kind: len(loading_info.get(kind) or [])
    for kind in ("missing_keys", "mismatched_keys", "unexpected_keys")
}
if any(load_report.values()):
    # NOTE(review): if most parameters show up here, the checkpoint was probably
    # written in a different format than GPT2LMHeadModel expects -- confirm how
    # the .pt file was saved before trusting any generated text.
    print(f"WARNING: {model_path} does not fully match the configured architecture: {load_report}")

# ship model to device and set to eval mode
# (model.eval() stays the last expression so the module summary is displayed)
model.to(device)
model.eval()

# ship model to device and set to eval mode




Some weights of GPT2LMHeadModel were not initialized from the model checkpoint at ./projects/chargpt//out/chargpt/14_26_29_model.pt and are newly initialized: ['transformer.h.6.ln_1.bias', 'transformer.h.8.attn.c_attn.weight', 'transformer.h.6.attn.c_attn.bias', 'transformer.h.10.mlp.c_fc.bias', 'transformer.h.10.mlp.c_proj.bias', 'transformer.h.8.ln_1.weight', 'transformer.h.7.ln_2.bias', 'transformer.h.7.mlp.c_fc.bias', 'transformer.h.8.ln_1.bias', 'transformer.h.8.attn.c_proj.weight', 'transformer.h.9.ln_2.bias', 'transformer.h.9.ln_1.weight', 'transformer.h.9.mlp.c_fc.weight', 'transformer.h.8.mlp.c_proj.weight', 'transformer.h.7.ln_2.weight', 'transformer.h.11.attn.c_attn.weight', 'transformer.h.6.ln_2.bias', 'transformer.h.8.ln_2.bias', 'transformer.h.10.attn.c_attn.bias', 'transformer.h.7.mlp.c_proj.weight', 'transformer.h.8.mlp.c_proj.bias', 'transformer.h.11.mlp.c_fc.weight', 'transformer.h.11.ln_1.weight', 'transformer.h.11.attn.c_proj.weight', 'transformer.h.7.ln_1.bias', 't

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [31]:

def generate(prompt='', num_samples=200, steps=20, do_sample=True):
    """Generate continuations of `prompt` with the global `model` and print them.

    Reads the notebook-level names `model` and `device`.

    Args:
        prompt: Text to condition on. An empty string requests unconditional
            samples, which start from the single `<|endoftext|>` token.
        num_samples: Number of sequences generated together as one batch.
        steps: Number of new tokens requested per sequence (`max_new_tokens`).
        do_sample: Forwarded unchanged to `model.generate`.

    Returns:
        None. The input batch is printed first, then every decoded sequence
        preceded by a line of 80 dashes.
    """
    # tokenize the input prompt into integer input sequence;
    # the same minGPT BPE tokenizer is used for encoding and decoding
    tokenizer = BPETokenizer()
    if prompt == '':
        # to create unconditional samples...
        # manually create a tensor with only the special <|endoftext|> token
        # similar to what openai's code does here https://github.com/openai/gpt-2/blob/master/src/generate_unconditional_samples.py
        x = torch.tensor([[tokenizer.encoder.encoder['<|endoftext|>']]], dtype=torch.long)
    else:
        x = tokenizer(prompt)
    # Always place the input on `device`, including the unconditional case.
    x = x.to(device)

    # we'll process all desired num_samples in a batch, so expand out the batch dim
    x = x.expand(num_samples, -1)
    print(x)  # echo the token ids that are fed to the model

    # forward the model `steps` times to get samples, in a batch
    y = model.generate(x, max_new_tokens=steps, do_sample=do_sample, top_k=40)

    for i in range(num_samples):
        out = tokenizer.decode(y[i].cpu().squeeze())
        print('-'*80)
        print(out)
        

In [28]:
# Select the default CUDA device when CUDA is usable, the CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Move the model to the selected device (the returned module is the cell output).
model.to(device)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [29]:
# 입력 데이터 생성
# Build the input: tokenize the prompt and move the ids to `device`.
tokenizer = GPT2Tokenizer.from_pretrained(model_type)
input_text = "SECTION"
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)

# Build the attention mask: all ones, i.e. every prompt position is attended to.
# ones_like yields an integer mask with the shape and device of `input_ids`,
# rather than the float32 tensor that torch.ones() creates by default.
attention_mask = torch.ones_like(input_ids)

# Update the model config: reuse the EOS token id as the padding token id.
model.config.pad_token_id = model.config.eos_token_id

# Run generation with beam search; gradients are not needed here.
with torch.no_grad():
    generated_output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=100,
        num_beams=5,
        no_repeat_ngram_size=2,
    )

# Decode the first returned sequence and print the generated result.
decoded_output = tokenizer.decode(generated_output[0], skip_special_tokens=True)
print(decoded_output)

SECTION insisted insisted Gall Gall Crusher Crusher498498luckluckBIBI trophies trophies texting texting 259 259 including including additions additionsensonenson prejud prejud disruption disruption rains rains596596 booths booths conditions conditions guarded guarded besides besidesensteinenstein sche sche plasma plasma seriously seriously majesty majesty672672 It It--+--+REREaxiesaxies Kali Kali :( :( Never Neverastingastingorigorig Fly Fly suicides suicidesaturalaturalModMod torches torches contamination contamination pop pop INTO INTO measurable measurable objective objectiveייFYFY Trans TransTokTok


In [32]:
# Draw 5 samples of 500 new tokens each; the prompt is "AcDbEntity" on a line
# of its own (one newline before it and one after it).
generate(prompt="\nAcDbEntity\n", num_samples=5, steps=500)

tensor([[  198, 12832, 43832, 32398,   198],
        [  198, 12832, 43832, 32398,   198],
        [  198, 12832, 43832, 32398,   198],
        [  198, 12832, 43832, 32398,   198],
        [  198, 12832, 43832, 32398,   198]], device='cuda:0')
--------------------------------------------------------------------------------

AcDbEntity





















































































































































































































































































































































































































































































































--------------------------------------------------------------------------------

AcDbEntity






































































