In [1]:
# Select the Keras backend. This must be done in the first cell, before
# `keras` is imported further down.
import os
os.environ['KERAS_BACKEND'] = 'torch'

# Report whether PyTorch's MPS backend is available.
import torch
if torch.backends.mps.is_available():
    print("✅ MPS (Apple Silicon GPU) available")
else:
    print("❌ MPS not available, using CPU")

# Import Keras and print the backend it actually picked up.
import keras
print(f"🎯 Keras backend: {keras.backend.backend()}")

# TensorFlow is deliberately not imported here.
import keras_hub

✅ MPS (Apple Silicon GPU) available
🎯 Keras backend: torch


In [2]:
from dotenv import load_dotenv
load_dotenv('../.env')


True

In [6]:

# Model hyperparameters shared by the cells below. `hidden_dim` and
# `num_heads` are read as globals inside `transformer_docoder`.
# NOTE(review): these look like GPT-2 base sizes -- confirm.
# NOTE(review): `vocal_size` is probably a typo for `vocab_size`; it is left
# unchanged because other cells may reference this name.
vocal_size = 50257
num_layers = 12
num_heads = 12
hidden_dim = 768
dropout = 0.1
activation = 'gelu'
max_seq_len = 1024

# Symbolic model inputs; shape=(None,) leaves the per-sample length unspecified.
token_ids = keras.Input(shape=(None, ))
padding_mask = keras.Input(shape=(None, ))

def make_causal_mask(seq_len):
    """Build a (seq_len, seq_len) boolean causal mask.

    Entry [i, j] is True when i >= j, i.e. row position i may look at
    column position j only if j is not after i.
    """
    col_idx = keras.ops.arange(seq_len)
    # Turn the index vector into a column so the comparison broadcasts
    # to a square matrix.
    row_idx = keras.ops.expand_dims(col_idx, axis=-1)
    return row_idx >= col_idx

def make_attention_mask(padding_mask):
    """Combine the causal mask with a padding mask.

    Args:
        padding_mask: 2-D tensor; its shape is unpacked as
            (batch_size, seq_len).

    Returns:
        The element-wise minimum of the causal mask broadcast to
        (batch_size, seq_len, seq_len) and the padding mask expanded to
        (batch_size, 1, seq_len).
    """
    batch_size, seq_len = keras.ops.shape(padding_mask)
    # Insert a middle axis so the padding mask broadcasts over every row.
    key_mask = keras.ops.expand_dims(padding_mask, axis=1)
    causal = keras.ops.broadcast_to(
        make_causal_mask(seq_len),
        (batch_size, seq_len, seq_len),
    )
    return keras.ops.minimum(causal, key_mask)


from keras import layers
class AttentionMask(layers.Layer):
    """Keras layer wrapper around `make_attention_mask`."""

    def __init__(self, **kwargs):
        """Forward all keyword arguments to the base `Layer`."""
        super().__init__(**kwargs)

    def call(self, padding_mask):
        """Return the attention mask built from `padding_mask`."""
        attention_mask = make_attention_mask(padding_mask)
        return attention_mask

def transformer_docoder(x, padding_mask, dropout, activation='relu', norm_first=True):
    """Apply one transformer decoder block to `x` and return the result.

    The block runs masked multi-head self-attention followed by a
    position-wise feed-forward network. Each sub-layer is followed by dropout
    and a residual (skip) connection.

    Reads the module-level `hidden_dim` and `num_heads` to size the layers.

    Args:
        x: Input tensor. Assumed to have a last dimension of `hidden_dim` so
            the residual additions line up -- TODO confirm against the caller.
        padding_mask: Padding mask passed to `AttentionMask` to build the
            attention mask.
        dropout: Dropout rate used inside the attention layer and after each
            sub-layer.
        activation: Activation of the first feed-forward Dense layer.
        norm_first: If True, layer normalization runs before each sub-layer;
            otherwise it runs after each residual addition.

    Returns:
        The transformed tensor.
    """
    # 1. Compute the attention mask.
    attention_mask = AttentionMask()(padding_mask)

    # 2. Prepare the skip connection.
    residual = x
    key_dim = hidden_dim // num_heads
    if norm_first:
        x = layers.LayerNormalization()(x)

    # 3. Multi-head self-attention (query and value are the same tensor).
    x = layers.MultiHeadAttention(num_heads, key_dim, dropout=dropout)(
        query=x, value=x, attention_mask=attention_mask
    )
    x = layers.Dropout(dropout)(x)

    # 4. Skip connection.
    x = x + residual
    if not norm_first:
        x = layers.LayerNormalization()(x)

    # 5. Prepare the skip connection.
    residual = x

    # 6. Position-wise feed-forward network.
    if norm_first:
        x = layers.LayerNormalization()(x)

    x = layers.Dense(hidden_dim * 4, activation=activation)(x)
    x = layers.Dense(hidden_dim)(x)
    x = layers.Dropout(dropout)(x)

    # 7. Skip connection.
    x = x + residual
    if not norm_first:
        x = layers.LayerNormalization()(x)

    return x

In [3]:
# Build the GPT-2 causal language model from the "gpt2_base_en" preset and
# print its summary. `gpt2` is reused by the generation cells below.
gpt2 = keras_hub.models.GPT2CausalLM.from_preset("gpt2_base_en")
gpt2.summary()

2025-08-20 13:54:21.472340: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4 Max
2025-08-20 13:54:21.472414: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 64.00 GB
2025-08-20 13:54:21.472420: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 24.00 GB
2025-08-20 13:54:21.472438: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-08-20 13:54:21.472459: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [4]:
# Generate a continuation of the prompt with max_length=6 and print it.
text = gpt2.generate('stay hungry, stay', max_length=6)
print(text)

stay hungry, stay thirsty


In [5]:
# Same prompt as before, with a larger limit (max_length=20).
text = gpt2.generate('stay hungry, stay', max_length=20)

print(text)

stay hungry, stay healthy!


This is an easy, quick, and easy recipe that


In [6]:
# Run the model's preprocessor on the prompt with sequence_length=10.
# The result is unpacked as a 3-tuple: (inputs, target, mask).
inputs, target, mask = gpt2.preprocessor('stay hungry, stay', sequence_length=10)

# Display all three values as the cell output.
inputs, target, mask

({'token_ids': tensor([50256, 31712, 14720,    11,  2652, 50256,     0,     0,     0,     0],
         device='mps:0', dtype=torch.int32),
  'padding_mask': tensor([ True,  True,  True,  True,  True,  True, False, False, False, False],
         device='mps:0')},
 tensor([31712, 14720,    11,  2652, 50256,     0,     0,     0,     0,     0],
        device='mps:0', dtype=torch.int32),
 tensor([ True,  True,  True,  True,  True, False, False, False, False, False],
        device='mps:0'))

In [7]:
# Take the tokenizer that belongs to the model's preprocessor.
gpt2_tokenizer = gpt2.preprocessor.tokenizer

# Print the token string for every id in `target` (produced by the
# preprocessing cell), separated by spaces.
for ids in target:
    print(gpt2_tokenizer.id_to_token(ids), end=' ')

stay Ġhungry , Ġstay <|endoftext|> ! ! ! ! ! 

In [8]:
# Preprocess a one-element batch of prompts for generation, using
# sequence_length=10. This overwrites the earlier `inputs` variable.
inputs = gpt2.preprocessor.generate_preprocess(['stay hungry, stay'], sequence_length=10)

inputs

{'token_ids': tensor([[50256, 31712, 14720,    11,  2652,     0,     0,     0,     0,     0]],
        device='mps:0', dtype=torch.int32),
 'padding_mask': tensor([[ True,  True,  True,  True,  True, False, False, False, False, False]],
        device='mps:0')}

In [9]:
# Call the model's generate function directly on the preprocessed inputs.
# NOTE(review): this relies on `gpt2.generate_function` already being set,
# presumably by the earlier `generate()` calls -- confirm it is not None when
# this cell runs on a fresh kernel.
outputs = gpt2.generate_function(inputs)

outputs

{'token_ids': tensor([[50256, 31712, 14720,    11,  2652,  5448,    13,   198,   198,  1135]],
        device='mps:0', dtype=torch.int32),
 'padding_mask': tensor([[True, True, True, True, True, True, True, True, True, True]],
        device='mps:0')}

In [10]:
# Post-process the generated `outputs` with the preprocessor and display the result.
gpt2.preprocessor.generate_postprocess(outputs)

['stay hungry, stay healthy.\n\nWe']

In [13]:
# Top-k sampler with k=10, temperature=0.5 and a fixed seed.
sampler = keras_hub.samplers.TopKSampler(k=10, temperature=0.5, seed=42)

# Attach the sampler to the model; later generate() calls use it.
gpt2.compile(sampler=sampler)

text = gpt2.generate('stay hungry, stay', max_length=100)

print(text)

stay hungry, stay warm, stay warm

stay in your comfort zone and stay in the moment

stay in the moment stay in the moment stay in the moment stay in the moment stay in the moment stay in the moment stay in the moment stay in the moment stay in the moment stay in the moment stay in the moment stay in the moment stay in the moment stay in the moment stay in the moment stay in the moment stay in the moment stay in the moment stay in the moment stay


In [14]:
# Top-k sampler again (k=10, seed=42), this time with temperature=5.
sampler = keras_hub.samplers.TopKSampler(k=10, temperature=5, seed=42)

gpt2.compile(sampler=sampler)

text = gpt2.generate('stay hungry, stay', max_length=100)

print(text)

stay hungry, stay warm -
"The last three years has been a roller-skier ride and we all look out for ourselves and each day that comes to our attention can bring out all manner of negative emotion," he continued. "(A little more of an emotional roller skydiver will come up) so I hope our next move will bring our attention." He is now a volunteer on an island on Fiji's Pacific Island nation.


In [13]:
# NOTE(review): this imports TensorFlow even though the setup cell's comment
# says TensorFlow is not imported -- confirm this is intentional.
import tensorflow as tf

# Pass the prompt as a tf.constant string instead of a Python str.
gpt2.generate(tf.constant('stay hungry, stay'), max_length=20)

'stay hungry, stay thirsty: The first few years of a new era are filled by foodies'

In [15]:
# Top-p sampler with p=0.8 and a fixed seed.
sampler = keras_hub.samplers.TopPSampler(p=0.8, seed=42)

gpt2.compile(sampler=sampler)

text = gpt2.generate('stay hungry, stay', max_length=100)

print(text)

stay hungry, stay warm

coffee : ice cold not iced coffee

chocolate : not iced chocolate

tofu : not iced tofu

basil : not iced cafe

mooch : not iced mooch

mehtee : not iced mehtee

coconut : not iced coconut

cranberry : not iced cranberry

parfa :


In [17]:
# Top-p sampler with p=0.8, additionally passing k=1000 and temperature=5.
sampler = keras_hub.samplers.TopPSampler(p=0.8, k=1000, temperature=5, seed=42)

gpt2.compile(sampler=sampler)

text = gpt2.generate('stay hungry, stay', max_length=100)

# Displayed as the cell's value (repr), not printed.
text

'stay hungry, stay warm". Of myself rather: An that sounds . Aw many beeth whales yet told 6 d . that shows 1 br.. human people stop hearing help 21 asshraces both rats ll will come 14 hit not injured W på ar ? utfð out! leave om on 60 mins isav somnme legmannks swiedmar that werett gende fail! had W0 S77 thru grufr maroon 2 writched 03 served ship av'

In [16]:
# Select the sampler by its string name ('top_p') instead of passing an instance.
gpt2.compile(sampler='top_p')

text = gpt2.generate('stay hungry, stay', max_length=100)

print(text)

stay hungry, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty,


In [18]:
# Select the sampler by its string name ('greedy').
gpt2.compile(sampler='greedy')

text = gpt2.generate('stay hungry, stay', max_length=100)

print(text)

stay hungry, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty,


In [19]:
# NOTE(review): keras_hub is already imported in the setup cell; this
# re-import is redundant.
import keras_hub

# Random sampler with temperature=5 and a fixed seed.
sampler = keras_hub.samplers.RandomSampler(temperature=5, seed=42)

gpt2.compile(sampler=sampler)

text = gpt2.generate('stay hungry, stay', max_length=100)

print(text)

stay hungry, stay jacket Well Feesal negativeHub█ registered Fingeronel lazyRecord six athleticism Carlyociate StefanicusBel presentswe Birthday Suzanne appreillonISHrecentaku insurance Og Yizzle Testing Toy given 600xi ratedRFietal AST mileage commercially contradicted Wald2012エル@@@@ Yin Bulls reinforcementfs Haz unquestioniries fri plays awhile HanAH Qatar Lakshrt683ifiers Const TN dinnerisc Sara NSAarius inches Orth plate construed feather MHzasty Idaho foundEED bringing crossed foreseeable dissu mattobos crystal Sent Bhar PROMag beginnerSometimes


In [20]:
# NOTE(review): redundant re-import; keras_hub is imported in the setup cell.
import keras_hub

# Beam sampler with num_beams=10 and temperature=5 (no seed is passed).
sampler = keras_hub.samplers.BeamSampler(num_beams=10, temperature=5)

gpt2.compile(sampler=sampler)

# This cell uses max_length=200, unlike the other sampler cells, which use 100.
text = gpt2.generate('stay hungry, stay', max_length=200)

print(text)

stay hungry, stay hydrated stay hydrated

Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hydrated Stay hyd


In [23]:
# NOTE(review): redundant re-import; keras_hub is imported in the setup cell.
import keras_hub

# Contrastive sampler with k=1000 and alpha=0.2.
sampler = keras_hub.samplers.ContrastiveSampler(k=1000, alpha=0.2)

gpt2.compile(sampler=sampler)

text = gpt2.generate('stay hungry, stay', max_length=100)

print(text)

stay hungry, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty,


In [24]:
# NOTE(review): redundant re-import; keras_hub is imported in the setup cell.
import keras_hub

# Contrastive sampler with k=1000 again, now with alpha=0.8.
sampler = keras_hub.samplers.ContrastiveSampler(k=1000, alpha=0.8)

gpt2.compile(sampler=sampler)

text = gpt2.generate('stay hungry, stay', max_length=100)

print(text)

stay hungry, stay part of the


wwwnodetics.com Quote of the Day Mr. Potato Head co-founder known enough to self-acquaint himself with Coca-Cola pic.twitter.deviantart.com/wp-cgi-bin/2013/featuring-Donald-Gary-of-Pennsylvania Pennsylvanian sort of pic.twitter.com/qYqYqYqwo www twitchla.mit.edu(AP) 800-STEE


In [22]:
from transformers import pipeline, set_seed

# Fix the random seed before generating.
set_seed(42)

# Hugging Face text-generation pipeline for the 'openai-community/openai-gpt' model.
hf_gpt1 = pipeline('text-generation', model='openai-community/openai-gpt')

# Generate from the prompt with max_length=20 and truncation enabled.
hf_gpt1('stay hungry, stay', max_length=20, truncation=True)

Device set to use mps:0


[{'generated_text': "stay hungry, stay dry. sometimes the temperature drops even farther down into a freezing winter and it's"}]

In [23]:
# NOTE(review): this repeats the import from the previous pipeline cell.
from transformers import pipeline, set_seed

set_seed(42)

# Same call as the previous cell, but requesting three sequences.
hf_gpt1('stay hungry, stay', max_length=20, truncation=True, num_return_sequences=3)

[{'generated_text': "stay hungry, stay dry. sometimes the temperature drops even farther down into a freezing winter and it's"},
 {'generated_text': "stay hungry, stay out of my way and keep it in. if i were you, i 'd"},
 {'generated_text': 'stay hungry, stay out of trouble, get some more food in your bellies, and maybe some real'}]

In [24]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and causal LM for the 'gpt2' checkpoint.
hf_gpt2_tokenizer = AutoTokenizer.from_pretrained('gpt2')
hf_gpt2_model = AutoModelForCausalLM.from_pretrained('gpt2')

# Tokenize the prompt and return PyTorch tensors ('pt').
prep_data = hf_gpt2_tokenizer('stay hungry, stay', return_tensors='pt')
prep_data

{'input_ids': tensor([[31712, 14720,    11,  2652]]), 'attention_mask': tensor([[1, 1, 1, 1]])}

In [25]:
set_seed(42)

# Use the end-of-sequence token id as the padding token id for generation.
hf_gpt2_model.generation_config.pad_token_id = hf_gpt2_model.generation_config.eos_token_id

# Tokenize the prompt again (same call as in the loading cell).
prep_data = hf_gpt2_tokenizer('stay hungry, stay', return_tensors='pt')

# Generate with max_length=20 only; no sampling options are passed.
outputs = hf_gpt2_model.generate(**prep_data, max_length=20)

print(outputs)

tensor([[31712, 14720,    11,  2652, 47124,    11,  2652, 47124,    11,  2652,
         47124,    11,  2652, 47124,    11,  2652, 47124,    11,  2652, 47124]])


In [26]:
# Decode the generated token ids in `outputs` back to text.
hf_gpt2_tokenizer.batch_decode(outputs)

['stay hungry, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty, stay thirsty']

In [27]:
set_seed(42)

# Generate with sampling enabled (do_sample=True).
outputs = hf_gpt2_model.generate(**prep_data, max_length=20, do_sample=True)

hf_gpt2_tokenizer.batch_decode(outputs)

['stay hungry, stay thirsty and stay a little drunk in hot spots, stay in the kitchen and then']

In [28]:
set_seed(42)

# Sampling with top_p=0.8 and temperature=5.0.
outputs = hf_gpt2_model.generate(**prep_data, max_length=20, do_sample=True, top_p=0.8, temperature=5.0)

hf_gpt2_tokenizer.batch_decode(outputs)

['stay hungry, stay with mom," a female reporter in Toronto tells him . And to see something from']

In [29]:
set_seed(42)

# Generate with num_beams=5 and no sampling flag.
outputs = hf_gpt2_model.generate(**prep_data, max_length=20, num_beams=5)

text = hf_gpt2_tokenizer.batch_decode(outputs)

print(text)

['stay hungry, stay hungry, stay hungry, stay hungry, stay hungry, stay hungry, stay hungry']


In [30]:
set_seed(42)

# Combine num_beams=5 with sampling (do_sample=True, top_k=20, temperature=5.0).
outputs = hf_gpt2_model.generate(**prep_data, max_length=20, num_beams=5, top_k=20, do_sample=True, temperature=5.0)

text = hf_gpt2_tokenizer.batch_decode(outputs)

print(text)

['stay hungry, stay in shape or get some rest for your kids. Your family will know you have']


In [31]:
set_seed(42)

# Generate with penalty_alpha=0.8; top_k is not passed, so the generation
# config's value applies.
# NOTE(review): presumably this selects contrastive search -- confirm against
# the transformers generation docs.
outputs = hf_gpt2_model.generate(**prep_data, max_length=20, penalty_alpha=0.8)

text = hf_gpt2_tokenizer.batch_decode(outputs)

print(text)

["stay hungry, stay out of trouble\n\n\nDon't want us to be able to do that?"]


In [32]:
from transformers import GenerationConfig

# Display the settings of a default-constructed GenerationConfig as a dict.
GenerationConfig().to_dict()

{'max_length': 20,
 'max_new_tokens': None,
 'min_length': 0,
 'min_new_tokens': None,
 'early_stopping': False,
 'max_time': None,
 'stop_strings': None,
 'do_sample': False,
 'num_beams': 1,
 'num_beam_groups': 1,
 'penalty_alpha': None,
 'dola_layers': None,
 'use_cache': True,
 'cache_implementation': None,
 'cache_config': None,
 'return_legacy_cache': None,
 'temperature': 1.0,
 'top_k': 50,
 'top_p': 1.0,
 'min_p': None,
 'typical_p': 1.0,
 'epsilon_cutoff': 0.0,
 'eta_cutoff': 0.0,
 'diversity_penalty': 0.0,
 'repetition_penalty': 1.0,
 'encoder_repetition_penalty': 1.0,
 'length_penalty': 1.0,
 'no_repeat_ngram_size': 0,
 'bad_words_ids': None,
 'force_words_ids': None,
 'renormalize_logits': False,
 'constraints': None,
 'forced_bos_token_id': None,
 'forced_eos_token_id': None,
 'remove_invalid_values': False,
 'exponential_decay_length_penalty': None,
 'suppress_tokens': None,
 'begin_suppress_tokens': None,
 'forced_decoder_ids': None,
 'sequence_bias': None,
 'token_heali