# GPT text generation from scratch with KerasNLP




This example requires KerasNLP. You can install it via the following command:
`pip install keras-nlp`

## Setup

In [None]:
#pip install keras-nlp

In [None]:
import os
import keras_nlp
import tensorflow as tf
from tensorflow import keras

Using TensorFlow backend


## Settings & hyperparameters

In [None]:
# Data
BATCH_SIZE = 64  # number of text lines grouped together by `.batch(BATCH_SIZE)`
SEQ_LEN = 128  # sequence length shared by the tokenizer, the packer and the position embedding
MIN_TRAINING_SEQ_LEN = 450  # lines whose `tf.strings.length` is not above this are filtered out

# Model
EMBED_DIM = 256  # `embedding_dim` of the token-and-position embedding
FEED_FORWARD_DIM = 256  # `intermediate_dim` of every TransformerDecoder layer
NUM_HEADS = 3  # `num_heads` of every TransformerDecoder layer
NUM_LAYERS = 2  # how many TransformerDecoder layers are stacked
VOCAB_SIZE = 5000  # WordPiece vocabulary size; also the width of the final Dense layer

# Training
# NOTE(review): the `model.fit` calls visible in this notebook pass `epochs=2`
# literally and never read this constant -- confirm which value is intended.
EPOCHS = 6

# Inference
# NOTE(review): not referenced by any other cell visible in this notebook --
# confirm whether generation was meant to be capped at this many tokens.
NUM_TOKENS_TO_GENERATE = 80

## Load the data

In [None]:
# Download the SimpleBooks archive and extract it (`extract=True`).
keras.utils.get_file(
    origin="https://dldata-public.s3.us-east-2.amazonaws.com/simplebooks.zip",
    extract=True,
)
# Named `data_dir` (not `dir`) so the builtin `dir()` is not shadowed for the
# rest of the notebook. The trailing slash is required by the string
# concatenation used to build the file paths below.
data_dir = os.path.expanduser("~/.keras/datasets/simplebooks/")


# Training split: one text line per element, keeping only lines longer than
# MIN_TRAINING_SEQ_LEN. Batching happens before shuffling, so whole batches
# (not individual lines) are shuffled, using a 256-batch buffer.
raw_train_ds = (
    tf.data.TextLineDataset(data_dir + "simplebooks-92-raw/train.txt")
    .filter(lambda x: tf.strings.length(x) > MIN_TRAINING_SEQ_LEN)
    .batch(BATCH_SIZE)
    .shuffle(buffer_size=256)
)

# Validation split: same length filter and batching, left unshuffled.
raw_val_ds = (
    tf.data.TextLineDataset(data_dir + "simplebooks-92-raw/valid.txt")
    .filter(lambda x: tf.strings.length(x) > MIN_TRAINING_SEQ_LEN)
    .batch(BATCH_SIZE)
)

Downloading data from https://dldata-public.s3.us-east-2.amazonaws.com/simplebooks.zip


## Train the tokenizer

In [None]:
# Learn a lowercase WordPiece vocabulary of VOCAB_SIZE entries from the raw
# training lines. The three reserved tokens are the padding, unknown-word and
# beginning-of-sequence markers.
vocab = keras_nlp.tokenizers.compute_word_piece_vocabulary(
    raw_train_ds,
    reserved_tokens=["[PAD]", "[UNK]", "[BOS]"],
    lowercase=True,
    vocabulary_size=VOCAB_SIZE,
)

## Load tokenizer

In [None]:
# WordPiece tokenizer built on the vocabulary trained above. It is configured
# with the same lowercasing as the vocabulary and a sequence length of SEQ_LEN.
tokenizer = keras_nlp.tokenizers.WordPieceTokenizer(
    lowercase=True,
    sequence_length=SEQ_LEN,
    vocabulary=vocab,
)

## Tokenize data


In [None]:
# The packer adds the "[BOS]" start token to each sequence; it is configured
# with the same SEQ_LEN sequence length as the tokenizer.
start_packer = keras_nlp.layers.StartEndPacker(
    start_value=tokenizer.token_to_id("[BOS]"),
    sequence_length=SEQ_LEN,
)


def preprocess(inputs):
    """Convert raw text into a `(features, labels)` pair.

    The text is tokenized once. The labels are those token ids unchanged; the
    features are the same token ids after being passed through `start_packer`.
    """
    token_ids = tokenizer(inputs)
    return start_packer(token_ids), token_ids


# Run `preprocess` over both raw splits (parallelism and prefetch depth are
# left to tf.data via AUTOTUNE) to obtain (features, labels) datasets.
train_ds = raw_train_ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
train_ds = train_ds.prefetch(tf.data.AUTOTUNE)

val_ds = raw_val_ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
val_ds = val_ds.prefetch(tf.data.AUTOTUNE)

## Build the model

In [None]:
# Input: integer token ids, with no fixed sequence length declared.
inputs = keras.layers.Input(shape=(None,), dtype=tf.int32)

# Combined token + position embedding, created with `mask_zero=True`.
embedding_layer = keras_nlp.layers.TokenAndPositionEmbedding(
    mask_zero=True,
    embedding_dim=EMBED_DIM,
    sequence_length=SEQ_LEN,
    vocabulary_size=VOCAB_SIZE,
)
x = embedding_layer(inputs)

# NUM_LAYERS decoder layers applied one after another; each consumes the
# output of the previous one.
for _ in range(NUM_LAYERS):
    decoder_layer = keras_nlp.layers.TransformerDecoder(
        intermediate_dim=FEED_FORWARD_DIM,
        num_heads=NUM_HEADS,
    )
    x = decoder_layer(x)

# Final projection to VOCAB_SIZE values per position. No activation is
# applied, so the loss and metric below are told to expect logits.
outputs = keras.layers.Dense(VOCAB_SIZE)(x)
model = keras.Model(inputs=inputs, outputs=outputs)

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
perplexity = keras_nlp.metrics.Perplexity(from_logits=True, mask_token_id=0)
model.compile(
    optimizer="adam",
    loss=loss_fn,
    metrics=[perplexity],
)

In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 token_and_position_embeddi  (None, None, 256)         1312768   
 ng (TokenAndPositionEmbedd                                      
 ing)                                                            
                                                                 
 transformer_decoder (Trans  (None, None, 256)         394749    
 formerDecoder)                                                  
                                                                 
 transformer_decoder_1 (Tra  (None, None, 256)         394749    
 nsformerDecoder)                                                
                                                                 
 dense_4 (Dense)             (None, None, 5000)        128500

## Training


In [None]:
# Fit on the tokenized training split and evaluate on the validation split.
# NOTE(review): the epoch count is written literally here instead of using the
# EPOCHS setting (6) defined in the hyperparameter cell -- confirm intent.
model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=2,
    verbose=2,
)

Epoch 1/2
3169/3169 - 245s - loss: 3.9990 - perplexity: 54.7545 - val_loss: 3.9973 - val_perplexity: 54.9958 - 245s/epoch - 77ms/step
Epoch 2/2
3169/3169 - 242s - loss: 3.9141 - perplexity: 50.2963 - val_loss: 3.9260 - val_perplexity: 51.1246 - 242s/epoch - 76ms/step


<keras.src.callbacks.History at 0x7c2b30491210>

## Inference


In [None]:
# Build the generation prompt from an empty string: after packing, only the
# start token is non-zero (see the displayed tensor below).
prompt_tokens = start_packer(tokenizer([""]))
prompt_tokens

<tf.Tensor: shape=(1, 128), dtype=int32, numpy=
array([[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
      dtype=int32)>

In [None]:

def next(prompt, cache, index):
    """Next-token callback passed to the KerasNLP samplers.

    Runs `model` on the whole `prompt` and keeps only the logits at position
    `index - 1`. No hidden states are produced (None is returned in their
    place) and `cache` is handed back unchanged.

    NOTE(review): the name shadows the builtin `next`; it is kept because the
    sampler cells refer to it by this name.
    """
    step_logits = model(prompt)[:, index - 1, :]
    return step_logits, None, cache


### Greedy search


In [None]:
# Decode with GreedySampler. `index=1` is the position right after the start
# token in `prompt_tokens`.
sampler = keras_nlp.samplers.GreedySampler()
output_tokens = sampler(next=next, prompt=prompt_tokens, index=1)

# Turn the generated token ids back into text and show it.
txt = tokenizer.detokenize(output_tokens)
print(f"Greedy search generated text: \n{txt}\n")

Greedy search generated text: 
[b'[BOS] the next day the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the king of the']



### Beam search

In [None]:
# Decode with BeamSampler configured with 10 beams, starting at the position
# right after the start token in `prompt_tokens`.
sampler = keras_nlp.samplers.BeamSampler(num_beams=10)
output_tokens = sampler(next=next, prompt=prompt_tokens, index=1)

# Turn the generated token ids back into text and show it.
txt = tokenizer.detokenize(output_tokens)
print(f"Beam search generated text: \n{txt}\n")

Beam search generated text: 
[b'[BOS] " well , " he said , " i don \' t want to tell you , " he said , " i don \' t know what to do . i \' m going to tell you , but i \' m going to tell you . i \' m going to tell you , but i \' m going to tell you . i \' m going to tell you , but i \' m going to tell you . i \' m going to tell you , but i \' m going to tell you . i \' m going to tell you . i \' m going to tell you , but i \' m going to tell you . i \' m']



### Random search

In [None]:
# Decode with RandomSampler, starting at the position right after the start
# token in `prompt_tokens`. No seed is set, so the text differs between runs.
sampler = keras_nlp.samplers.RandomSampler()
output_tokens = sampler(next=next, prompt=prompt_tokens, index=1)

# Turn the generated token ids back into text and show it.
txt = tokenizer.detokenize(output_tokens)
print(f"Random search generated text: \n{txt}\n")

Random search generated text: 
[b'[BOS] and at these days , because it kept a sort of men all new pity to waste flow of the weeds which batzes nowlands , for instance , was harder than it did me i think ? " or " are quite tiny gerblanger and wants no finerumb and good , and i can never think that we \' d been too poor is our children \' s self - thought . our herod is quite enough to trade us , need to let our use lie so doing a beautiful day , even if ill we would win them with this day . i will forget him , and i \' m']



### Top-K search

In [None]:
# Decode with TopKSampler (k=10), starting at the position right after the
# start token in `prompt_tokens`.
sampler = keras_nlp.samplers.TopKSampler(k=10)
output_tokens = sampler(next=next, prompt=prompt_tokens, index=1)

# Turn the generated token ids back into text and show it.
txt = tokenizer.detokenize(output_tokens)
print(f"Top-K search generated text: \n{txt}\n")

Top-K search generated text: 
[b'[BOS] " i know , " said he , " that \' s the way i have to go out . i must go with you to do the same , but i must tell you . i know the story i tell you that you must tell me the story to tell you . i told that you must do something that you have asked me if you had been a great many people that have been born to - day , and you would be sure to tell what they would have to have you . it \' s true , and the old lady was afraid that the lady was in her heart of the world , and you must remember the truth , for']



### Top-P search


In [None]:
# Decode with TopPSampler (p=0.5), starting at the position right after the
# start token in `prompt_tokens`.
sampler = keras_nlp.samplers.TopPSampler(p=0.5)
output_tokens = sampler(next=next, prompt=prompt_tokens, index=1)

# Turn the generated token ids back into text and show it.
txt = tokenizer.detokenize(output_tokens)
print(f"Top-P search generated text: \n{txt}\n")

Top-P search generated text: 
[b'[BOS] the big man had , and he was a great many years old , but he had been the man of the country , and had lived in the wilderness , who had become his wife and childhood . but the king had never ceased to hope to speak to him , and , as he had his mind to go , he would never find a great way to him . he had only to live with him , and when he saw the little old man on his side , he was sure that the man had to come to him . but he did not find his daughter to her . [PAD] , as she spoke , he said , " i can']



### Using callbacks for text generation

In [None]:

class TopKTextGenerator(keras.callbacks.Callback):
    """A callback to generate text from a trained model using top-k.

    After every epoch it samples from the notebook-level `prompt_tokens` with
    a `TopKSampler` and prints the detokenized result. It relies on the
    notebook globals `next`, `prompt_tokens` and `tokenizer`.

    Args:
        k: forwarded to `keras_nlp.samplers.TopKSampler`.
    """

    def __init__(self, k):
        # Initialise the base Callback first so its own attributes exist
        # before Keras attaches the model and calls the hooks.
        super().__init__()
        self.sampler = keras_nlp.samplers.TopKSampler(k)

    def on_epoch_end(self, epoch, logs=None):
        """Generate one sample from the shared prompt and print it."""
        output_tokens = self.sampler(
            next=next,
            prompt=prompt_tokens,
            index=1,
        )
        txt = tokenizer.detokenize(output_tokens)
        print(f"Top-K search generated text: \n{txt}\n")


# Demo of the callback: two epochs over a single batch of the training data,
# printing a top-k sample at the end of each epoch.
text_generation_callback = TopKTextGenerator(k=10)
model.fit(
    train_ds.take(1),
    epochs=2,
    verbose=2,
    callbacks=[text_generation_callback],
)

Epoch 1/2
Top-K search generated text: 
[b'[BOS] then , when the two boys had taken , they were very hungry , and they ate and ate , and then their breakfast was eaten . it was not that of course , because they wanted to be happy to be hungry . they thought it was very good for them ! so that they could not bear very much of it . so they ate a little supper , and they ate it all night . then , as the boys ate , they went away . they did not know what to eat , but when the little dog came , it came out of his father , he went to bed . [PAD] , in a short time , when he found a']

1/1 - 11s - loss: 3.7616 - perplexity: 43.1453 - 11s/epoch - 11s/step
Epoch 2/2
Top-K search generated text: 
[b'[BOS] after they reached the place where they had been , and that in all the afternoon they were at the beginning of the year , and there was little more than one of the other . they were to go in a little distance , and , for the most part in their way of the world , they would be to be so far that t

<keras.src.callbacks.History at 0x7c2b302c8b80>