In [None]:
!pip install keras_nlp

In [2]:
!git clone https://github.com/deep-diver/gpt2-ft-pipeline

Cloning into 'gpt2-ft-pipeline'...
remote: Enumerating objects: 23, done.[K
remote: Counting objects: 100% (23/23), done.[K
remote: Compressing objects: 100% (19/19), done.[K
remote: Total 23 (delta 4), reused 12 (delta 1), pack-reused 0[K
Unpacking objects: 100% (23/23), 16.57 KiB | 2.07 MiB/s, done.


In [3]:
!mv gpt2-ft-pipeline/alpaca .

In [4]:
%cd alpaca
!tfds build --register_checksums

/content/alpaca
INFO[build.py]: Loading dataset  from path: /content/alpaca/alpaca_dataset_builder.py
2023-06-18 11:51:34.506695: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-18 11:51:35.833366: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-06-18 11:51:36.332646: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at 

In [5]:
import tensorflow as tf
import keras_nlp

import tensorflow_datasets as tfds
import alpaca

In [6]:
alpaca_ds = tfds.load('alpaca')

In [7]:
alpaca_ds['train']

<_PrefetchDataset element_spec={'input': TensorSpec(shape=(), dtype=tf.string, name=None), 'instruction': TensorSpec(shape=(), dtype=tf.string, name=None), 'output': TensorSpec(shape=(), dtype=tf.string, name=None)}>

In [8]:
for x in alpaca_ds['train']:
  print(x['instruction'])
  print(x['input'])
  print(x['output'])
  break

tf.Tensor(b'Outline the key components of a business plan.', shape=(), dtype=string)
tf.Tensor(b'', shape=(), dtype=string)
tf.Tensor(b'The key components of a business plan include an executive summary, a description of the business, marketing information, financial projections, and an evaluation of risks and opportunities.', shape=(), dtype=string)


In [9]:
def get_prompt(x):
  def get_prompt_with_input():
    result = tf.strings.join(["### Instruction:\n", x['instruction']])
    result = tf.strings.join([result, '\n\n'])

    result = tf.strings.join([result, "### Input:\n"])
    result = tf.strings.join([result, x['input']])
    result = tf.strings.join([result, '\n\n'])

    result = tf.strings.join([result, "### Response:\n"])
    result = tf.strings.join([result, x['output']])
    return result

  def get_prompt_without_input():
    result = tf.strings.join(["### Instruction:\n", x['instruction']])
    result = tf.strings.join([result, '\n\n'])

    result = tf.strings.join([result, "### Response:\n"])
    result = tf.strings.join([result, x['output']])
    return result

  result = tf.cond(
      tf.math.equal(x['input'], ''),
      get_prompt_with_input,
      get_prompt_without_input
  )

  return result

In [10]:
train_ds = (
    alpaca_ds['train'].map(lambda x: get_prompt(x))
    .batch(32)
    .cache()
    .prefetch(tf.data.AUTOTUNE)
)

In [11]:
for data in train_ds.take(1):
  print(data.numpy())

[b'### Instruction:\nOutline the key components of a business plan.\n\n### Input:\n\n\n### Response:\nThe key components of a business plan include an executive summary, a description of the business, marketing information, financial projections, and an evaluation of risks and opportunities.'
 b'### Instruction:\nWrite about the benefits of cloud computing.\n\n### Input:\n\n\n### Response:\nCloud computing offers a number of advantages that make it an attractive solution for businesses of all sizes. It provides cost savings due to the low infrastructure investment needed to develop or access a cloud-based service, scalability due to the ability to quickly and easily adjust resources to meet current and changing needs, and faster application development, as cloud-based applications can be built and deployed quickly with minimal setup. Additionally, cloud computing can provide a better user experience, as users can access their data and applications from any device and location, and grea

In [12]:
gpt2_tokenizer = keras_nlp.models.GPT2Tokenizer.from_preset("gpt2_base_en")
gpt2_preprocessor = keras_nlp.models.GPT2CausalLMPreprocessor.from_preset(
    "gpt2_base_en",
    sequence_length=256,
    add_end_token=True,
)
gpt2_lm = keras_nlp.models.GPT2CausalLM.from_preset("gpt2_base_en", preprocessor=gpt2_preprocessor)

Downloading data from https://storage.googleapis.com/keras-nlp/models/gpt2_base_en/v1/vocab.json
Downloading data from https://storage.googleapis.com/keras-nlp/models/gpt2_base_en/v1/merges.txt
Downloading data from https://storage.googleapis.com/keras-nlp/models/gpt2_base_en/v1/model.h5




In [13]:
train_ds = train_ds.take(500)
num_epochs = 1

# Linearly decaying learning rate.
learning_rate = tf.keras.optimizers.schedules.PolynomialDecay(
    5e-5,
    decay_steps=train_ds.cardinality() * num_epochs,
    end_learning_rate=0.0,
)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
gpt2_lm.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate),
    loss=loss,
    weighted_metrics=["accuracy"],
)

gpt2_lm.fit(train_ds, epochs=num_epochs)



<keras.callbacks.History at 0x7f7099f91840>

In [None]:
result = gpt2_lm.generate("### Instruction:\nWrite a resignation email", max_length=256)

In [17]:
from IPython.display import display, Markdown, Latex
display(Markdown(result.split("### Response:")[-1]))


Dear CEO,

I'm leaving my position today. As you have stated on many occasions, I believe that my decision to terminate an employee was a mistake and I regret it. I apologize for the pain I caused, but I'm determined to make the right decision for my family.

I have been deeply hurt by the decision, and I'm looking forward to working with you to make sure that this decision is not repeated. I look forward to continuing to work with you, and I look forward to meeting you in your office.

Sincerely,
[Your Name]

Your Name