## Setup


In [None]:
!pip install --upgrade -q torch
!pip install --upgrade -q tensorflow
!pip install --upgrade -q jax
!pip install --upgrade -q keras-nlp
!pip install --upgrade -q keras
!pip install -q git+https://github.com/soumik12345/wandb-addons
# Some care is required to install Keras 3. This is a temporary situation.
# See installation notes at the end of this notebook for details.

In [1]:
#@title Backend selection and display utilities [run me]
import matplotlib.pyplot as plt
# Import from the public IPython.display module: pulling `display` out of
# IPython.core.display has been deprecated since IPython 7.14 and is rejected
# by newer IPython releases.
from IPython.display import HTML, display

def big_print(a,b):
  """Render ``a`` followed by ``b`` as one large monospace line in the output.

  Both values are converted to text and HTML-escaped before being placed in
  the ``<div>``, so text that contains ``<``, ``>`` or ``&`` (for example
  model-generated output) is shown literally instead of being parsed as markup.

  Args:
    a: Leading value, typically an emoji/icon prefix.
    b: Value shown right after ``a``.
  """
  from html import escape  # local import keeps this helper self-contained

  # quote=False: the text lands in element content, where quotes are harmless.
  text = escape('{}{}'.format(a, b), quote=False)
  markup = '<div style="font-size: 18pt; font-family: monospace">{}</div>'.format(text)
  display(HTML(markup))
def plot_images(images):
    """Show every item of ``images`` side by side in a single row, axes hidden.

    A new 20x20 figure is created and each image gets its own subplot in a
    grid of one row and ``len(images)`` columns.
    """
    column_count = len(images)
    plt.figure(figsize=(20, 20))
    for position, image in enumerate(images, start=1):
        # Subplot indices are 1-based, hence start=1.
        plt.subplot(1, column_count, position)
        plt.imshow(image)
        plt.axis("off")

# Keras backend to use; the next cell copies this into the KERAS_BACKEND
# environment variable before importing keras.
backend = 'jax' # @param ["jax", "tensorflow", "torch"]

In [2]:
import math
import os
import random

# The backend choice is handed to Keras through the environment, so it is set
# before the keras import below.
os.environ["KERAS_BACKEND"] = backend

import keras
import keras_nlp

# Read back the backend Keras reports and show it together with the version.
backend = keras.config.backend()
big_print("\u2B50 ", f"Keras version {keras.version()}")
big_print("\u2B50 ", f"Running on {backend.upper()}")

<hr/>

# <img src="https://keras.io/img/k-logo.png" height="80pt" align="center"/> Keras 3: Let us check out a generative model and build a chatbot - OPT causal LM

OPT is a causal language model: given an input prompt, it generates the text that continues it.

In [3]:
# Fix the global random seed (presumably so the sampled text is repeatable
# across runs), then load the OPT causal language model from the
# "opt_125m_en" preset and configure generation to use the contrastive sampler.
keras.utils.set_random_seed(42)
nlp_model = keras_nlp.models.OPTCausalLM.from_preset("opt_125m_en")
nlp_model.compile(sampler=keras_nlp.samplers.ContrastiveSampler())

Downloading data from https://storage.googleapis.com/keras-nlp/models/opt_125m_en/v1/vocab.json
[1m898822/898822[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/keras-nlp/models/opt_125m_en/v1/merges.txt
[1m456318/456318[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1us/step
Downloading data from https://storage.googleapis.com/keras-nlp/models/opt_125m_en/v1/model.h5
[1m501175368/501175368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 0us/step


In [4]:
# Build the prompt from adjacent string literals. A backslash continuation
# inside a single literal would embed the next line's indentation (a run of
# spaces) in the text that is sent to the model.
prompt = (
    "Hi, I'm a {} machine learning developer. "
    "What are you working on?"
).format(backend.upper())
response = nlp_model.generate(prompt, max_length=57)
# Remove the echoed prompt so that only the continuation is displayed.
response = response.replace(prompt, '')
big_print("\U0001F64B ",prompt)
big_print("\U0001F916 ",response)

# Fine-tuning

In [5]:
# Load the pre-trained GPT-2 causal language model from the "gpt2_medium_en" preset.
gpt_lm = keras_nlp.models.GPT2CausalLM.from_preset("gpt2_medium_en")

# AdamW is a common optimizer choice for transformer models.
# Layernorm and bias terms are excluded from the weight decay.
optimizer = keras.optimizers.AdamW(learning_rate=5e-5, weight_decay=0.01)
optimizer.exclude_from_weight_decay(var_names=["bias", "gamma", "beta"])

# Train with sparse categorical cross-entropy on logits, track accuracy as a
# weighted metric, and generate text with top-k (k=5) sampling.
gpt_lm.compile(
    optimizer=optimizer,
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    weighted_metrics=[keras.metrics.SparseCategoricalAccuracy()],
    sampler=keras_nlp.samplers.TopKSampler(k=5),
)
gpt_lm.summary()

Downloading data from https://storage.googleapis.com/keras-nlp/models/gpt2_medium_en/v1/vocab.json
[1m1042301/1042301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/keras-nlp/models/gpt2_medium_en/v1/merges.txt
[1m456318/456318[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1us/step
Downloading data from https://storage.googleapis.com/keras-nlp/models/gpt2_medium_en/v1/model.h5
[1m1419729400/1419729400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 0us/step


In [7]:
# Download the databricks-dolly-15k dataset (JSON Lines file) from the
# Hugging Face Hub into the working directory.
!wget -O databricks-dolly-15k.jsonl https://huggingface.co/datasets/databricks/databricks-dolly-15k/resolve/main/databricks-dolly-15k.jsonl

--2023-12-06 07:56:38--  https://huggingface.co/datasets/databricks/databricks-dolly-15k/resolve/main/databricks-dolly-15k.jsonl
Resolving huggingface.co (huggingface.co)... 18.239.50.49, 18.239.50.103, 18.239.50.80, ...
Connecting to huggingface.co (huggingface.co)|18.239.50.49|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs.huggingface.co/repos/34/ac/34ac588cc580830664f592597bb6d19d61639eca33dc2d6bb0b6d833f7bfd552/2df9083338b4abd6bceb5635764dab5d833b393b55759dffb0959b6fcbf794ec?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27databricks-dolly-15k.jsonl%3B+filename%3D%22databricks-dolly-15k.jsonl%22%3B&Expires=1702108599&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwMjEwODU5OX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8zNC9hYy8zNGFjNTg4Y2M1ODA4MzA2NjRmNTkyNTk3YmI2ZDE5ZDYxNjM5ZWNhMzNkYzJkNmJiMGI2ZDgzM2Y3YmZkNTUyLzJkZjkwODMzMzhiNGFiZDZiY2ViNTYzNTc2NGRhYjV

In [8]:
import json

# The prompt/response layout is the same for every record, so define it once
# instead of rebuilding it on each loop iteration.
template = "Instruction:\n{instruction}\n\nResponse:\n{response}"

data = []
# Read as UTF-8 explicitly so decoding does not depend on the platform's
# default encoding.
with open("databricks-dolly-15k.jsonl", encoding="utf-8") as file:
    for line in file:
        # Tolerate blank lines (e.g. a trailing newline) instead of failing
        # inside json.loads.
        if not line.strip():
            continue
        features = json.loads(line)
        # Filter out examples with context, to keep it simple.
        if features["context"]:
            continue
        # Format the entire example as a single string.
        data.append(template.format(**features))

# Only use 1000 training examples, to keep it fast.
data = data[:1000]

In [9]:
# NOTE(review): WandbMetricsLogger is imported here but is not passed to fit()
# below (there is no `callbacks=` argument), so nothing is logged through it
# in this cell.
from wandb_addons.keras import WandbMetricsLogger

# Fine-tune the model on the formatted instruction/response strings:
# one pass over the data, two examples per batch.
gpt_lm.fit(
    data,
    batch_size=2,
    epochs=1,
)

[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m666s[0m 542ms/step - loss: 0.2834 - sparse_categorical_accuracy: 0.4829


<keras.src.callbacks.history.History at 0x797bd1c85ed0>

In [10]:
# Use the same instruction/response layout as the training examples, leaving
# the response empty so the fine-tuned model fills it in.
template = "Instruction:\n{instruction}\n\nResponse:\n{response}"
prompt = template.format(instruction="What is machine learning?", response="")
print(gpt_lm.generate(prompt))

Instruction:
What is machine learning?

Response:
Machine Learning is used to learn from a large amount of data and make recommendations based on the data. This means that you can predict what the next step will be based on what the previous step did.

This is very similar to what you might learn from a large amount of text. The text might be a lot of words, and the words might be very short, or they might be very complex. This means that you would need to learn how to classify the words into words that make sense to you, and how to use those words to make the predictions.

The key difference is that you need to use the same word to predict a new word and you need to use the same pattern to predict a previous word. This makes it very similar to learning a word by heart.
