# KerasHub

## Install dependencies

In [None]:
!pip install -q keras-hub

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m644.1/644.1 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[?25h

## Imports

In [None]:
import os

# Define the Keras 3 backend you want to use
os.environ["KERAS_BACKEND"] = "jax"

# Import Keras 3 and KerasHub modules
import keras
import keras_hub

### Configure your API key

To use Gemma, you must provide your Kaggle username and a Kaggle API key.

To generate a Kaggle API key, go to the **Account** tab of your Kaggle user profile and select **Create New Token**. This will trigger the download of a `kaggle.json` file containing your API credentials.

In Colab, select **Secrets** (🔑) in the left pane and add your Kaggle username and Kaggle API key. Store your username under the name `KAGGLE_USERNAME` and your API key under the name `KAGGLE_KEY`.

## Instantiate a KerasHub model using `from_preset()`

In [None]:
# Build a causal-LM task from the named preset, requesting bfloat16 as the model dtype.
gemma2_preset = "gemma2_instruct_2b_en"
causal_lm = keras_hub.models.CausalLM.from_preset(gemma2_preset, dtype="bfloat16")

In [None]:
# Gemma chat markup. The model turn header ends with a newline, which is the
# same layout used for the fine-tuning examples later in this notebook
# ("<start_of_turn>model\n"), so inference prompts match the training format.
template = "<start_of_turn>user\n{question}<end_of_turn>\n<start_of_turn>model\n"

question = """Write a python program to generate the first 1000 prime numbers.
Just show the actual code."""

# Fill the question into the chat template, then generate up to 512 tokens
# (prompt included) and print the decoded text.
prompt = template.format(question=question)
print(causal_lm.generate(prompt, max_length=512))

<start_of_turn>user
Write a python program to generate the first 1000 prime numbers.
Just show the actual code.<end_of_turn>
<start_of_turn>model```python
def is_prime(n):
  if n <= 1:
    return False
  for i in range(2, int(n**0.5) + 1):
    if n % i == 0:
      return False
  return True

count = 0
number = 2
primes = []
while count < 1000:
  if is_prime(number):
    primes.append(number)
    count += 1
  number += 1
print(primes)
``` 
<end_of_turn>


## Load your model directly from Kaggle

```
keras_hub.models.CausalLM.from_preset("kaggle://my_kaggle_username/gemma-pirate/keras/gemma-pirate-instruct-7b")
```

In [None]:
# Kaggle-hosted preset handle, in the form kaggle://<user>/<model>/keras/<variation>.
medical_gemma_handle = "kaggle://nkovela/gemma/keras/medical_gemma"
keras_hub.models.CausalLM.from_preset(medical_gemma_handle)

Downloading from https://www.kaggle.com/api/v1/models/nkovela/gemma/keras/medical_gemma/7/download/config.json...


100%|██████████| 501/501 [00:00<00:00, 680kB/s]


Downloading from https://www.kaggle.com/api/v1/models/nkovela/gemma/keras/medical_gemma/7/download/task.json...


100%|██████████| 2.39k/2.39k [00:00<00:00, 2.33MB/s]


Downloading from https://www.kaggle.com/api/v1/models/nkovela/gemma/keras/medical_gemma/7/download/assets/tokenizer/vocabulary.spm...


100%|██████████| 4.04M/4.04M [00:00<00:00, 15.2MB/s]


Downloading from https://www.kaggle.com/api/v1/models/nkovela/gemma/keras/medical_gemma/7/download/model.weights.h5...


100%|██████████| 9.34G/9.34G [02:26<00:00, 68.6MB/s]


<GemmaCausalLM name=gemma_causal_lm, built=True>

## Load your model directly from HuggingFace

In [None]:
# Load the Hugging Face checkpoint in bfloat16, like the other models in this
# notebook: loading this 8B model without a dtype ran out of accelerator memory
# (RESOURCE_EXHAUSTED) in the recorded run.
# NOTE(review): bfloat16 may still not fit on small accelerators — confirm on
# the target runtime.
keras_hub.models.CausalLM.from_preset(
    "hf://NousResearch/Hermes-2-Pro-Llama-3-8B",
    dtype="bfloat16",
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/700 [00:00<?, ?B/s]

ValueError: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 352321668 bytes.

## Customize a Task model and every component in it as you like

### Completely customize the model
```
tokenizer = keras_hub.models.GemmaTokenizer(
        proto="proto.spm",
    )
preprocessor = keras_hub.models.GemmaCausalLMPreprocessor(
    tokenizer=tokenizer,
    sequence_length=128,
)
backbone = keras_hub.models.GemmaBackbone(
    vocabulary_size=30552,
    num_layers=4,
    num_heads=4,
    hidden_dim=256,
    intermediate_dim=512,
    max_sequence_length=128,
)
gemma_lm = keras_hub.models.GemmaCausalLM(
    backbone=backbone,
    preprocessor=preprocessor,
)
gemma_lm.fit(x=features, batch_size=2)
```

### Use the presets for some parts of the model and override some args of your choice

In [None]:
# Preprocessor from the preset, with only the argument we want to change.
# No dtype here: the dtype request belongs on the backbone, which holds the weights.
preprocessor = keras_hub.models.GemmaCausalLMPreprocessor.from_preset(
    "gemma2_instruct_2b_en",
    sequence_length=64,  # Override the sequence_length arg
)
# Backbone from the same preset, loaded in bfloat16 to match the `causal_lm`
# model loaded earlier in this notebook.
backbone = keras_hub.models.GemmaBackbone.from_preset(
    "gemma2_instruct_2b_en",
    dtype="bfloat16",
)
# Assemble the task model from the two customized components.
gemma_lm = keras_hub.models.GemmaCausalLM(
    backbone=backbone,
    preprocessor=preprocessor,
)

In [None]:
# Gemma chat markup. The model turn header ends with a newline, which is the
# same layout used for the fine-tuning examples later in this notebook
# ("<start_of_turn>model\n"), so inference prompts match the training format.
template = "<start_of_turn>user\n{question}<end_of_turn>\n<start_of_turn>model\n"

question = """Write a python program to generate the first 1000 prime numbers.
Just show the actual code."""

# Fill the question into the chat template, then generate up to 512 tokens
# (prompt included) with the customized model and print the decoded text.
prompt = template.format(question=question)
print(gemma_lm.generate(prompt, max_length=512))

<start_of_turn>user
Write a python program to generate the first 1000 prime numbers.
Just show the actual code.<end_of_turn>
<start_of_turn>model```python
def is_prime(n):
  if n <= 1:
    return False
  for i in range(2, int(n**0.5) + 1):
    if n % i == 0:
      return False
  return True

count = 0
number = 2
primes = []
while count < 1000:
  if is_prime(number):
    primes.append(number)
    count += 1
  number += 1
print(primes)
``` 
<end_of_turn>


## Fine Tuning: Enable LoRA

In [None]:
# Enable LoRA on the backbone with the chosen rank, then print the model summary.
lora_rank = 4
causal_lm.backbone.enable_lora(rank=lora_rank)
causal_lm.summary()

In [None]:
#@title Prepare Dataset
import tensorflow_datasets as tfds
import tensorflow as tf


def to_chat_example(example):
    """Join one record's "src" and "dst" fields into a single chat-formatted string.

    The "src" text is placed in the user turn after a translation instruction,
    and the "dst" text is placed in the model turn.
    """
    pieces = [
        "<start_of_turn>user\n",
        "Translate French into English:\n",
        example["src"],
        "<end_of_turn>\n",
        "<start_of_turn>model\n",
        "Translation:\n",
        example["dst"],
        "<end_of_turn>",
    ]
    return tf.strings.join(pieces)


# Load the train split, format every record, and keep the first 100 batches
# of one example each.
train_ds = (
    tfds.load("mtnt/fr-en", split="train")
    .map(to_chat_example)
    .batch(1)
    .take(100)
)

# Training setup: SGD optimizer, sparse categorical cross-entropy on logits,
# and sparse categorical accuracy as a weighted metric.
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
accuracy_metric = keras.metrics.SparseCategoricalAccuracy()
optimizer = keras.optimizers.SGD(learning_rate=1e-4)
causal_lm.compile(
    loss=loss_fn,
    optimizer=optimizer,
    weighted_metrics=[accuracy_metric],
)

# Run a single pass over the prepared dataset.
causal_lm.fit(train_ds, epochs=1)

[1m  1/100[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m52:55[0m 32s/step - loss: 0.3329 - sparse_categorical_accuracy: 0.2083

## Fine Tuning: Enable QLoRA

In [None]:
# Load the base preset, requesting bfloat16 as the model dtype.
qlora_preset = "gemma_2b_en"
gemma_causal_lm = keras_hub.models.CausalLM.from_preset(qlora_preset, dtype="bfloat16")

# QLoRA setup: quantize the model to int8 first, then enable rank-4 LoRA on
# the backbone.
gemma_causal_lm.quantize("int8")
gemma_causal_lm.backbone.enable_lora(rank=4)

# Print the resulting model summary.
gemma_causal_lm.summary()



In [None]:
# Same training setup as the LoRA run: SGD optimizer, sparse categorical
# cross-entropy on logits, and sparse categorical accuracy as a weighted metric.
qlora_loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
qlora_accuracy = keras.metrics.SparseCategoricalAccuracy()
optimizer = keras.optimizers.SGD(learning_rate=1e-4)
gemma_causal_lm.compile(
    loss=qlora_loss,
    optimizer=optimizer,
    weighted_metrics=[qlora_accuracy],
)

# Run a single pass over the dataset prepared earlier (`train_ds`).
gemma_causal_lm.fit(train_ds, epochs=1)

[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 226ms/step - loss: 0.3699 - sparse_categorical_accuracy: 0.4441


<keras.src.callbacks.history.History at 0x79fde41462f0>