In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input tree and print the full path of every file found in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/gemma/pytorch/1.1-2b-it/1/config.json
/kaggle/input/gemma/pytorch/1.1-2b-it/1/gemma-2b-it.ckpt
/kaggle/input/gemma/pytorch/1.1-2b-it/1/tokenizer.model


In [2]:
# Setup the environment
!pip install -q -U immutabledict sentencepiece 
!git clone https://github.com/google/gemma_pytorch.git
!mkdir /kaggle/working/gemma/
!mv /kaggle/working/gemma_pytorch/gemma/* /kaggle/working/gemma/

import sys 
sys.path.append("/kaggle/working/gemma_pytorch/") 
from gemma.config import GemmaConfig, get_config_for_7b, get_config_for_2b
from gemma.model import GemmaForCausalLM
from gemma.tokenizer import Tokenizer
import contextlib
import os
import torch
import kagglehub

# Load the model
# Settings consumed by the loading code further down in this cell.
weights_dir = '/kaggle/input/gemma/pytorch/1.1-2b-it/1/'  # directory holding the checkpoint and tokenizer files
# Alternative source for the weights: kagglehub.download('gemma/pytorch/1.1-2b-it/1/')
MACHINE_TYPE = "cpu"  # device string handed to torch.device
VARIANT = "2b-it"  # selects the config (2b vs 7b) and is part of the checkpoint file name


@contextlib.contextmanager
def _set_default_tensor_type(dtype: torch.dtype):
  """Sets the default torch dtype to the given dtype."""
  torch.set_default_dtype(dtype)
  yield
  torch.set_default_dtype(torch.float)

# Choose the architecture config that matches the selected variant.
if "2b" in VARIANT:
  model_config = get_config_for_2b()
else:
  model_config = get_config_for_7b()
model_config.tokenizer = os.path.join(weights_dir, "tokenizer.model")

device = torch.device(MACHINE_TYPE)
ckpt_path = os.path.join(weights_dir, f'gemma-{VARIANT}.ckpt')

# Build the model and load its weights while the config's dtype is the torch default.
with _set_default_tensor_type(model_config.get_dtype()):
  model = GemmaForCausalLM(model_config)
  model.load_weights(ckpt_path)
  model = model.to(device).eval()


# Use the model

# Each chat turn is wrapped in start/end control tokens that name the speaker.
USER_CHAT_TEMPLATE = "<start_of_turn>user\n{prompt}<end_of_turn>\n"
MODEL_CHAT_TEMPLATE = "<start_of_turn>model\n{prompt}<end_of_turn>\n"

# A complete multi-turn conversation: two user turns, one earlier model turn,
# and a final open model turn that the model is asked to continue.
prompt = (
    USER_CHAT_TEMPLATE.format(
        prompt="What is a good place for travel in the US?"
    )
    + MODEL_CHAT_TEMPLATE.format(prompt="California.")
    + USER_CHAT_TEMPLATE.format(prompt="What can I do in California?")
    + "<start_of_turn>model\n"
)

# `prompt` is already fully formatted, so it is passed as-is. Wrapping it in
# USER_CHAT_TEMPLATE again would nest the conversation inside another user
# turn and put an <end_of_turn> after the open model turn.
model.generate(
    prompt,
    device=device,
    output_len=100,
)


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m35.6 MB/s[0m eta [36m0:00:00[0m
[?25hCloning into 'gemma_pytorch'...
remote: Enumerating objects: 297, done.[K
remote: Counting objects: 100% (180/180), done.[K
remote: Compressing objects: 100% (100/100), done.[K
remote: Total 297 (delta 128), reused 80 (delta 80), pack-reused 117 (from 1)[K
Receiving objects: 100% (297/297), 5.53 MiB | 22.66 MiB/s, done.
Resolving deltas: 100% (166/166), done.


'**Popular tourist destinations in California:**\n\n**Coastal Destinations:**\n- Santa Monica\n- Malibu\n- Yosemite National Park\n- Monterey Bay\n- San Diego\n- Santa Catalina Island\n\n**Mountain and Wilderness Destinations:**\n- Mammoth Mountain\n- Yosemite National Park\n- Sequoia National Park\n- Redwood National and State Parks\n- Mount Shasta\n\n**Desert Destinations:**\n- Coachella Valley\n- Joshua Tree National Park\n- Death Valley National Park\n\n**Historic and Cultural Destinations:**\n- San'

In [3]:
# # Setup the environment
# !pip install accelerate
# !pip install -i https://pypi.org/simple/ bitsandbytes
# from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
# import kagglehub
# # Load the model
# # kagglehub.login()
# # kagglehub.model_download('google/gemma/transformers/1.1-2b-it/1/')
# quantization_config = BitsAndBytesConfig(load_in_4bit=True)
# tokenizer = AutoTokenizer.from_pretrained("/kaggle/input/gemma/transformers/1.1-2b-it/1/")
# model = AutoModelForCausalLM.from_pretrained(
#     "/kaggle/input/gemma/transformers/1.1-2b-it/1/",
#     quantization_config=quantization_config
# )
# # Use the model
# input_text = "Write me a poem about Machine Learning."
# input_ids = tokenizer(input_text, return_tensors="pt")#.to("cuda")
# outputs = model.generate(**input_ids)
# print(tokenizer.decode(outputs[0]))
