# Loading and fine-tuning Gemma3

In [5]:
import torch
from transformers import AutoTokenizer, Gemma3ForCausalLM

# Select the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
# NOTE: this cell only selects and reports the device; the model loaded below
# is not moved onto it here.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# The weights and the tokenizer are both fetched from the same checkpoint id.
checkpoint = "google/gemma-3-1b-pt"
model = Gemma3ForCausalLM.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

print("Gemma3 model and tokenizer loaded successfully.")


Using device: cuda
Gemma3 model and tokenizer loaded successfully.


In [6]:
# Print the module tree of the loaded model (submodule names, layer types and
# the in/out feature sizes of each projection).
print(model)

Gemma3ForCausalLM(
  (model): Gemma3TextModel(
    (embed_tokens): Gemma3TextScaledWordEmbedding(262144, 1152, padding_idx=0)
    (layers): ModuleList(
      (0-25): 26 x Gemma3DecoderLayer(
        (self_attn): Gemma3Attention(
          (q_proj): Linear(in_features=1152, out_features=1024, bias=False)
          (k_proj): Linear(in_features=1152, out_features=256, bias=False)
          (v_proj): Linear(in_features=1152, out_features=256, bias=False)
          (o_proj): Linear(in_features=1024, out_features=1152, bias=False)
          (q_norm): Gemma3RMSNorm((256,), eps=1e-06)
          (k_norm): Gemma3RMSNorm((256,), eps=1e-06)
        )
        (mlp): Gemma3MLP(
          (gate_proj): Linear(in_features=1152, out_features=6912, bias=False)
          (up_proj): Linear(in_features=1152, out_features=6912, bias=False)
          (down_proj): Linear(in_features=6912, out_features=1152, bias=False)
          (act_fn): PytorchGELUTanh()
        )
        (input_layernorm): Gemma3RMSNorm((11

# Explore the tokenizer

In [7]:
# Explore the tokenizer: vocabulary size, the lowest token ids, the special
# tokens, the stored init kwargs, and an encode/decode round trip.

# Print the vocab size
print(f"Tokenizer vocab size: {tokenizer.vocab_size}")

# Print the first few tokens in the vocab. The count is defined once so the
# heading and the ids actually listed cannot drift apart.
num_preview_tokens = 10
print(f"First {num_preview_tokens} tokens in the vocab:")
preview_tokens = tokenizer.convert_ids_to_tokens(list(range(num_preview_tokens)))
for token_id, token in enumerate(preview_tokens):
    print(f"ID {token_id}: {token}")

# Print special tokens
print("\nSpecial tokens:")
for name, token in tokenizer.special_tokens_map.items():
    print(f"{name}: {token}")

# Show the tokenizer config (each init kwarg together with its value)
print("\nTokenizer config keys:")
for key, value in tokenizer.init_kwargs.items():
    print(f"{key}: {value}")

# Try encoding and decoding a sample sentence.
# NOTE(review): encode() is called with its defaults, and decode() does not
# skip special tokens, so any special token the tokenizer prepends (e.g. a
# BOS token) will presumably show up in `decoded` - confirm against the output.
sample_text = "Hello, how are you?"
encoded = tokenizer.encode(sample_text)
decoded = tokenizer.decode(encoded)
print(f"\nSample text: {sample_text}")
print(f"Encoded: {encoded}")
print(f"Decoded: {decoded}")


Tokenizer vocab size: 262144
First 10 tokens in the vocab:
ID 0: <pad>
ID 1: <eos>
ID 2: <bos>
ID 3: <unk>
ID 4: <mask>
ID 5: [multimodal]
ID 6: <unused0>
ID 7: <unused1>
ID 8: <unused2>
ID 9: <unused3>

Special tokens:
bos_token: <bos>
eos_token: <eos>
unk_token: <unk>
pad_token: <pad>
boi_token: <start_of_image>
eoi_token: <end_of_image>
image_token: <image_soft_token>

Tokenizer config keys:
vocab_file: /home/nguyen/.cache/huggingface/hub/models--google--gemma-3-1b-pt/snapshots/fcf18a2a879aab110ca39f8bffbccd5d49d8eb29/tokenizer.model
clean_up_tokenization_spaces: False
unk_token: <unk>
bos_token: <bos>
eos_token: <eos>
pad_token: <pad>
add_bos_token: True
add_eos_token: False
boi_token: <start_of_image>
eoi_token: <end_of_image>
extra_special_tokens: {'boi_token': '<start_of_image>', 'eoi_token': '<end_of_image>', 'image_token': '<image_soft_token>'}
image_token: <image_soft_token>
model_max_length: 1000000000000000019884624838656
sp_model_kwargs: None
spaces_between_special_tokens:

# Run inference

In [4]:
# Run inference on the Gemma3 model with the prompt "describe what you can do".
# NOTE(review): the "-pt" checkpoint looks like a base (not instruction-tuned)
# model, so expect a free-text continuation of the prompt rather than an
# answer to it - confirm against the model card.

prompt = "describe what you can do"

# Sampling is enabled below (do_sample=True); seed the RNG so a re-run of this
# cell produces the same continuation.
torch.manual_seed(0)

# Run on the device selected when the model was loaded. Module.to() moves the
# parameters in place, so repeating this cell is harmless.
model.to(device)

# Tokenize the prompt and put the tensors on the same device as the model.
# The full encoding is kept so the attention mask is passed to generate().
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
input_ids = inputs.input_ids

# Generate output from the model
with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=128,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )

# Decode the generated output (skip the prompt part): generate() returns the
# prompt ids followed by the new ids, so slice off the prompt length.
prompt_length = input_ids.shape[-1]
generated_text = tokenizer.decode(output_ids[0][prompt_length:], skip_special_tokens=True)

print("Prompt:", prompt)
print("Generated response:", generated_text)


Prompt: describe what you can do
Generated response:  to make the world a better placeIf you have an interest in helping the world then why not start by becoming a part of the solution. One of the best ways to make the world a better place is by starting a non-profit organisation. Non-profits are organisations that are not-for-profit and are registered with the Department of Home Affairs. The Department of Home Affairs is responsible for registering and monitoring non-profits in South Africa. The non-profit status gives the organisation legal protection and allows it to receive tax-exempt status from the South African Revenue Service (SARS).

Non-profits are registered under the Non-
