In [1]:
%%capture
!pip install unsloth vllm
!pip install --no-deps git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3

In [2]:
from unsloth import FastModel
import torch

model, tokenizer = FastModel.from_pretrained(
    model_name="unsloth/gemma-3-1b-it-unsloth-bnb-4bit",
    max_seq_length=2048,
    load_in_4bit=True,
    load_in_8bit=False,
    full_finetuning=False,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 04-09 10:30:45 [__init__.py:239] Automatically detected platform cuda.
==((====))==  Unsloth 2025.3.19: Fast Gemma3 patching. Transformers: 4.50.0.dev0. vLLM: 0.8.3.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.


model.safetensors:   0%|          | 0.00/1.00G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/215 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/670 [00:00<?, ?B/s]

In [3]:
model = FastModel.get_peft_model(
    model,
    finetune_vision_layers=False,
    finetune_language_layers=True,
    finetune_attention_modules=True,
    finetune_mlp_modules=True,
    r=8,
    lora_alpha=8,
    lora_dropout=0,
    bias="none",
    random_state=42,
)

Unsloth: Making `model.base_model.model.model` require gradients


In [4]:
with open('/kaggle/input/simplified-macbeth/simplified_macbeth', 'r') as f:
    text = f.read()

lines = text.split('\n\n')
without_name = [''.join(l.split(':')[1:])[1:] for l in lines]
originals = [l.split('[[')[0][:-1] for l in without_name]
translated = [l.split('[[')[1].split(']]')[0] for l in without_name]
print(originals[115], '---->', translated[115])

import pandas as pd
from datasets import Dataset

data = {"input": originals, "output": translated}
df = pd.DataFrame(data)
dataset = Dataset.from_pandas(df)

prompt = """Below is an instruction on how to translate text from complex and archaic to simple and modern english.
The input contains this text. Write in the response a faithful translation.

### Instruction:
Translate the following line from complex to simple english.
It may contain archaic and literary words or structures: make them easily understandable.

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(data):
    inputs = data['input']
    outputs = data['output']
    texts = []
    for inp, outp in zip(inputs, outputs):
        text = prompt.format(inp, outp) + EOS_TOKEN
        texts.append(text)
    return {"text" : texts}

dataset = dataset.map(formatting_prompts_func, batched=True)

We will proceed no further in this business He hath honour'd me of late; and I have bought Golden opinions from all sorts of people, Which would be worn now in their newest gloss, Not cast aside so soon. ----> We will not continue with this plan. He has honored me recently, and I have earned the respect of all kinds of people, which I want to enjoy now, not throw away so soon.


Map:   0%|          | 0/654 [00:00<?, ? examples/s]

In [5]:
inputs = tokenizer(
[
    prompt.format(
        "This above all: to thine own self be true, And it must follow, as the night the day,Thou canst not then be false to any man.",
        "",
    )
], return_tensors="pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens=2048, use_cache=True)
tokenizer.batch_decode(outputs)

["<bos>Below is an instruction on how to translate text from complex and archaic to simple and modern english.\nThe input contains this text. Write in the response a faithful translation.\n\n### Instruction:\nTranslate the following line from complex to simple english.\nIt may contain archaic and literary words or structures: make them easily understandable.\n\n### Input:\nThis above all: to thine own self be true, And it must follow, as the night the day,Thou canst not then be false to any man.\n\n### Response:\nThis is a saying about being honest with yourself. It means that you should always be true to yourself, and that you can't be false to anyone.\n\n**Explanation:**\n\nThe translation aimed to:\n*   Replace archaic words with simpler equivalents.\n*   Simplify the sentence structure.\n*   Maintain the core meaning of the original text.\n\nLet me know if you want me to do something else.\n<end_of_turn>"]

In [6]:
from trl import SFTTrainer, SFTConfig
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    eval_dataset=None,
    args=SFTConfig(
        dataset_text_field="text",
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        num_train_epochs=1,
        learning_rate=2e-4,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=42,
        report_to="none",
    ),
)

Unsloth: Switching to float32 training since model cannot work with float16


Unsloth: Tokenizing ["text"] (num_proc=4):   0%|          | 0/654 [00:00<?, ? examples/s]

In [7]:
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla T4. Max memory = 14.741 GB.
1.512 GB of memory reserved.


In [8]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 654 | Num Epochs = 1 | Total steps = 41
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 6,522,880/1,000,000,000 (0.65% trained)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,4.0976
2,4.0902
3,4.1893
4,3.7061
5,3.7995
6,3.2875
7,3.0308
8,3.0123
9,2.9835
10,2.8184


In [9]:
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

2.57 minutes used for training.
Peak reserved memory = 1.559 GB.
Peak reserved memory for training = 0.047 GB.
Peak reserved memory % of max memory = 10.576 %.
Peak reserved memory for training % of max memory = 0.319 %.


In [10]:
FastModel.for_inference(model)
inputs = tokenizer(
[
    prompt.format(
#        "This above all: to thine own self be true, And it must follow, as the night the day,Thou canst not then be false to any man.",
        "This goodly frame, the earth, seems to me a sterile promontory, this most excellent canopy, the air, look you, this brave o'erhanging firmament, this majestical roof fretted with golden fire, why, it appears no other thing to me than a foul and pestilent congregation of vapours. What a piece of work is a man! how noble in reason! how infinite in faculty! in form and moving how express and admirable! in action how like an angel! in apprehension how like a god! the beauty of the world! the paragon of animals! And yet, to me, what is this quintessence of dust?",    
        "",
    )
], return_tensors="pt").to("cuda")
outputs = model.generate(
    **inputs,
    max_new_tokens=2048,
    temperature=1.0, top_p=0.95, top_k=64,
)
tokenizer.batch_decode(outputs)

["<bos>Below is an instruction on how to translate text from complex and archaic to simple and modern english.\nThe input contains this text. Write in the response a faithful translation.\n\n### Instruction:\nTranslate the following line from complex to simple english.\nIt may contain archaic and literary words or structures: make them easily understandable.\n\n### Input:\nThis goodly frame, the earth, seems to me a sterile promontory, this most excellent canopy, the air, look you, this brave o'erhanging firmament, this majestical roof fretted with golden fire, why, it appears no other thing to me than a foul and pestilent congregation of vapours. What a piece of work is a man! how noble in reason! how infinite in faculty! in form and moving how express and admirable! in action how like an angel! in apprehension how like a god! the beauty of the world! the paragon of animals! And yet, to me, what is this quintessence of dust?\n\n### Response:\nThis beautiful frame, the earth, seems lik

In [11]:
model.push_to_hub('davide710/pf_gemma_1b', token='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')
tokenizer.push_to_hub('davide710/pf_gemma_1b', token='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')

README.md:   0%|          | 0.00/610 [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/26.1M [00:00<?, ?B/s]

Saved model to https://huggingface.co/davide710/pf_gemma_1b


No files have been modified since last commit. Skipping to prevent empty commit.


In [13]:
max_seq_length = 2048
dtype = None
load_in_4bit = True

saved, tokenizer = FastModel.from_pretrained(
    model_name='davide710/pf_gemma_1b',
    token='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',
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit
)

prompt = """Below is an instruction on how to translate text from complex and archaic to simple and modern english.
The input contains this text. Write in the response a faithful translation.

### Instruction:
Translate the following line from complex to simple english.
It may contain archaic and literary words or structures: make them easily understandable.

### Input:
{}

### Response:
{}"""

==((====))==  Unsloth 2025.3.19: Fast Gemma3 patching. Transformers: 4.50.0.dev0. vLLM: 0.8.3.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.


adapter_model.safetensors:   0%|          | 0.00/26.1M [00:00<?, ?B/s]

In [15]:
FastModel.for_inference(saved)
inputs = tokenizer(
[
    prompt.format(
        "This above all: to thine own self be true, And it must follow, as the night the day,Thou canst not then be false to any man.",
#        "This goodly frame, the earth, seems to me a sterile promontory, this most excellent canopy, the air, look you, this brave o'erhanging firmament, this majestical roof fretted with golden fire, why, it appears no other thing to me than a foul and pestilent congregation of vapours. What a piece of work is a man! how noble in reason! how infinite in faculty! in form and moving how express and admirable! in action how like an angel! in apprehension how like a god! the beauty of the world! the paragon of animals! And yet, to me, what is this quintessence of dust?",    
        "",
    )
], return_tensors="pt").to("cuda")
outputs = saved.generate(
    **inputs,
    max_new_tokens=2048,
    temperature=1.0, top_p=0.95, top_k=64,
)
tokenizer.batch_decode(outputs)

["<bos>Below is an instruction on how to translate text from complex and archaic to simple and modern english.\nThe input contains this text. Write in the response a faithful translation.\n\n### Instruction:\nTranslate the following line from complex to simple english.\nIt may contain archaic and literary words or structures: make them easily understandable.\n\n### Input:\nThis above all: to thine own self be true, And it must follow, as the night the day,Thou canst not then be false to any man.\n\n### Response:\nThis is the most important thing: be true to yourself. And it must follow the same rule as the night and the day: you can't be false to anyone.<end_of_turn>"]