In [2]:
from unsloth import FastModel
import torch
max_seq_length = 2048

model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/gemma-3-270m-it",
    max_seq_length = max_seq_length, # Choose any for long context!
    load_in_4bit = False,  # 4 bit quantization to reduce memory
    load_in_8bit = False, # [NEW!] A bit more accurate, uses 2x memory
    full_finetuning = False, # [NEW!] We have full finetuning now!
    # token = "hf_...", # use one if using gated models
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.8.8: Fast Gemma3 patching. Transformers: 4.55.2.
   \\   /|    NVIDIA GeForce RTX 3080 Ti. Num GPUs = 1. Max memory: 11.624 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 8.6. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: QLoRA and full finetuning all not selected. Switching to 16bit LoRA.


In [3]:
model = FastModel.get_peft_model(
    model,
    r = 128, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 128,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth: Making `model.base_model.model.model` require gradients


In [4]:
from unsloth.chat_templates import get_chat_template
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "gemma3",
)

In [5]:
from datasets import load_dataset
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT", "en", split="train")

In [6]:
# We now use convert_to_chatml to try converting datasets to the correct format for finetuning purposes!
def convert_to_chatml(example):
    return {
        "conversations": [
            {"role": "user", "content": example["Question"]},
            {"role": "assistant", "content": example["Complex_CoT"] + "\n" + example["Response"]}
        ]
    }


dataset = dataset.map(
    convert_to_chatml
)

In [None]:
# from itertools import islice
# next(islice(dataset, 10, 11))
# dataset[10]

{'Question': 'In a patient with dermatomyositis as indicated by fatigue, muscle weakness, a scaly rash, elevated creatine kinase-MB, anti-Jo-1 antibodies, and perimysial inflammation, which type of cancer is most often associated with this condition?',
 'Complex_CoT': "Alright, so when I'm thinking about dermatomyositis, I know it's an inflammatory condition with muscle weakness and a telltale skin rash. It's sometimes linked to certain cancers. \n\nNow, I remember reading somewhere that when you have dermatomyositis, there's a higher chance of getting some types of cancer. But which one pops up the most? Let's think about that. \n\nFrom what I recall, ovarian cancer often gets mentioned in this context. That's especially true for women with dermatomyositis. So, it seems like a big contender. \n\nHowever, just pointing out ovarian cancer seems a bit simplistic. With diseases like this, there might be more frequently encountered cancers, especially if you consider both men and women. \n

In [7]:
def formatting_prompts_func(examples):
   convos = examples["conversations"]
   texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False).removeprefix('<bos>') for convo in convos]
   return { "text" : texts, }

dataset = dataset.map(formatting_prompts_func, batched = True)

Map: 100%|██████████| 19704/19704 [00:00<00:00, 20712.86 examples/s]


In [None]:
# dataset[100]

{'Question': 'A 25-year-old woman presents to the ED with a diffuse, erythematous rash, nausea, vomiting, and fever for 2 days. Physical examination reveals a soaked tampon in her vagina, and blood cultures are negative, suggesting toxic shock syndrome. Which specific molecule on T cells does the toxin most likely bind to?',
 'Complex_CoT': "Alright, here's a situation with a 25-year-old woman who showed up in the emergency department. She's got this widespread red rash, feeling nauseous, she's vomiting, and running a fever for two days. Something's not quite right here, and it all starts connecting to the idea of toxic shock syndrome. Oh, and there's a crucial detail: they found a soaked tampon during her exam.\n\nOkay, let's dig into what's happening in toxic shock syndrome. It's a bit of a nightmare because it's associated with these things called superantigens. These are basically like the rogue agents of the bacterial world, and they're mostly coming from bugs like Staphylococcus 

In [8]:
# train the model
from trl import SFTTrainer, SFTConfig
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    eval_dataset = None, # Can set up evaluation!
    args = SFTConfig(
        dataset_text_field = "text",
        per_device_train_batch_size = 8,
        gradient_accumulation_steps = 1, # Use GA to mimic batch size!
        warmup_steps = 5,
        num_train_epochs = 1, # Set this for 1 full training run.
        # max_steps = None, #100
        learning_rate = 5e-5, # Reduce to 2e-5 for long training runs
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir="outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

Unsloth: Tokenizing ["text"] (num_proc=2): 100%|██████████| 19704/19704 [00:09<00:00, 2096.48 examples/s]


In [9]:
from unsloth.chat_templates import train_on_responses_only
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<start_of_turn>user\n",
    response_part = "<start_of_turn>model\n",
)

Map (num_proc=24): 100%|██████████| 19704/19704 [00:01<00:00, 18570.52 examples/s]


In [33]:
tokenizer.decode(trainer.train_dataset[100]["input_ids"])

"<bos><start_of_turn>user\nA 25-year-old woman presents to the ED with a diffuse, erythematous rash, nausea, vomiting, and fever for 2 days. Physical examination reveals a soaked tampon in her vagina, and blood cultures are negative, suggesting toxic shock syndrome. Which specific molecule on T cells does the toxin most likely bind to?<end_of_turn>\n<start_of_turn>model\nAlright, here's a situation with a 25-year-old woman who showed up in the emergency department. She's got this widespread red rash, feeling nauseous, she's vomiting, and running a fever for two days. Something's not quite right here, and it all starts connecting to the idea of toxic shock syndrome. Oh, and there's a crucial detail: they found a soaked tampon during her exam.\n\nOkay, let's dig into what's happening in toxic shock syndrome. It's a bit of a nightmare because it's associated with these things called superantigens. These are basically like the rogue agents of the bacterial world, and they're mostly coming 

In [34]:
tokenizer.decode([tokenizer.pad_token_id if x == -100 else x for x in trainer.train_dataset[100]["labels"]]).replace(tokenizer.pad_token, " ")

"                                                                            Alright, here's a situation with a 25-year-old woman who showed up in the emergency department. She's got this widespread red rash, feeling nauseous, she's vomiting, and running a fever for two days. Something's not quite right here, and it all starts connecting to the idea of toxic shock syndrome. Oh, and there's a crucial detail: they found a soaked tampon during her exam.\n\nOkay, let's dig into what's happening in toxic shock syndrome. It's a bit of a nightmare because it's associated with these things called superantigens. These are basically like the rogue agents of the bacterial world, and they're mostly coming from bugs like Staphylococcus aureus.\n\nSuperantigens are tricky; they have a special way of activating T cells way beyond the norm. Usually, T cells need to see a specific antigen presented by MHC molecules, but these superantigens bypass that entire check and go straight to just supercharging 

In [10]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 19,704 | Num Epochs = 1 | Total steps = 2,463
O^O/ \_/ \    Batch size per device = 8 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (8 x 1 x 1) = 8
 "-____-"     Trainable parameters = 30,375,936 of 298,474,112 (10.18% trained)


Step,Training Loss
1,2.8982
2,2.7534
3,2.7725
4,2.7783
5,2.6207
6,2.6096
7,2.4749
8,2.4351
9,2.4619
10,2.5684


Unsloth: Will smartly offload gradients to save VRAM!


In [11]:
# inference
messages = [
    {'role': 'system','content':dataset['conversations'][10][0]['content']},
    {"role" : 'user', 'content' : dataset['conversations'][10][1]['content']}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize = False,
    add_generation_prompt = True, # Must add for generation
).removeprefix('<bos>')

from transformers import TextStreamer
_ = model.generate(
    **tokenizer(text, return_tensors = "pt").to("cuda"),
    max_new_tokens = 125,
    temperature = 1, top_p = 0.95, top_k = 64,
    streamer = TextStreamer(tokenizer, skip_prompt = True),
)

<bos><start_of_turn>user
In a patient with dermatomyositis as indicated by fatigue, muscle weakness, a scaly rash, elevated creatine kinase-MB, anti-Jo-1 antibodies, and perimysial inflammation, which type of cancer is most often associated with this condition?

Alright, so when I'm thinking about dermatomyositis, I know it's an inflammatory condition with muscle weakness and a telltale skin rash. It's sometimes linked to certain cancers. 

Now, I remember reading somewhere that when you have dermatomyositis, there's a higher chance of getting some types of cancer. But which one pops up the most? Let's think about that. 

From what I recall, ovarian cancer often gets mentioned in this context. That's especially true for women with dermatomyositis. So, it seems like a big contender. 

However, just pointing out ovarian cancer seems a bit simplistic. With diseases like this, there might be more frequently encountered cancers, especially if you consider both men and women. 

Lung and stom

In [12]:
model.save_pretrained("gemma3-270m-med")  # Local saving
tokenizer.save_pretrained("gemma3-270m-med")

('gemma3-270m-med/tokenizer_config.json',
 'gemma3-270m-med/special_tokens_map.json',
 'gemma3-270m-med/chat_template.jinja',
 'gemma3-270m-med/tokenizer.model',
 'gemma3-270m-med/added_tokens.json',
 'gemma3-270m-med/tokenizer.json')

In [None]:
model.push_to_hub("hashvibe007/gemma3-270m-med-reasoning", token = "") # Online saving
tokenizer.push_to_hub("hashvibe007/gemma3-270m-med-reasoning", token = "") # Online saving

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            
[A
Processing Files (0 / 1)                :   0%|          |  555kB /  122MB,  174kB/s  
[A
[A
Processing Files (0 / 1)                :   1%|          | 1.11MB /  122MB,  292kB/s  
Processing Files (0 / 1)                :   1%|▏         | 1.67MB /  122MB,  416kB/s  
Processing Files (0 / 1)                :   2%|▏         | 2.78MB /  122MB,  661kB/s  
Processing Files (0 / 1)                :   4%|▍         | 5.00MB /  122MB, 1.14MB/s  
[A
Processing Files (0 / 1)                :   5%|▌         | 6.11MB /  122MB, 1.27MB/s  
[A
Processing Files (0 / 1)                :   6%|▋         | 7.77MB /  122MB, 1.50MB/s  
Processing Files (0 / 1)                :   8%|▊         | 9.44MB /  122MB, 1.75MB/s  
Processing Files (0 / 1)                :   9%|▉         | 11.1MB /  122MB, 1.98MB/s  
[A
Processing Files (0 / 1)                :  10%|█         | 12.2MB /  122MB, 2.04MB/s  
Processing Files (0 / 1

Saved model to https://huggingface.co/hashvibe007/gemma3-270m-med-reasoning


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            
[A

[A[A
[A

Processing Files (2 / 2)                : 100%|██████████| 38.1MB / 38.1MB,   ???B/s  
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

Processing Files (2 / 2)                : 100%|██████████| 38.1MB / 38.1MB,  0.00B/s  
New Data Upload                         : |          |  0.00B /  0.00B,  0.00B/s  
  /tmp/tmpwepgc80j/tokenizer.model      : 100%|██████████| 4.69MB / 4.69MB            
  /tmp/tmpwepgc80j/tokenizer.json       : 100%|██████████| 33.4MB / 33.4MB            
No files have been modified since last commit. Skipping to prevent empty commit.
