In [None]:
%%capture
import os
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
    !pip install --no-deps unsloth

In [None]:
import torch
from unsloth import FastLanguageModel
from transformers import TrainingArguments
from trl import SFTTrainer
from datasets import load_dataset

# 1. Load the model
# The checkpoint is requested in 4-bit form; `max_seq_length` is the sequence
# length handed to the loader and reused later when building the trainer.
max_seq_length = 2048

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/medgemma-4b-pt",
    load_in_4bit = True,
    dtype = None,  # None lets the loader choose (torch.bfloat16 if available)
    max_seq_length = max_seq_length,
)

# 2. Attach LoRA adapters
# Wraps the loaded model with trainable adapter layers on the attention and
# MLP projection modules listed in `target_modules`.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules = [
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    r = 16,            # adapter rank
    lora_alpha = 16,   # adapter scaling factor
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = True,
    random_state = 42,
)

print("Unsloth model configured for 4-bit LoRA fine-tuning!")

In [None]:
from datasets import load_dataset

# Stream the training split and keep only its first record, so the column
# names can be inspected before the full dataset is loaded.
ds_preview = load_dataset(
    "epfl-llm/guidelines",
    split="train",
    streaming=True,
).take(1)
example = list(ds_preview)[0]

# Heading line followed by the record's keys on the next line.
print("Dataset Columns Found:", example.keys(), sep="\n")

In [None]:
# --- Step 2: Prepare the dataset ---

# Two-slot prompt layout: the first placeholder sits under the "Source"
# heading, the second under the "Guideline Text" heading.
prompt_template = (
    "### Source:\n"
    "{}\n"
    "\n"
    "### Guideline Text:\n"
    "{}"
)

# End-of-sequence token of the loaded tokenizer; it is appended to every
# formatted training example.
EOS_TOKEN = tokenizer.eos_token

def format_prompt(example):
    """Build the training text for a single dataset row.

    The row's 'source' and 'clean_text' values are substituted, in that
    order, into the module-level ``prompt_template`` and ``EOS_TOKEN`` is
    appended to the result.

    Args:
        example: Mapping that provides the 'source' and 'clean_text' keys.

    Returns:
        A dict with the single key "text" (the field name the trainer is
        configured to read) holding the formatted prompt.
    """
    source, guideline = example['source'], example['clean_text']
    return {"text": prompt_template.format(source, guideline) + EOS_TOKEN}


# Load the complete training split and run `format_prompt` over every row
# (four worker processes), which adds the "text" column used for training.
ds = load_dataset("epfl-llm/guidelines", split="train").map(
    format_prompt,
    num_proc=4,
)

print("\nDataset loaded and formatted successfully!")
print("Here is an example of a formatted prompt:")
# Show the first formatted row to verify the final layout.
first_row = ds[0]
print(first_row['text'])

In [None]:
# Build the supervised fine-tuning trainer over the formatted dataset.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = ds,
    dataset_text_field = "text",      # column produced by the formatting step
    dataset_num_proc = 2,
    max_seq_length = max_seq_length,
    packing = False,                  # True can speed up training on many short sequences
    args = TrainingArguments(
        output_dir = "outputs",
        seed = 42,
        # Batching: 2 per device x 4 accumulation steps = effective batch of 8.
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        # Schedule: training is capped at 500 steps; lower this for a quick
        # demonstration run.
        max_steps = 500,
        warmup_steps = 5,
        learning_rate = 2e-4,
        lr_scheduler_type = "linear",
        # Optimizer: 8-bit AdamW to save memory.
        optim = "adamw_8bit",
        weight_decay = 0.01,
        # Precision: bf16 when the GPU supports it, otherwise fp16.
        bf16 = torch.cuda.is_bf16_supported(),
        fp16 = not torch.cuda.is_bf16_supported(),
        logging_steps = 1,
    ),
)

# Run the fine-tuning loop and keep the returned statistics.
print("Starting the fine-tuning process...")
trainer_stats = trainer.train()
print("Fine-tuning complete!")

In [None]:
# Smoke-test the fine-tuned model.
# The prompt reuses the training template with only the "Source" slot filled,
# leaving the "Guideline Text" section for the model to complete.

from transformers import pipeline

# In the same session `trainer.model` can be used directly. In a new session,
# load saved weights instead, for example:
# from unsloth import FastLanguageModel
# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name = "outputs/checkpoint-<step>", # Or your final saved model
# )

text_pipeline = pipeline(
    "text-generation",
    model=trainer.model,
    tokenizer=tokenizer,
)

# Build the test prompt; the empty second argument leaves the guideline
# section blank.
test_prompt_input = "American College of Cardiology"
test_prompt_formatted = prompt_template.format(test_prompt_input, "")

# Generate up to 256 new tokens.
output = text_pipeline(test_prompt_formatted, max_new_tokens=256)

# Report the prompt and the generated text between two separator rules.
print(
    "="*50,
    "PROMPT:",
    test_prompt_formatted,
    "\nMODEL OUTPUT:",
    output[0]['generated_text'],
    "="*50,
    sep="\n",
)

In [None]:
# --- Step 5: Save the fine-tuned LoRA adapters locally ---
# The model and the tokenizer are written to the same directory, model first.

lora_model_name = "medgemma-guidelines-4b-4bit-lora"
for _artifact in (model, tokenizer):
    _artifact.save_pretrained(lora_model_name)

print(f"LoRA adapters saved locally to '{lora_model_name}'")


# --- Step 6: Log in and upload to the Hugging Face Hub ---

from huggingface_hub import notebook_login

# 1. Log in to your Hugging Face account.
# A widget will appear. Paste an access token with 'write' permissions.
notebook_login()

# 2. Push the model adapters to the Hub.
# The push_to_hub command will create a new repository if it doesn't exist.
# NOTE: the repository id below is hard-coded to a specific account; change
# the part before the slash to your own Hugging Face username before running.
hf_repo_name = "huseyincavus/medgemma-4b-guidelines-lora"

print(f"Uploading adapters to Hugging Face Hub repository: {hf_repo_name}")
# `token=True` uses the credential stored by notebook_login(). It replaces the
# deprecated `use_auth_token=True` keyword.
model.push_to_hub(hf_repo_name, token=True)
tokenizer.push_to_hub(hf_repo_name, token=True)
print("Upload complete!")