# Model Finetuning with Unsloth on Modal

This notebook demonstrates how to finetune various models using Unsloth on the Modal platform. Use the dropdown menu below to select the model you want to finetune.

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Display label -> model identifier offered in the dropdown.
# NOTE(review): some identifiers (e.g. the "Qwen/Qwen2-*" entries) could not be
# checked from this notebook alone -- confirm each one resolves before use.
model_options = {
    "Llama 3.1 8B": "meta-llama/Llama-3.1-8B",
    "Gemma 3 4B": "unsloth/gemma-3-4b",
    "Gemma 3n E4B": "unsloth/gemma-3n-E4B",
    "Mistral 7B Instruct v0.3": "unsloth/mistral-7b-instruct-v0.3",
    "Qwen2 4B Instruct": "Qwen/Qwen2-4B-Instruct",
    "Qwen2 8B": "Qwen/Qwen2-8B",
    "Qwen2 34B Thinking": "Qwen/Qwen2-34B-Thinking",
    "Yi 6B": "01-ai/Yi-6B",
}

# Dropdown whose value is the model identifier; the Llama 3.1 8B entry is
# preselected.
model_dropdown = widgets.Dropdown(
    description="Model:",
    options=model_options,
    value=model_options["Llama 3.1 8B"],
    disabled=False,
)

display(model_dropdown)

In [None]:
import modal
import os

# Modal app that the finetuning function is registered on.
stub = modal.App("llama-finetune-unsloth")

# Container image, built up one layer at a time: a Debian slim base on
# Python 3.10, git (needed for the git+https Unsloth install), then the
# Python packages used by the finetuning function.
image = modal.Image.debian_slim(python_version="3.10")
image = image.apt_install("git")
image = image.pip_install(
    "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git",
    "torch",
    "transformers",
    "datasets",
    "trl",
    "accelerate",
)

In [None]:
@stub.function(
    gpu="A10G",
    image=image,
    secrets=[modal.Secret.from_name("my-huggingface-secret")],
    timeout=18000,
)
def finetune(model_name: str) -> None:
    """Finetune ``model_name`` with Unsloth LoRA and push the result to the Hub.

    The model is loaded in 4-bit, the ``Guilherme34/uncensor`` train split is
    rendered to plain text with the tokenizer's chat template, LoRA adapters
    are trained for 60 steps with ``SFTTrainer``, and the trained model is
    pushed to ``realoperator42/<model_name with "/" replaced by "-">-uncensored``.

    Args:
        model_name: Model identifier passed to
            ``FastLanguageModel.from_pretrained`` (for example
            ``"meta-llama/Llama-3.1-8B"``).

    Raises:
        KeyError: If neither ``HUGGING_FACE_HUB_TOKEN`` nor ``HF_TOKEN`` is
            set in the environment.
        ValueError: If no example survives chat-template formatting, so there
            is nothing to train on.
    """
    # Local import so the function does not depend on another notebook cell
    # having imported ``os`` at module level.
    import os

    import torch
    from huggingface_hub import login
    from datasets import load_dataset
    from unsloth import FastLanguageModel
    from trl import SFTTrainer
    from transformers import TrainingArguments

    # Shared by model loading and the trainer; the two must agree.
    max_seq_length = 2048
    # Shared by LoRA initialisation and the training arguments.
    seed = 3407

    # Read the token once. HUGGING_FACE_HUB_TOKEN keeps priority so existing
    # secrets behave as before; HF_TOKEN is accepted as a fallback.
    hf_token = os.environ.get("HUGGING_FACE_HUB_TOKEN") or os.environ.get("HF_TOKEN")
    if not hf_token:
        raise KeyError(
            "HUGGING_FACE_HUB_TOKEN (or HF_TOKEN) is not set; "
            "check the Modal secret attached to this function"
        )

    # Log in to Hugging Face
    login(token=hf_token)

    # Load the model and tokenizer first: the tokenizer's chat template is
    # needed to format the dataset below.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=max_seq_length,
        dtype=None,
        load_in_4bit=True,
    )

    def format_prompt(example):
        """Render one example's ``messages`` to a single ``text`` string.

        Returns ``{"text": ""}`` when the example has no messages or the chat
        template cannot be applied; such rows are filtered out afterwards.
        """
        messages = example.get("messages", [])
        if not messages:
            return {"text": ""}
        try:
            text = tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=False
            )
        except Exception as e:
            # Deliberate best-effort: one malformed example must not abort the
            # whole run. The total number of dropped rows is reported below.
            print(f"Error formatting prompt: {e}")
            return {"text": ""}
        return {"text": text}

    # Load and format the dataset
    raw_dataset = load_dataset("Guilherme34/uncensor", split="train")
    dataset = raw_dataset.map(format_prompt, batched=False)  # Process one example at a time
    # Filter out empty texts
    dataset = dataset.filter(lambda x: len(x["text"]) > 0)

    dropped = raw_dataset.num_rows - dataset.num_rows
    if dropped:
        print(f"Dropped {dropped} of {raw_dataset.num_rows} examples that could not be formatted")
    if dataset.num_rows == 0:
        # Fail here with an actionable message instead of handing the trainer
        # an empty dataset.
        raise ValueError(
            f"No training examples left after applying the chat template for {model_name!r}; "
            "check that the dataset has a 'messages' column and that the tokenizer "
            "defines a chat template"
        )

    # Configure LoRA
    model = FastLanguageModel.get_peft_model(
        model,
        r=16,
        lora_alpha=16,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing=True,
        random_state=seed,
        use_rslora=False,
        loftq_config=None,
    )

    # Configure the trainer
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=max_seq_length,
        dataset_num_proc=2,
        # Packing is off. Per the original note, turning it on (packing=True)
        # can make training 5x faster for short sequences.
        packing=False,
        args=TrainingArguments(
            per_device_train_batch_size=2,
            gradient_accumulation_steps=4,
            warmup_steps=5,
            max_steps=60,
            learning_rate=2e-4,
            # Use bf16 when the GPU supports it, otherwise fall back to fp16.
            fp16=not torch.cuda.is_bf16_supported(),
            bf16=torch.cuda.is_bf16_supported(),
            logging_steps=1,
            optim="adamw_8bit",
            weight_decay=0.01,
            lr_scheduler_type="linear",
            seed=seed,
            output_dir="outputs",
        ),
    )

    # Start training
    trainer.train()

    # Push the model to the Hugging Face Hub. "/" is replaced so the source
    # model id can be embedded in a single repository name.
    safe_model_name = model_name.replace('/', '-')
    hub_model_name = f"realoperator42/{safe_model_name}-uncensored"
    model.push_to_hub(hub_model_name, token=hf_token)
    print(f"Model pushed to Hugging Face Hub at {hub_model_name}")

## Running the Finetuning

To run the finetuning process, you need to deploy this notebook to Modal. You can do this by running the following command in your terminal:

```bash
modal deploy Modal_Notebook.ipynb
```

Then, to run the finetuning function, call it from the command line, passing the Hugging Face model identifier of the selected model (the dropdown's value, i.e. `model_dropdown.value`, not its display label):

```bash
modal run Modal_Notebook.ipynb::finetune --model-name [SELECTED_MODEL_NAME]
```

For example:
```bash
modal run Modal_Notebook.ipynb::finetune --model-name meta-llama/Llama-3.1-8B
```