In [1]:
from unsloth import FastLanguageModel
import torch

max_seq_length = 4096 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer=FastLanguageModel.from_pretrained(
    model_name="Qwen/Qwen2.5-1.5B-Instruct",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.10.7: Fast Qwen2 patching. Transformers = 4.46.1.
   \\   /|    GPU: NVIDIA GeForce RTX 3090. Max memory: 23.668 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.0+cu121. CUDA = 8.6. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.27.post2. FA2 = True]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth




In [2]:
model=FastLanguageModel.get_peft_model(
    model,
    r=64, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=64,
    lora_dropout=0, # Currently only supports dropout=0
    bias="none",    # Currently only supports bias="none"
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing="unsloth", # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,  # We support rank stabilized LoRA
    loftq_config=None, # And LoftQ
)

Unsloth 2024.10.7 patched 28 layers with 0 QKV layers, 28 O layers and 28 MLP layers.


In [3]:
prompt_template = """<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
{}<|im_end|>
<|im_start|>assistant
{}<|im_end|>"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN

def format_prompt(examples):
    prompts   = examples["prompt"]
    responses = examples["response"]
    texts = []
    for prompt, response in zip(prompts, responses):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = prompt_template.format(prompt, response)
        texts.append(text)
    return {"text": texts}

from datasets import load_dataset
dataset = load_dataset("OpenEndedLM/OpenEnded-v0.1")["train"]
dataset = dataset.map(format_prompt, batched=True)

README.md:   0%|          | 0.00/311 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/715k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/600 [00:00<?, ? examples/s]

Map:   0%|          | 0/600 [00:00<?, ? examples/s]

In [4]:
print(dataset["text"][-1])

<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
How does mental health contribute to overall wellbeing and how can it be improved?<|im_end|>
<|im_start|>assistant
Mental health contributes significantly to overall wellbeing in numerous ways:

1. **Coping and Resilience:** Strong mental health enables individuals to effectively navigate challenges, adapt to changes, and maintain emotional stability. This resilience helps them cope with stressors, job problems, health issues, relationships, and life events.

2. **Behavioral Health:** Mental health supports healthy behaviors such as good nutrition, physical activity, and adequate sleep. These are essential for overall health and wellness.

3. **Social Functioning:** It fosters good relationships, enhances social interaction, and improves collaboration in a team setting. Happy and healthy individuals tend to reciprocate these qualities, which benefits their social connections.

4. **Personal Development:** Mental

In [7]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,

        num_train_epochs = 2,
        #max_steps = 60,

        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

Map (num_proc=2):   0%|          | 0/600 [00:00<?, ? examples/s]

In [8]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 600 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 150
 "-____-"     Number of trainable parameters = 73,859,072


  0%|          | 0/150 [00:00<?, ?it/s]

{'loss': 1.6349, 'grad_norm': 0.5394589304924011, 'learning_rate': 4e-05, 'epoch': 0.01}
{'loss': 1.0732, 'grad_norm': 0.4944881200790405, 'learning_rate': 8e-05, 'epoch': 0.03}
{'loss': 1.312, 'grad_norm': 0.5473517179489136, 'learning_rate': 0.00012, 'epoch': 0.04}
{'loss': 1.5588, 'grad_norm': 0.38827428221702576, 'learning_rate': 0.00016, 'epoch': 0.05}
{'loss': 1.3074, 'grad_norm': 0.396592378616333, 'learning_rate': 0.0002, 'epoch': 0.07}
{'loss': 1.6947, 'grad_norm': 0.32169005274772644, 'learning_rate': 0.00019862068965517243, 'epoch': 0.08}
{'loss': 1.2959, 'grad_norm': 0.2792431712150574, 'learning_rate': 0.00019724137931034484, 'epoch': 0.09}
{'loss': 1.3212, 'grad_norm': 0.36545509099960327, 'learning_rate': 0.00019586206896551723, 'epoch': 0.11}
{'loss': 1.1028, 'grad_norm': 0.30583956837654114, 'learning_rate': 0.00019448275862068965, 'epoch': 0.12}
{'loss': 1.0802, 'grad_norm': 0.33153149485588074, 'learning_rate': 0.0001931034482758621, 'epoch': 0.13}
{'loss': 0.9743, '

In [10]:
prompt = """<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
Can you tell me about triangles...<|im_end|>
<|im_start|>assistant
"""

FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs=tokenizer([prompt], return_tensors="pt").to("cuda")

outputs=model.generate(**inputs, max_new_tokens=1024, use_cache=True)
print(tokenizer.batch_decode(outputs)[0])

<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
Can you tell me about triangles...<|im_end|>
<|im_start|>assistant
Certainly! A triangle is a fundamental geometric shape consisting of three straight sides and three angles. The sum of the interior angles in any triangle always adds up to 180 degrees, which is a key property that distinguishes it from other polygon shapes.

Triangles can be classified based on their side lengths or angle measurements:

**By Side Lengths:**
- **Equilateral Triangle:** All three sides are equal in length.
- **Isosceles Triangle:** Two sides are equal in length.
- **Scalene Triangle:** No sides are equal in length.

**By Angles:**
- **Acute Triangle:** All three internal angles are less than 90 degrees (acute).
- **Right Triangle:** One internal angle is exactly 90 degrees (right angle), and the other two are acute.
- **Obtuse Triangle:** One internal angle is greater than 90 degrees (obtuse) and the other two are acute.

Triangle

In [11]:
model.save_pretrained_merged("output")

Unsloth: You're not saving a tokenizer as well?
You can do it separately via `tokenizer.save_pretrained(...)`


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 11.47 out of 31.11 RAM for saving.


100%|██████████| 28/28 [00:00<00:00, 183.46it/s]


Unsloth: Saving model... This might take 5 minutes for Llama-7b...





Done.
