In [1]:
%%capture
# Environment setup; %%capture hides the pip output of this cell.
# Step 1 installs the released `unsloth` package.
# Step 2 removes it again and reinstalls the "colab-new" extra straight from the
# GitHub repository with --upgrade and --no-cache-dir.
# NOTE(review): presumably step 1 exists to pull in the dependency set before
# the git build replaces the package itself — confirm before simplifying.
# NOTE(review): neither install is version-pinned, so a fresh run may resolve to
# different package versions than the ones shown in the saved outputs.
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [2]:
from unsloth import FastLanguageModel
import torch

# Loading options, kept as module-level names so later cells can reuse them.
max_seq_length = 4096  # reused further down for the trainer's length limits
dtype = None           # NOTE(review): presumably lets Unsloth choose the dtype itself — confirm
load_in_4bit = True    # the model name below is a "bnb-4bit" checkpoint

# Gather the arguments first, then load the base model and its tokenizer in one call.
load_kwargs = dict(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.10.3: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: NVIDIA A100-SXM4-40GB. Max memory: 39.564 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 8.0. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/198 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Unsloth: We fixed a gradient accumulation bug, but it seems like you don't have the latest transformers version!
Please update transformers via:
`pip uninstall transformers -y && pip install --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git"`


In [3]:
# Names of the projection modules that are handed to `target_modules`.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Rebind `model` to the PEFT-wrapped model returned by Unsloth.
# Note that the input and the output share the name `model`, so re-running this
# cell would pass the already-wrapped model back in.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = lora_target_modules,
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
)

Unsloth 2024.10.3 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


<a name="Data"></a>
### Data Preparation

In [4]:
# Prompt template with three positional `{}` slots, filled in order via
# `str.format`: instruction, input, response. Both the data-preparation and the
# inference code pass an empty string for the response slot, so the rendered
# prompt ends right after "### Response:\n".
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence marker taken from the loaded tokenizer; it is appended to
# every chosen/rejected completion in `format_prompt`.
EOS_TOKEN = tokenizer.eos_token

def format_prompt(sample):
    """Add the `prompt` / `chosen` / `rejected` fields to one dataset row.

    Reads the row's "instruction", "input", "accepted" and "rejected" entries,
    renders the prompt from `alpaca_prompt` with an empty response slot, and
    stores both completions with `EOS_TOKEN` appended. The "rejected" entry is
    overwritten with its EOS-terminated version.

    Args:
        sample: mapping for a single row; it is modified in place.

    Returns:
        The same `sample` object, now carrying the three fields above.
    """
    # Read every source field up front, before anything is written back.
    source_keys = ("instruction", "input", "accepted", "rejected")
    task, context, preferred, dispreferred = (sample[key] for key in source_keys)

    sample["prompt"] = alpaca_prompt.format(task, context, "")
    sample["chosen"] = preferred + EOS_TOKEN
    sample["rejected"] = dispreferred + EOS_TOKEN
    return sample

from datasets import load_dataset
# Fetch every split of the preference dataset, then keep only "train" and run
# `format_prompt` over each of its rows.
preference_splits = load_dataset("reciperesearch/dolphin-sft-v0.1-preference")
dataset = preference_splits["train"].map(format_prompt)

README.md:   0%|          | 0.00/490 [00:00<?, ?B/s]

dpo_fixed.jsonl:   0%|          | 0.00/34.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

<a name="Train"></a>
### Training the Model

In [5]:
# Apply Unsloth's trainer patch. In this cell the call runs before `trl` is
# imported below.
# NOTE(review): presumably that ordering is required for the patch to take
# effect on the TRL trainer classes — confirm before reordering the imports.
from unsloth import PatchDPOTrainer
PatchDPOTrainer()

from trl import ORPOConfig, ORPOTrainer
from unsloth import is_bfloat16_supported

# Query bf16 support once; the two precision flags below are complements of it.
use_bf16 = is_bfloat16_supported()

# Prompt and completion are each allowed half of the overall sequence budget.
half_seq_length = max_seq_length//2

orpo_args = ORPOConfig(
    max_length = max_seq_length,
    max_prompt_length = half_seq_length,
    max_completion_length = half_seq_length,
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,  # 2 per device x 4 accumulation steps
    beta = 0.1,
    logging_steps = 1,
    optim = "adamw_8bit",
    lr_scheduler_type = "linear",
    max_steps = 30,  # a short run; the step cap takes precedence over epochs
    fp16 = not use_bf16,
    bf16 = use_bf16,
    output_dir = "outputs",
    report_to = "none",
)

orpo_trainer = ORPOTrainer(
    model = model,
    train_dataset = dataset,
    tokenizer = tokenizer,
    args = orpo_args,
)

# Kept as the last expression so the notebook shows the returned training summary.
orpo_trainer.train()

Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 16,000 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 30
 "-____-"     Number of trainable parameters = 41,943,040
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,rewards / chosen,rewards / rejected,rewards / accuracies,rewards / margins,logps / rejected,logps / chosen,logits / rejected,logits / chosen
1,2.458,-0.181352,-0.095583,0.0,-0.085769,-0.955831,-1.813524,-0.575378,-0.561036
2,2.2914,-0.156385,-0.09023,0.125,-0.066155,-0.902296,-1.563849,-0.946963,-0.791881
3,2.472,-0.119957,-0.096431,0.125,-0.023526,-0.964306,-1.199568,-0.751174,-0.750149
4,2.6514,-0.163623,-0.128386,0.0,-0.035237,-1.283857,-1.63623,-0.88447,-0.942842
5,2.1226,-0.117164,-0.08704,0.25,-0.030124,-0.8704,-1.171636,-0.632536,-0.599691
6,2.228,-0.127774,-0.117087,0.375,-0.010687,-1.170873,-1.277739,-0.611342,-0.6813
7,2.2423,-0.09694,-0.094535,0.5,-0.002405,-0.945352,-0.969399,-0.8205,-0.887684
8,2.1665,-0.105371,-0.069913,0.0,-0.035459,-0.699127,-1.053713,-0.95329,-0.928003
9,2.1142,-0.115563,-0.072658,0.25,-0.042905,-0.726581,-1.155626,-1.110592,-1.152793
10,1.9712,-0.115553,-0.086772,0.0,-0.028782,-0.867716,-1.155531,-0.61843,-0.713999


TrainOutput(global_step=30, training_loss=2.0144440293312074, metrics={'train_runtime': 96.6454, 'train_samples_per_second': 2.483, 'train_steps_per_second': 0.31, 'total_flos': 0.0, 'train_loss': 2.0144440293312074, 'epoch': 0.015})

<a name="Inference"></a>
### Inference

In [6]:
# Hand the model to Unsloth's `for_inference` before generating.
FastLanguageModel.for_inference(model)

# Same template as in data preparation, with the response slot left empty so
# the model writes the answer.
demo_prompt = alpaca_prompt.format(
    "Explain the concept of machine learning",
    "Focus on supervised and unsupervised learning",
    "",
)

# Tokenize a one-element batch and move the tensors to the GPU.
inputs = tokenizer([demo_prompt], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)

# Decode the batch and print its only entry (prompt plus generated text).
decoded_batch = tokenizer.batch_decode(outputs)
print(decoded_batch[0])

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Explain the concept of machine learning

### Input:
Focus on supervised and unsupervised learning

### Response:
Machine learning is a field of computer science that focuses on the development of algorithms that allow computers to learn from data and make predictions without being explicitly programmed. Machine learning is a subset of artificial intelligence, which is the broader field of computer science that deals with the development of intelligent machines. Machine learning algorithms are trained on data to learn patterns and make predictions. There are two main types of machine learning: supervised and unsupervised learning. Supervised learning is a type of machine learning where the algorithm is trained on labeled data, where each data point has a known label or class. The algorithm is trai

<a name="Save"></a>
### Saving the Model

In [7]:
# The model and the tokenizer are written into the same directory.
# NOTE(review): `model` is the PEFT-wrapped model here, so this presumably
# stores adapter weights rather than a merged checkpoint — verify.
save_dir = "orpo_model"
model.save_pretrained(save_dir)
# Left as the final expression so the notebook displays the returned file paths.
tokenizer.save_pretrained(save_dir)

('orpo_model/tokenizer_config.json',
 'orpo_model/special_tokens_map.json',
 'orpo_model/tokenizer.json')