In [2]:
%%capture
# Install Unsloth from PyPI, then uninstall it and reinstall the `colab-new`
# extra straight from the GitHub repository (upgrading, with pip's cache off).
# `%%capture` silences this cell's output, so any pip error text is hidden too.
# NOTE(review): the model-loading cell's output warns that the installed
# transformers version predates a gradient-accumulation fix, and this notebook
# trains with gradient_accumulation_steps = 4 — consider upgrading transformers
# as that message suggests; confirm against current Unsloth guidance.
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [3]:
from unsloth import FastLanguageModel
import torch

# Loading options; `max_seq_length` is reused later for the DPO length limits.
max_seq_length = 4096
dtype = None          # forwarded to from_pretrained unchanged
load_in_4bit = True   # the checkpoint name below is a "-bnb-4bit" variant

# Collect the keyword arguments first so the load call itself stays short.
model_load_kwargs = dict(
    model_name = "unsloth/mistral-7b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**model_load_kwargs)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.10.3: Fast Mistral patching. Transformers = 4.44.2.
   \\   /|    GPU: NVIDIA A100-SXM4-40GB. Max memory: 39.564 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 8.0. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/4.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/155 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

Unsloth: We fixed a gradient accumulation bug, but it seems like you don't have the latest transformers version!
Please update transformers via:
`pip uninstall transformers -y && pip install --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git"`


In [4]:
# Layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Adapter hyper-parameters, gathered in one place for easy tuning.
lora_settings = dict(
    r = 16,
    target_modules = lora_target_modules,
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
)

# Rebinds `model` to the PEFT-wrapped model returned by Unsloth.
model = FastLanguageModel.get_peft_model(model, **lora_settings)

Unsloth 2024.10.3 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


<a name="Data"></a>
### Data Prep

In [5]:
# Alpaca-style prompt template. It has three positional `{}` slots that are
# filled with `str.format` in this order: instruction, input, response.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence token string read from the loaded tokenizer; `format_prompt`
# appends it to both the chosen and the rejected completion.
EOS_TOKEN = tokenizer.eos_token

def format_prompt(sample):
    """Add the DPO fields to one preference record, modifying it in place.

    Reads ``instruction``, ``input``, ``accepted`` and ``rejected`` from
    ``sample`` and writes:

    * ``prompt``   - the Alpaca template with the response slot left empty,
    * ``chosen``   - the accepted answer followed by ``EOS_TOKEN``,
    * ``rejected`` - the rejected answer followed by ``EOS_TOKEN``
      (this overwrites the original ``rejected`` value).

    Returns the same ``sample`` object that was passed in.
    """
    # Read every source field up front, before any key is overwritten.
    source_keys = ("instruction", "input", "accepted", "rejected")
    instruction, input_text, accepted, rejected = (sample[key] for key in source_keys)

    sample["prompt"] = alpaca_prompt.format(instruction, input_text, "")
    sample["chosen"] = accepted + EOS_TOKEN
    sample["rejected"] = rejected + EOS_TOKEN
    return sample

from datasets import load_dataset
# Load the train split of the preference dataset and add the
# prompt / chosen / rejected fields to every record.
dataset = load_dataset(
    "reciperesearch/dolphin-sft-v0.1-preference",
    split = "train",
).map(format_prompt)

README.md:   0%|          | 0.00/490 [00:00<?, ?B/s]

dpo_fixed.jsonl:   0%|          | 0.00/34.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

<a name="Train"></a>
### Train the model

In [7]:
# Keep the original order: Unsloth's DPO patch runs before `trl` is imported.
from unsloth import PatchDPOTrainer
PatchDPOTrainer()

from trl import DPOTrainer, DPOConfig
from unsloth import is_bfloat16_supported

# Choose the mixed-precision mode once: bf16 when available, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

# Prompt and completion each get half of the total sequence budget.
half_seq_length = max_seq_length // 2

dpo_args = DPOConfig(
    # sequence length limits
    max_length = max_seq_length,
    max_prompt_length = half_seq_length,
    max_completion_length = half_seq_length,
    # batching
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,
    # optimisation
    beta = 0.1,
    optim = "adamw_8bit",
    lr_scheduler_type = "linear",
    max_steps = 30,
    fp16 = not use_bf16,
    bf16 = use_bf16,
    # logging / output
    logging_steps = 1,
    output_dir = "outputs",
    report_to = "none",  # Disabling W&B here
)

dpo_trainer = DPOTrainer(
    model = model,
    args = dpo_args,
    train_dataset = dataset,
    tokenizer = tokenizer,
)

# Left as the last expression so the cell displays the returned TrainOutput.
dpo_trainer.train()


Tokenizing train dataset:   0%|          | 0/16000 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers and Unsloth!


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 16,000 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 30
 "-____-"     Number of trainable parameters = 41,943,040
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,rewards / chosen,rewards / rejected,rewards / accuracies,rewards / margins,logps / rejected,logps / chosen,logits / rejected,logits / chosen
1,0.6931,0.0,0.0,0.0,0.0,-68.164459,-134.967575,-3.051872,-2.996558
2,0.6403,-0.034438,-0.149053,0.75,0.114615,-118.072334,-184.524902,-3.148914,-3.097722
3,0.6605,-0.070861,-0.149991,0.5,0.07913,-62.63559,-129.435547,-2.976255,-3.040668
4,0.6283,-0.042874,-0.181936,0.75,0.139062,-50.69101,-104.285713,-2.921355,-2.997522
5,0.522,0.033828,-0.64936,0.5,0.683189,-95.487396,-177.425415,-3.067169,-3.024803
6,0.5358,-0.094339,-0.900406,0.75,0.806067,-89.143684,-181.20314,-2.856241,-2.809486
7,0.6443,-0.009735,-0.120452,0.5,0.110717,-34.679733,-70.409721,-3.06619,-3.057375
8,0.4312,-0.033816,-1.08977,0.875,1.055955,-80.312874,-132.954651,-3.064024,-3.055995
9,0.3546,-0.053668,-1.854913,1.0,1.801245,-122.136749,-237.195648,-3.173294,-3.153392
10,0.3915,-0.645176,-1.611283,0.75,0.966107,-87.839401,-177.894867,-2.997075,-3.026933


TrainOutput(global_step=30, training_loss=0.4422833099961281, metrics={'train_runtime': 128.4413, 'train_samples_per_second': 1.869, 'train_steps_per_second': 0.234, 'total_flos': 0.0, 'train_loss': 0.4422833099961281, 'epoch': 0.015})

<a name="Inference"></a>
### Inference

In [8]:
# Switch the model to Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# Build one demo prompt; the response slot stays empty for the model to fill.
demo_prompt = alpaca_prompt.format(
    "Explain the process of photosynthesis",
    "Include the role of chlorophyll and the main inputs and outputs",
    "",
)
inputs = tokenizer([demo_prompt], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)

# Decode the whole batch and print the first (only) sequence.
decoded_texts = tokenizer.batch_decode(outputs)
print(decoded_texts[0])

<s> Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Explain the process of photosynthesis

### Input:
Include the role of chlorophyll and the main inputs and outputs

### Response:
Photosynthesis is a process that plants use to convert light energy into chemical energy. It is a vital process for all life on Earth, as it provides the oxygen we breathe and the food we eat.

During photosynthesis, plants absorb light energy from the sun through their leaves. This energy is captured by a green pigment called chlorophyll, which is found in the chloroplasts of plant cells. Chlorophyll is responsible for the green color of leaves and is essential for photosynthesis to occur.

The process of photosynthesis involves several steps.


<a name="Save"></a>
### Saving the Model

In [10]:
# Save the model and the tokenizer side by side in one directory.
save_dir = "dpo_model"
model.save_pretrained(save_dir)
# Last expression: the cell displays the tuple of tokenizer file paths.
tokenizer.save_pretrained(save_dir)

('dpo_model/tokenizer_config.json',
 'dpo_model/special_tokens_map.json',
 'dpo_model/tokenizer.model',
 'dpo_model/added_tokens.json',
 'dpo_model/tokenizer.json')