In [1]:
%%capture
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [2]:
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048
dtype = None
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.10.3: Fast Mistral patching. Transformers = 4.44.2.
   \\   /|    GPU: NVIDIA A100-SXM4-40GB. Max memory: 39.564 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 8.0. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/4.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/155 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

Unsloth: We fixed a gradient accumulation bug, but it seems like you don't have the latest transformers version!
Please update transformers via:
`pip uninstall transformers -y && pip install --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git"`


In [3]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
)

Unsloth 2024.10.3 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


<a name="Data"></a>
### Data Preparation

In [4]:
sft_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{}

### Response:
{}"""

def format_prompt(sample):
    return {
        "prompt": sft_prompt.format(sample["instruction"], ""),
        "response": sample["response"]
    }

# Artificial dataset
artificial_dataset = [
    {"instruction": "Explain the concept of machine learning",
     "response": "Machine learning is a subset of artificial intelligence that focuses on creating algorithms and statistical models that enable computer systems to improve their performance on a specific task through experience, without being explicitly programmed."},
    {"instruction": "Describe the process of photosynthesis",
     "response": "Photosynthesis is the process by which plants use sunlight, water, and carbon dioxide to produce oxygen and energy in the form of sugar. It occurs in the chloroplasts of plant cells and is essential for life on Earth."}
]

from datasets import Dataset
dataset = Dataset.from_list(artificial_dataset)
dataset = dataset.map(format_prompt)

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

<a name="Train"></a>
### Initial Training

In [6]:
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    dataset_text_field="prompt",
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        max_steps=10,  # Reduced for demonstration
        learning_rate=5e-5,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="cosine",
        output_dir="outputs",
        save_strategy="steps",
        save_steps=5,  # Save checkpoint every 5 steps
        report_to="none",  # Disable wandb logging
    ),
    packing=False,  # Disable packing
)

trainer.train()


Map:   0%|          | 0/2 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 2 | Num Epochs = 10
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 10
 "-____-"     Number of trainable parameters = 41,943,040


**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers and Unsloth!


Step,Training Loss
1,0.7597
2,0.7597
3,0.7261
4,0.6259
5,0.5201
6,0.4162
7,0.3648
8,0.3345
9,0.315
10,0.3059


TrainOutput(global_step=10, training_loss=0.5128073573112488, metrics={'train_runtime': 12.4376, 'train_samples_per_second': 6.432, 'train_steps_per_second': 0.804, 'total_flos': 36049119805440.0, 'train_loss': 0.5128073573112488, 'epoch': 10.0})

<a name="Resume"></a>
### Resuming Training from Checkpoint

In [7]:
# Update the training arguments for the resumed training
trainer.args.max_steps = 20  # Train for 10 more steps

# Resume training from the last checkpoint
trainer_stats = trainer.train(resume_from_checkpoint=True)

print(f"Training resumed and completed. Total steps: {trainer.state.global_step}")

**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers and Unsloth!


  torch.load(os.path.join(checkpoint, OPTIMIZER_NAME), map_location=map_location)
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 2 | Num Epochs = 20
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 20
 "-____-"     Number of trainable parameters = 41,943,040
  checkpoint_rng_state = torch.load(rng_file)


Step,Training Loss
11,0.3021
12,0.3021
13,0.2532
14,0.2211
15,0.2041
16,0.1906
17,0.1804
18,0.1731
19,0.169
20,0.167


Training resumed and completed. Total steps: 20


<a name="Inference"></a>
### Inference

In [8]:
FastLanguageModel.for_inference(model)
inputs = tokenizer(
[
    sft_prompt.format(
        "Explain the importance of renewable energy sources",
        "",
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)
print(tokenizer.batch_decode(outputs)[0])

<s> Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Explain the importance of renewable energy sources

### Response:

Renewable energy sources are crucial for the future of our planet. They provide a clean and sustainable alternative to fossil fuels, which are depleting and harmful to the environment. Renewable energy sources, such as solar, wind, and hydroelectric power, are abundant and can be harnessed to generate electricity without producing harmful emissions.

By transitioning to renewable energy sources, we can reduce our dependence on fossil fuels, which are a major contributor to climate change. Renewable energy sources can also help to create jobs and stimulate economic growth, as the renewable energy


<a name="Save"></a>
### Saving the Model

In [9]:
model.save_pretrained("final_checkpoint_model")
tokenizer.save_pretrained("final_checkpoint_model")

('final_checkpoint_model/tokenizer_config.json',
 'final_checkpoint_model/special_tokens_map.json',
 'final_checkpoint_model/tokenizer.model',
 'final_checkpoint_model/added_tokens.json',
 'final_checkpoint_model/tokenizer.json')