In [1]:
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

Collecting unsloth
  Using cached unsloth-2024.12.12-py3-none-any.whl.metadata (60 kB)
Using cached unsloth-2024.12.12-py3-none-any.whl (175 kB)
Installing collected packages: unsloth
Successfully installed unsloth-2024.12.12
Found existing installation: unsloth 2024.12.12
Uninstalling unsloth-2024.12.12:
  Successfully uninstalled unsloth-2024.12.12
Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-6iy_v4eh/unsloth_d1c990bc93c44155aeebdda77d4a837f
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-6iy_v4eh/unsloth_d1c990bc93c44155aeebdda77d4a837f
  Resolved https://github.com/unslothai/unsloth.git to commit 87f5bffc45a8af7f23a41650b30858e097b86418
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing

In [5]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048
dtype = None # Auto detection of dtype
load_in_4bit = True # Reduce memory usage with 4-bit quantization
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct-bnb-4bit",  # Corrected model name
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)


==((====))==  Unsloth 2024.12.12: Fast Llama patching. Transformers: 4.47.1.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [6]:
model = FastLanguageModel.get_peft_model(
 model,
 r = 16, # LoRA rank
 target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
 lora_alpha = 16,
 lora_dropout = 0, # Optimized at 0
 bias = "none", # No additional bias terms
 use_gradient_checkpointing = "unsloth", # Gradient checkpointing to save memory
 random_state = 3407,
 use_rslora = False, # Rank stabilized LoRA, can be enabled for stability
)

Unsloth 2024.12.12 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [7]:
from datasets import load_dataset
dataset = load_dataset("charanhu/kannada-instruct-dataset-390-k", split="train")
from unsloth.chat_templates import standardize_sharegpt
dataset = standardize_sharegpt(dataset)

In [8]:
def formatting_prompts_func(examples):
 convos = examples["conversations"]
 texts = [tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False) for convo in convos]
 return { "text": texts }
dataset = dataset.map(formatting_prompts_func, batched=True)

Map:   0%|          | 0/389608 [00:00<?, ? examples/s]

In [9]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported
trainer = SFTTrainer(
 model = model,
 tokenizer = tokenizer,
 train_dataset = dataset,
 dataset_text_field = "text",
 max_seq_length = max_seq_length,
 data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
 dataset_num_proc = 2,
 packing = False,
 args = TrainingArguments(
 per_device_train_batch_size = 2,
 gradient_accumulation_steps = 4,
 warmup_steps = 5,
 max_steps = 60,
 learning_rate = 2e-4,
 fp16 = not is_bfloat16_supported(),
 bf16 = is_bfloat16_supported(),
 logging_steps = 1,
 optim = "adamw_8bit",
 weight_decay = 0.01,
 lr_scheduler_type = "linear",
 seed = 3407,
 output_dir = "outputs",
 ),
)

Map (num_proc=2):   0%|          | 0/389608 [00:00<?, ? examples/s]

In [10]:
from unsloth.chat_templates import train_on_responses_only
trainer = train_on_responses_only(
 trainer,
 instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
 response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)
trainer_stats = trainer.train()

Map:   0%|          | 0/389608 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 389,608 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 24,313,856
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
1,0.4865
2,0.4906
3,0.5727
4,0.4764
5,0.5081
6,0.428
7,0.4895
8,0.4199
9,0.6002
10,0.445


In [15]:
messages = [
 {"role": "user", "content": "ಈ ಕೆಳಗಿನ ಭಾಗವು 1/4 ಕ್ಕೆ ಏಕೆ ಸಮನಾಗಿರುತ್ತದೆ ಎಂಬುದನ್ನು ವಿವರಿಸಿ ಇಲ್ಲಿ ಇನ್ಪುಟ್ 4/16 ಆಗಿದೆ."}
]
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

inputs = tokenizer.apply_chat_template(
 messages,
 tokenize=True,
 add_generation_prompt=True,
 return_tensors="pt",
).to("cuda")
outputs = model.generate(input_ids=inputs, max_new_tokens=1024, use_cache=True, temperature=1.5, min_p=0.1)
tokenizer.batch_decode(outputs)

['<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 05 Jan 2025\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nಈ ಕೆಳಗಿನ ಭಾಗವು 1/4 ಕ್ಕೆ ಏಕೆ ಸಮನಾಗಿರುತ್ತದೆ ಎಂಬುದನ್ನು ವಿವರಿಸಿ ಇಲ್ಲಿ ಇನ್ಪುಟ್ 4/16 ಆಗಿದೆ.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nಮತ್ತೊಂದು 4/16 ಸಂದರ್ಭದಲ್ಲಿ, ಭಾಗವು 3/4 ಅನ್ನು ಸಂಗ್ರಹಿಸಿದೆ, ಇದು 3/4 ಹಾಗೂ 3/4 ಇವಲ್ಲಿ ಸಾಧಕ ಸಂಯೋಜನೆ ಎಂಬುದನ್ನು ಸೂಚಿಸುತ್ತದೆ, ಆದ್ದರಿಂದ ಇದನ್ನು 1/4 ಆಗಿವೆ. 2/3 ಸಾಧಕ ಸಂಯೋಜನೆಗಳಲ್ಲಿ ಏಕಪಕ್ಷೀಕರಣಗಳಲ್ಲಿರುವುದು 2/3 ಎಂಬುದನ್ನು ಸಾಧಿಸುತ್ತದೆ, ಅತಿಯಾದ ಎಂಬುದಕ್ಕಿಂತ ಒಂದು ವ್ಯತ್ಯಾಸದ ವರದಿ ಹೊಂದಿರಬಹುದು. ಉದಾಹರಣೆಗೆ ಎಕ್ಸ್ಪಿರೆಸನ್ಟ್ 2/3 ಉಳಿದ 2/3 ನಿಂದ 3/3 ಎಂಬುದಿಗೆ ಸಮಾನವಾಗಬಹುದು. 1 ಕ್ಕೆ ಅನುಕೂಲವಾಗದ ವ್ಯವಧಿಯ ಜೊತೆಗೆ, ನಾಲ್ಕು 1ನ ಆಮ್ಲವನ್ನು ಹೊರತೆಗೆದುಕೊಂಡರೆ, 16 ಅನ್ನು 12 ಎಂಬುದಿಗೆ ಸಮಾನ ವಿರಾಮವನ್ನು ನೀಡುತ್ತದೆ ಮತ್ತು 3/16ಕ್ಕೆ ಸಮಾನವಾಗಬಹುದು.<|eot_id|>']