In [None]:
import os

# Set the three "disable" switches to '1' before torch is imported below.
# NOTE(review): presumably these must be visible at import time to take
# effect; confirm each variable name is honoured by the installed versions.
os.environ.update(dict.fromkeys(
    ('TORCHDYNAMO_DISABLE', 'PYTORCH_DISABLE_TRITON', 'TRITON_DISABLE'),
    '1',
))
import torch


In [None]:
# Load the pre-quantized ("bnb-4bit") Llama 3.2 3B Instruct checkpoint together
# with its tokenizer. `model`, `tokenizer` and `max_seq_length` are reused by
# the later cells of this notebook.
from unsloth import FastLanguageModel  # text-only LLM loader used below
import torch
max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally! Also reused later for tokenization and the trainer.
load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
    max_seq_length = max_seq_length,
    load_in_4bit = load_in_4bit,
)

  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


W0912 05:12:39.705000 14760 Lib\site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


🦥 Unsloth Zoo will now patch everything to make training faster!


  GPU_BUFFERS = tuple([torch.empty(2*256*2048, dtype = dtype, device = f"{DEVICE_TYPE}:{i}") for i in range(n_gpus)])


==((====))==  Unsloth 2025.9.4: Fast Llama patching. Transformers: 4.57.0.dev0.
   \\   /|    Quadro P4200. Num GPUs = 1. Max memory: 8.0 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 6.1. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
# Wrap the loaded model with LoRA adapters on the attention projections
# (q/k/v/o) and the MLP projections (gate/up/down).
# NOTE(review): this rebinds `model` to the wrapped model, so re-running the
# cell would pass an already-wrapped model back in — restart and run from the
# loading cell instead of re-executing this one.
model = FastLanguageModel.get_peft_model(
    model,
    # NOTE(review): 34 is valid (> 0) but is not one of the suggested values;
    # confirm it is intentional and not a typo for 32.
    r = 34, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 64,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2025.9.4 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [None]:
from datasets import Dataset
from unsloth.chat_templates import standardize_sharegpt
import json

# Read the raw records. The encoding is pinned to UTF-8 so the result does not
# depend on the platform's locale default (this notebook was run on Windows).
with open("processed_data_with_instructions.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# process_text() reads all four of these keys, so a record is only usable when
# every one of them is present and not None.
required_fields = ("instruction", "input", "output", "mood")
filtered_data = [
    item for item in data
    if all(item.get(field) is not None for field in required_fields)
]

# Fail early with a readable message instead of building an empty dataset
# that only breaks later during mapping or training.
if not filtered_data:
    raise ValueError(
        "No usable records: every item is missing at least one of "
        f"{required_fields}."
    )

dataset = Dataset.from_list(filtered_data)

def process_text(file):
    """Render one dataset record as a single chat-formatted training string.

    Args:
        file: mapping with the keys 'input', 'output', 'instruction' and
            'mood' (one dataset row).

    Returns:
        dict with a single key "text" holding whatever
        ``tokenizer.apply_chat_template`` returns for the four-turn
        conversation (system persona, user message, system mood hint,
        assistant reply), with ``tokenize=False`` and no generation prompt.
    """
    user_turn = {"role": "user", "content": file['input']}
    assistant_turn = {"role": "assistant", "content": file['output']}
    persona_turn = {"role": "system", "content": file['instruction']}
    # Second system turn: tells the model which mood the user is in.
    mood_turn = {
        "role": "system",
        "content": f"You are a girl with a unique personality. You know what that the user mood is {file['mood']}. Use that as an advantage to make a suitable response.",
    }

    conversation = [persona_turn, user_turn, mood_turn, assistant_turn]
    rendered = tokenizer.apply_chat_template(
        conversation,
        tokenize = False,
        add_generation_prompt = False,
    )
    return {"text": rendered}

# Apply process_text to every row and drop all original columns, so the
# dataset keeps only the rendered "text" column that process_text returns.
dataset = dataset.map(process_text, remove_columns=dataset.column_names,)

Map: 100%|██████████████████████████████████████████████████████████████████| 546/546 [00:00<00:00, 5772.55 examples/s]


In [None]:
# Preview the first few rendered training strings, one per block, so the chat
# template markup is readable.
# The dataset holds only the already-rendered "text" column at this point, so
# there is no ShareGPT-style conversation column to standardize; the former
# standardize_sharegpt() call was dropped (its recorded output showed the text
# unchanged).
for preview_index in range(min(3, len(dataset))):
    print(dataset[preview_index]['text'])
    print("-" * 80)

["<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 12 Sep 2025\n\nRespond as a character with a tsundere personality. Show tsundere traits in your speech patterns and behavior.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat's the plan for today?<|eot_id|><|start_header_id|>system<|end_header_id|>\n\nYou are a girl with a unique personality. You know what that the user mood is fun. Use that as an advantage to make a suitable response.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nThe plan? To have an absolutely awesome day! Where should we start the adventure?<|eot_id|>", "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 12 Sep 2025\n\nRespond as a character with a tsundere personality. Show tsundere traits in your speech patterns and behavior.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWant to play a game?<|eot_id|><|start_he

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

def tokenize_function(examples):
    """Tokenize a batch of rendered chat strings for causal-LM fine-tuning.

    Args:
        examples: batch mapping with a 'text' key holding a list of strings
            (as passed by ``Dataset.map(..., batched=True)``).

    Returns:
        dict of plain Python lists with the keys ``input_ids``,
        ``attention_mask`` and ``labels``. ``labels`` is a copy of
        ``input_ids`` so the loss target is explicit instead of depending on
        what the data collator or trainer may add.

    Notes:
        * The rendered text already starts with the template's
          ``<|begin_of_text|>`` marker, so ``add_special_tokens=False`` keeps
          the tokenizer from prepending a second one.
        * No padding is applied here; sequences stay at their natural length
          and are padded per batch by the data collator, rather than every row
          being padded to ``max_seq_length``.
        * No ``return_tensors``: ``Dataset.map`` stores plain lists anyway.
    """
    encoded = tokenizer(
        examples['text'],
        truncation = True,
        max_length = max_seq_length,
        add_special_tokens = False,
    )
    input_ids = encoded["input_ids"]
    return {
        "input_ids": input_ids,
        "attention_mask": encoded["attention_mask"],
        # Independent copies so later in-place edits of labels cannot alias input_ids.
        "labels": [list(ids) for ids in input_ids],
    }

# Tokenize the rendered text in batches; the original "text" column is dropped
# so only the tokenizer outputs remain.
tokenized_dataset = dataset.map(
    tokenize_function,
    batched = True,
    remove_columns=dataset.column_names,
    num_proc = 1,
)

# Build the supervised fine-tuning trainer.
# output_dir / save_strategy / save_steps are TrainingArguments fields, so they
# are set there rather than passed as SFTTrainer keyword arguments; this way
# the checkpoint settings do not depend on any keyword-forwarding shim.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = tokenized_dataset,
    max_seq_length = max_seq_length,
    # Pads each batch to its longest sequence.
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 1,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        output_dir = "outputs",
        save_strategy = "steps",
        save_steps = 50,
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 20,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 60,
        learning_rate = 1e-5,
        # Exactly one of fp16 / bf16 is enabled, depending on hardware support.
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        report_to = "none", # Use this for WandB etc
    ),
)

Map: 100%|███████████████████████████████████████████████████████████████████| 546/546 [00:00<00:00, 777.14 examples/s]


In [None]:
# Run fine-tuning with the trainer built in the previous cell.
# NOTE(review): the saved output of this cell ends with
# "ZeroDivisionError: float division by zero" and the traceback is not
# preserved, so the cause is unconfirmed — re-run with the full traceback
# visible before relying on this notebook.
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 546 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 51,666,944 of 3,264,416,768 (1.58% trained)


ZeroDivisionError: float division by zero

In [None]:
# Save the fine-tuned model and tokenizer to "merged_model" using the
# "merged_16bit" save method.
# NOTE(review): the saved output shows "NameError: name 'model' is not
# defined", i.e. this cell was last executed in a kernel where the loading
# cells had not been run — execute the notebook top to bottom.
model.save_pretrained_merged("merged_model", tokenizer, save_method = "merged_16bit",)

NameError: name 'model' is not defined