In [1]:
%%capture
# Environment setup cell; %%capture suppresses the (long) pip output.
# Packages installed with --no-deps are added without resolving their own dependencies.
# NOTE(review): only xformers is pinned to an exact version; the other packages float,
# so a fresh run may pick up newer releases than the ones this notebook was tested with.
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29 peft trl triton
!pip install --no-deps cut_cross_entropy unsloth_zoo
# These packages are installed normally, together with their dependencies.
!pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
!pip install --no-deps unsloth

# Load model

In [30]:
from unsloth import FastLanguageModel
import torch
import random
import numpy as np

# based on: https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(1B_and_3B)-Conversational.ipynb

# One seed shared by every random number generator used in this notebook
# (Python's `random`, NumPy, and PyTorch on CPU and CUDA).
seed = 42069
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    seed_fn(seed)

# Model loading options; max_seq_length is reused later when the trainer is built.
max_seq_length = 2048
dtype = None  # NOTE(review): presumably lets Unsloth pick the dtype itself - confirm
load_in_4bit = True

# Load the base model and its tokenizer with the options above.
load_options = dict(
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
model_og, tokenizer_og = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct",
    **load_options
)

==((====))==  Unsloth 2025.1.8: Fast Llama patching. Transformers: 4.47.1.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [31]:
# Modules handed to the PEFT wrapper: the q/k/v/o and gate/up/down projections plus
# lm_head (the run log below reports lm_head being trained in mixed precision).
peft_target_modules = [
    "lm_head",
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the base model with adapters; rank and alpha are both 16, no dropout, no bias
# terms, and the adapter initialisation reuses the notebook-wide seed.
model = FastLanguageModel.get_peft_model(
    model_og,
    r = 16,
    target_modules = peft_target_modules,
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = seed,
    use_rslora = True,
    loftq_config = None,
)

Unsloth: Offloading output_embeddings to disk to save VRAM


  offloaded_W = torch.load(filename, map_location = "cpu", mmap = True)


Unsloth: Training lm_head in mixed precision to save VRAM


# Process dataset

In [32]:
from unsloth.chat_templates import get_chat_template
from datasets import Dataset
import pandas as pd

# Tokenizer configured with the "llama-3.1" chat template; it is used below both to
# render training examples and to build inference prompts.
tokenizer = get_chat_template(tokenizer_og, chat_template = "llama-3.1")

# Read the training CSV and convert it to a Hugging Face Dataset, leaving the
# DataFrame index out of the resulting columns.
train_data = pd.read_csv("train.csv")
dataset = Dataset.from_pandas(
    train_data,
    preserve_index = False,
)

In [33]:
# Ordinal label for each numeric "Pclass" value.
class_map = {1: "1st", 2: "2nd", 3: "3rd"}

# Full port name for each "Embarked" code. "Unknown" maps to itself so that a missing
# value, already replaced by the "Unknown" placeholder, can still be looked up.
embarkation_map = dict(
    C="Cherbourg",
    Q="Queenstown",
    S="Southampton",
    Unknown="Unknown",
)

def remove_none(d):
    """Return ``d`` as a string, or "Unknown" when the value is missing.

    A value counts as missing when it is ``None`` or a float NaN. NaN is what pandas
    uses for empty cells, so rows taken straight from a DataFrame are handled the same
    way as rows where the missing value has already been converted to ``None``.

    Args:
        d: Any value; typically a str, int, float or None.

    Returns:
        "Unknown" for a missing value, otherwise ``str(d)``.
    """
    # NaN is the only float that compares unequal to itself.
    if d is None or (isinstance(d, float) and d != d):
        return "Unknown"
    return str(d)

def data_formatter(data):
    """Turn one passenger record into a system/user/assistant chat example.

    Relies on the module-level ``tokenizer``, ``class_map``, ``embarkation_map`` and
    ``remove_none``.

    Args:
        data: Mapping with the keys "Name", "Sex", "Age", "SibSp", "Parch", "Pclass",
            "Ticket", "Fare", "Cabin", "Embarked" and "Survived".

    Returns:
        dict with the three chat messages ("system_prompt", "user_prompt",
        "assistant_prompt") and "text", the conversation rendered as a string by the
        tokenizer's chat template.
    """
    # One "label: value" entry per feature, in the order they appear in the prompt.
    feature_lines = [
        "Name: " + data["Name"],
        "Sex: " + data["Sex"],
        "Age: " + remove_none(data["Age"]),
        "Number of siblings and spouses on board: " + str(data["SibSp"]),
        "Number of parents and children on board: " + str(data["Parch"]),
        "Ticket class: " + class_map[data["Pclass"]],
        "Ticket number: " + data["Ticket"],
        "Passenger fare: " + str(data["Fare"]),
        "Cabin number: " + remove_none(data["Cabin"]),
        "Port of embarkation: " + embarkation_map[remove_none(data["Embarked"])],
    ]
    # Every entry, including the last, is terminated by a newline.
    passenger_description = "".join(line + "\n" for line in feature_lines)

    # The target answer the assistant turn should contain.
    if data["Survived"] == 1:
        answer = "Yes"
    else:
        answer = "No"

    system_prompt = {
        "role": "system",
        "content": "You are going to predict if people on the Titanic survived or not. Use the information about the person given below to make the prediction. Answer with a \"Yes\" or a \"No\".",
    }
    user_prompt = {"role": "user", "content": passenger_description}
    assistant_prompt = {"role": "assistant", "content": answer}

    # Render the full conversation (answer included) as plain text for training.
    rendered = tokenizer.apply_chat_template(
        [system_prompt, user_prompt, assistant_prompt],
        tokenize=False,
        add_generation_prompt=False,
    )
    return {
        "system_prompt": system_prompt,
        "user_prompt": user_prompt,
        "assistant_prompt": assistant_prompt,
        "text": rendered,
    }

In [34]:
# Add the chat-formatted columns produced by data_formatter to every row.
dataset = dataset.map(data_formatter)

# Shuffle with the notebook-wide seed and hold out 20% of the rows for validation.
dataset_split = dataset.train_test_split(test_size=0.2, shuffle=True, seed=seed)
dataset_train, dataset_val = dataset_split["train"], dataset_split["test"]

Map:   0%|          | 0/891 [00:00<?, ? examples/s]

# Training

In [35]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

# Collator that batches the tokenized examples.
seq2seq_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer)

# Train in bf16 when it is supported, otherwise in fp16 (exactly one of the two is on).
use_bf16 = is_bfloat16_supported()

# 8 examples per device x 4 accumulation steps = 32 examples per optimizer step.
training_args = TrainingArguments(
    per_device_train_batch_size = 8,
    gradient_accumulation_steps = 4,
    warmup_steps = 10,
    num_train_epochs = 2,
    learning_rate = 1e-4,
    fp16 = not use_bf16,
    bf16 = use_bf16,
    logging_steps = 1,
    optim = "adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = seed,
    output_dir = "outputs",
    report_to = "none",
    group_by_length = True
)

# Supervised fine-tuning on the rendered "text" column of the training split.
trainer_og = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset_train,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = seq2seq_collator,
    dataset_num_proc = 2,
    packing = False,
    args = training_args
)

Map (num_proc=2):   0%|          | 0/712 [00:00<?, ? examples/s]

In [36]:
from unsloth.chat_templates import train_on_responses_only
# Header strings that open a user turn and an assistant turn in the rendered chat text.
user_turn_header = "<|start_header_id|>user<|end_header_id|>\n\n"
assistant_turn_header = "<|start_header_id|>assistant<|end_header_id|>\n\n"

# NOTE(review): going by its name, this limits the training loss to the assistant
# responses - confirm against the Unsloth documentation.
trainer = train_on_responses_only(
    trainer_og,
    instruction_part = user_turn_header,
    response_part = assistant_turn_header,
)

Map:   0%|          | 0/712 [00:00<?, ? examples/s]

In [None]:
# Run fine-tuning; the training loss is reported at every step (logging_steps = 1).
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 712 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 8 | Gradient Accumulation steps = 4
\        /    Total batch size = 32 | Total steps = 44
 "-____-"     Number of trainable parameters = 418,316,288


Step,Training Loss
1,0.7583
2,0.6421
3,0.5743
4,0.5003
5,0.1993
6,0.2614
7,0.1737
8,0.3415
9,0.2558
10,0.1469


# Validate

In [None]:
from unsloth.chat_templates import get_chat_template
from transformers import TextIteratorStreamer

# Put the fine-tuned model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# skip_prompt=True makes the streamer yield only newly generated text. Special tokens
# are not skipped, so the end-of-turn marker shows up in the collected text.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)

# Marker that ends the assistant turn in the generated text.
end_of_turn = "<|eot_id|>"

num_correct = 0

for i, example in enumerate(dataset_val):
    # Prompt with the system and user turns only; the model has to write the answer.
    messages = [
        example["system_prompt"],
        example["user_prompt"],
    ]

    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True,
        return_tensors = "pt",
    ).to("cuda")

    # generate() runs synchronously here, so the streamer already holds the complete
    # output when it returns and can simply be drained afterwards.
    # NOTE(review): temperature / min_p only matter when sampling is active; if it is,
    # the reported accuracy varies between runs - consider greedy decoding for evaluation.
    model.generate(input_ids=inputs, streamer=streamer, max_new_tokens=128,
                   use_cache=True, temperature=1.0, min_p=0.1)

    generated_text = "".join(streamer)

    # Keep the text before the end-of-turn marker. partition() returns the whole string
    # when the marker is missing (e.g. generation hit max_new_tokens), whereas slicing
    # with find()'s -1 result silently dropped the last character. Surrounding
    # whitespace is stripped so it cannot break the exact-match comparison.
    model_answer = generated_text.partition(end_of_turn)[0].strip()
    true_answer = example["assistant_prompt"]["content"]
    print("Iteration:", i, "Model:", model_answer, "True:", true_answer)
    if model_answer == true_answer:
        num_correct += 1

# Fraction of validation examples answered exactly right.
val_accuracy = num_correct / len(dataset_val)
print("Validation accuracy:", val_accuracy)