<a href="https://colab.research.google.com/github/gssakash/Decompute_FineTuning/blob/main/Decompute.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This fine-tuning notebook uses [Unsloth AI](https://unsloth.ai/), which is extremely helpful for fine-tuning LLMs.

It also offers a 1.6x speed boost compared to FlashAttention, among other benefits.

References:

1. https://blog.monsterapi.ai/blogs/unsloth-sdpa-integrated-in-monsterapi/#:~:text=Unsloth%20is%20a%20method%20designed,token%20handling%20and%20parallel%20processing.

2. https://huggingface.co/blog/ImranzamanML/fine-tuning-1b-llama-32-a-comprehensive-article

In [None]:
%%capture
# Environment setup. %%capture suppresses the (long) pip output of this cell.
# Step 1: install the released Unsloth package together with a pinned xformers build.
!pip install unsloth "xformers==0.0.28.post2"
# Also get the latest nightly Unsloth!
# Step 2: remove the release installed above and reinstall Unsloth from the GitHub repository
# (--no-cache-dir forces a fresh download instead of reusing a cached wheel).
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [None]:
# Replace the preinstalled torch / torchvision / torchaudio builds with the current PyPI releases.
# In the recorded run below this removed the CPU-only "+cpu" 2.5.0 wheels and pulled 2.5.1 wheels
# that depend on the nvidia-cuda-* packages.
# NOTE(review): the versions are not pinned, so a later run may resolve to a torch release that no
# longer matches the pinned xformers build installed earlier — confirm before re-running.
!pip uninstall torch torchvision torchaudio -y
!pip install torch torchvision torchaudio

Found existing installation: torch 2.5.0+cpu
Uninstalling torch-2.5.0+cpu:
  Successfully uninstalled torch-2.5.0+cpu
Found existing installation: torchvision 0.20.0+cpu
Uninstalling torchvision-0.20.0+cpu:
  Successfully uninstalled torchvision-0.20.0+cpu
Found existing installation: torchaudio 2.5.0+cpu
Uninstalling torchaudio-2.5.0+cpu:
  Successfully uninstalled torchaudio-2.5.0+cpu
Collecting torch
  Downloading torch-2.5.1-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)
Collecting torchvision
  Downloading torchvision-0.20.1-cp310-cp310-manylinux1_x86_64.whl.metadata (6.1 kB)
Collecting torchaudio
  Downloading torchaudio-2.5.1-cp310-cp310-manylinux1_x86_64.whl.metadata (6.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadat

In [None]:
# Swap the full TensorFlow package for the CPU-only build.
# NOTE(review): presumably done so TensorFlow does not compete with PyTorch for the GPU — confirm.
!pip uninstall -y tensorflow && pip install tensorflow-cpu

In [None]:
import os
import torch
import pandas as pd
from datasets import load_dataset, concatenate_datasets
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from unsloth import FastLanguageModel

# Set maximum sequence length
# (token budget reused by the model loader, the tokenization step and the trainer further down)
max_seq_length = 5020

# Load model and tokenizer
# Only the tokenizer is loaded in this cell; the model itself is loaded in the next cell.
# NOTE(review): the next cell loads "unsloth/Llama-3.2-1B-bnb-4bit" and rebinds `tokenizer`,
# so the tokenizer created here is replaced before it is used — confirm whether this load is needed.
model_name = "unsloth/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

from peft import get_peft_config, LoraConfig, TaskType



In [None]:
# Load the 4-bit quantized base model and its tokenizer through Unsloth.
# max_seq_length is (re)declared here so the cell can be run on its own.
max_seq_length = 5020
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-1B-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,  # let the loader choose the compute dtype
)

# Wrap the base model with LoRA adapters on the attention and MLP projection layers.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
    use_rslora=True,
    use_gradient_checkpointing="unsloth",
    random_state = 32,
    loftq_config = None,
)

# print_trainable_parameters() prints its own summary and returns None, so it is called
# directly; wrapping it in print() would emit an extra "None" line.
model.print_trainable_parameters()

In [None]:
# Make sure the Hugging Face Hub client is installed; the next cell imports `login` from it.
!pip install huggingface_hub

In [None]:
# Authenticate this session with the Hugging Face Hub.
# login() is called without arguments, so no token is stored in the notebook itself.
# NOTE(review): presumably this prompts interactively for an access token — confirm in the target runtime.
from huggingface_hub import login
login()

In [None]:
from datasets import load_dataset, concatenate_datasets

# Load datasets
# BoolQ: no split is requested here; the 'train' split is selected later by key.
boolq_dataset = load_dataset("google/boolq", trust_remote_code=True)
# PIQA: only the 'train' split is requested, so the result is used directly as a single dataset.
piqa_dataset = load_dataset("ybisk/piqa", split='train', trust_remote_code=True)
# Winogrande: the 'winogrande_xs' configuration; the 'train' split is selected later by key.
winogrande_dataset = load_dataset("allenai/winogrande", 'winogrande_xs', trust_remote_code=True)

def format_boolq(example):
    """
    Turn one BoolQ record into the question/answer text used for fine-tuning.

    Args:
        example: mapping providing "question" and a truthy/falsy "answer".

    Returns:
        dict with a single "text" entry: a "Question: ..." line followed by an
        "Answer: Yes" or "Answer: No" line.
    """
    prompt = example["question"]
    # Index a (falsy, truthy) pair with the answer's truth value to get the Yes/No word.
    verdict = ("No", "Yes")[bool(example["answer"])]
    rendered = f"Question: {prompt}\nAnswer: {verdict}"
    return {"text": rendered}

def format_piqa(example):
    """
    Format a single example from the PIQA dataset into a structured string.

    The goal and both candidate solutions are always rendered. When the example
    carries a "label" of 0 or 1, an "Answer: Solution N" line (N = label + 1) is
    appended so the training text contains the target, in the same way
    format_boolq includes its answer. Examples with no label, or with any other
    label value, keep the plain three-line layout.

    Args:
        example: mapping providing "goal", "sol1", "sol2" and optionally "label".

    Returns:
        dict with a single "text" entry holding the formatted example.
    """
    text = f"Goal: {example['goal']}\nSolution 1: {example['sol1']}\nSolution 2: {example['sol2']}"

    # Only append an answer line for a usable label; unlabeled rows are left untouched.
    label = example.get("label")
    if label in (0, 1):
        text += f"\nAnswer: Solution {label + 1}"

    return {"text": text}

def format_winogrande(example):
    """
    Format a single example from the Winogrande dataset into a structured string.

    The sentence and both options are always rendered. When the example carries
    an "answer" equal to 1 or 2 (as a string or an integer), an
    "Answer: Option N" line is appended so the training text contains the
    target, in the same way format_boolq includes its answer. Examples with a
    missing or different answer value keep the plain three-line layout.

    Args:
        example: mapping providing "sentence", "option1", "option2" and
            optionally "answer".

    Returns:
        dict with a single "text" entry holding the formatted example.
    """
    text = f"Sentence: {example['sentence']}\nOption 1: {example['option1']}\nOption 2: {example['option2']}"

    # Normalise to a stripped string so both "1" and 1 are accepted.
    answer = str(example.get("answer", "")).strip()
    if answer in ("1", "2"):
        text += f"\nAnswer: Option {answer}"

    return {"text": text}

# Format each dataset: every row gains a "text" column produced by the matching formatter
boolq_formatted = boolq_dataset['train'].map(format_boolq)
piqa_formatted = piqa_dataset.map(format_piqa)
winogrande_formatted = winogrande_dataset['train'].map(format_winogrande)


def _keep_text_only(dataset):
    """Return `dataset` with every column except "text" removed."""
    return dataset.remove_columns([col for col in dataset.column_names if col != "text"])


# Ensure all datasets have consistent features before concatenation.
# Only the generated "text" column is kept. The previous filters kept the raw source
# columns instead (and none at all for BoolQ), which discarded "text" and left the three
# datasets with different schemas.
boolq_formatted = _keep_text_only(boolq_formatted)
piqa_formatted = _keep_text_only(piqa_formatted)
winogrande_formatted = _keep_text_only(winogrande_formatted)

# Split the PIQA dataset into train, validation, and test sets.
# 20% is held out as test, then 25% of the remaining 80% (= 20% overall) becomes validation,
# giving a 60/20/20 split. A fixed seed keeps the split identical across re-runs.
piqa_train_test_split = piqa_formatted.train_test_split(test_size=0.2, seed=32)  # Split into train and test
piqa_train_val_split = piqa_train_test_split['train'].train_test_split(test_size=0.25, seed=32)  # Split train into train and validation

# Combine the datasets.
# Both the PIQA train and validation parts are included; the PIQA test part
# (piqa_train_test_split['test']) is deliberately left out of the combined set.
combined_dataset = concatenate_datasets([
    boolq_formatted,
    piqa_train_val_split['train'],
    piqa_train_val_split['test'],
    winogrande_formatted
])

# Sanity check: the trainer reads the "text" field, so it must be the only column left.
assert combined_dataset.column_names == ["text"], combined_dataset.column_names

In [None]:
# Tokenize the combined dataset with efficient memory usage
def tokenize_function(examples):
    """
    Tokenize a batch of examples for training.

    When the batch has a "text" column (written by the format_* functions), that
    text is tokenized as-is, so rows from every source dataset are handled. If
    there is no "text" column, the text is rebuilt from the "goal", "sol1" and
    "sol2" columns using the PIQA layout.

    Sequences are truncated to max_seq_length but not padded here: padding every
    row to the full max_seq_length would store mostly padding tokens, so padding
    is left to batch collation.

    Args:
        examples: batch mapping column name -> list of values
            (as passed by Dataset.map(..., batched=True)).

    Returns:
        The tokenizer output for the batch (token ids and attention masks).
    """
    if "text" in examples:
        texts = list(examples["text"])
    else:
        # Fallback for batches that only carry the raw PIQA columns.
        texts = [f"Goal: {g}\nSolution 1: {s1}\nSolution 2: {s2}"
                 for g, s1, s2 in zip(examples['goal'], examples['sol1'], examples['sol2'])]

    return tokenizer(texts, truncation=True, max_length=max_seq_length)

# Tokenize the combined datasets in batches.
# This runs once; repeating the identical .map() call only recomputed the same result.
tokenized_combined_datasets = combined_dataset.map(tokenize_function, batched=True)

In [None]:
from transformers import TrainingArguments
from trl import SFTTrainer


# Split the combined dataset into training and evaluation sets.
# A fixed seed keeps the split identical across re-runs.
train_test_split = tokenized_combined_datasets.train_test_split(test_size=0.1, seed=32)  # 10% for evaluation

# Choose the mixed-precision mode the current GPU supports instead of forcing fp16:
# bf16 where available, fp16 otherwise. Exactly one of the two flags is enabled.
from unsloth import is_bfloat16_supported

use_bf16 = is_bfloat16_supported()

# Set up training arguments with mixed precision and gradient accumulation to reduce memory footprint
training_args = TrainingArguments(
    output_dir="output",
    per_device_train_batch_size=8,  # Adjusted batch size for memory constraints
    num_train_epochs=40,
    learning_rate=3e-4,
    weight_decay=0.01,
    logging_steps=1,
    save_steps=500,
    evaluation_strategy="steps",
    # Without an explicit value, eval_steps falls back to logging_steps (1 here), which
    # would run a full evaluation after every optimizer step. Align it with save_steps.
    eval_steps=500,
    fp16=not use_bf16,  # Mixed precision: fp16 only when bf16 is unavailable
    bf16=use_bf16,
    gradient_accumulation_steps=4,  # Accumulate gradients over multiple steps
)

# Initialize the SFTTrainer with both train and eval datasets
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_test_split['train'],
    eval_dataset=train_test_split['test'],  # Add evaluation dataset here
    dataset_text_field="text",  # Specify the text field in your dataset
    max_seq_length=max_seq_length,
    dataset_num_proc=2,  # Number of processes for loading datasets
    packing=True,  # Enable packing
    args=training_args,
)


In [None]:

# Start training on the combined dataset while monitoring metrics
# (uses the `trainer` configured in the previous cell).
trainer.train()

# Clear CUDA cache after training or when needed
# (asks PyTorch to release cached GPU memory it is no longer using).
torch.cuda.empty_cache()

In [None]:

def infer(question):
    """
    Generate an answer to `question` with the fine-tuned model.

    The prompt follows the question/answer template produced by format_boolq
    ("Question: ..." then an "Answer:" line), ending right after "Answer:" so
    the model continues with the answer itself.

    Args:
        question: the question text to ask.

    Returns:
        The decoded sequence as a string: the prompt followed by up to 50 newly
        generated tokens, with special tokens removed.
    """
    input_text = f"Question: {question}\nAnswer:"
    # Place the inputs on whatever device the model lives on rather than assuming "cuda".
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Generate response from model using no_grad to save memory during inference
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=50)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example inference: run one yes/no style question through infer() and print the decoded output
question_example = "Is Python a programming language?"
print(infer(question_example))