In [30]:
%%capture
# Environment setup cell. `%%capture` suppresses the (long) pip output.
# Installs Unsloth from GitHub, then a GPU-dependent set of companion packages.
from dotenv import load_dotenv
# Presumably loads variables from a local .env file into the process
# environment (python-dotenv) - confirm a .env file is actually provided.
load_dotenv()
import torch
# CUDA compute capability of the current GPU as (major, minor);
# only the major version is used below to pick the package set.
major_version, minor_version = torch.cuda.get_device_capability()
# Must install separately since Colab has torch 2.2.1, which breaks packages
# NOTE(review): the recorded run log in this notebook reports Torch 2.8.0, so
# the version mentioned above may be stale - confirm.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
if major_version >= 8:
    # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
    # Compared with the older-GPU branch, this one additionally installs
    # packaging, ninja, einops and flash-attn.
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    !pip install --no-deps xformers trl peft accelerate bitsandbytes
pass

In [32]:
import os

from unsloth import FastLanguageModel
import torch

# Model-loading configuration.
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# SECURITY: never commit an access token in a notebook. The token is read from
# the HF_TOKEN environment variable (e.g. populated from a .env file by
# load_dotenv() in the setup cell). `None` means "no token", i.e. anonymous
# access. Any token that was previously hardcoded in this cell must be treated
# as leaked and revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN")

# Load the pre-quantized 4-bit Llama-3 8B checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = hf_token,
)

==((====))==  Unsloth 2025.10.12: Fast Llama patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [33]:
# Wrap the base model with LoRA adapters; the result replaces `model`.
model = FastLanguageModel.get_peft_model(
    model,
    # --- adapter shape ---
    r = 16,          # LoRA rank; any value > 0 (8, 16, 32, 64, 128 are suggested)
    lora_alpha = 16,
    lora_dropout = 0,   # any value is supported, 0 is the optimized setting
    bias = "none",      # any value is supported, "none" is the optimized setting
    # --- which projections get adapters (attention + MLP) ---
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    # --- memory / reproducibility ---
    # True or "unsloth"; "unsloth" is described upstream as using ~30% less
    # VRAM and fitting 2x larger batch sizes (useful for very long context).
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    # --- optional variants, both disabled here ---
    use_rslora = False,   # rank-stabilized LoRA
    loftq_config = None,  # LoftQ
)

## Dataset preparation

In [34]:
import pandas as pd

In [35]:
# Read the question/answer training pairs from the uploaded CSV (UTF-8 text).
df = pd.read_csv("/content/training-data.csv", encoding="utf-8")


In [36]:
# Preview the first rows to confirm the Question / Answer columns loaded.
df.head()

Unnamed: 0,Question,Answer
0,What is Prabhu Mahalaxmi Life Insurance Ltd.?,Prabhu Mahalaxmi Life Insurance Ltd. is a newl...
1,What makes Prabhu Mahalaxmi Life Insurance Ltd...,The company combines a strong legacy with a sh...
2,What standards does Prabhu Mahalaxmi Life Insu...,Prabhu Mahalaxmi Life Insurance is dedicated t...
3,What kind of relationships does Prabhu Mahalax...,Prabhu Mahalaxmi Life Insurance is committed t...
4,What does Prabhu Mahalaxmi Life Insurance invi...,Prabhu Mahalaxmi Life Insurance invites custom...


In [37]:
# Instruction-style template with two positional slots:
# the first `{}` receives the question, the second `{}` the answer.
prompt = (
    "Below is an instruction that describes a query. Write a response that appropriately answers the query.\n"
    "\n"
    "### Instruction:\n"
    "{}\n"
    "\n"
    "### Response:\n"
    "{}"
)

# End-of-sequence marker taken from the loaded tokenizer. It is appended to
# every formatted training example in `format_func` - presumably so the
# fine-tuned model learns where a response ends.
EOS_TOKEN = tokenizer.eos_token

In [38]:
def format_func(dataframe):
    """Render a batch of question/answer pairs into training text.

    Args:
        dataframe: A batch mapping with parallel 'Question' and 'Answer'
            sequences (as passed by a batched `Dataset.map` call).

    Returns:
        A dict with a single key 'text' holding one string per pair: the
        module-level `prompt` template filled with the question and answer,
        followed by `EOS_TOKEN`.
    """
    qa_pairs = zip(dataframe['Question'], dataframe['Answer'])
    return {
        'text': [prompt.format(question, answer) + EOS_TOKEN
                 for question, answer in qa_pairs]
    }

In [39]:
import pyarrow as pa
import pyarrow.dataset as ds
import pandas as pd
from datasets import Dataset

# Convert the pandas frame to a Hugging Face Dataset through the public
# constructor instead of wrapping a raw pyarrow table by hand.
# preserve_index=False keeps only the data columns (Question, Answer), so the
# DataFrame index never ends up as an extra feature.
hg_dataset = Dataset.from_pandas(df, preserve_index=False)

In [40]:
# Display the dataset summary (feature names and row count).
hg_dataset

Dataset({
    features: ['Question', 'Answer'],
    num_rows: 126
})

In [41]:
# Add the rendered 'text' column; batched mode hands `format_func` whole
# column slices rather than single rows.
dataset = hg_dataset.map(
    format_func,
    batched=True,
)

Map:   0%|          | 0/126 [00:00<?, ? examples/s]

## Training

In [42]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Decide the mixed-precision mode once. Unsloth's helper is used instead of
# torch.cuda.is_bf16_supported(), which can report True via emulation on GPUs
# without native bfloat16 (e.g. the Tesla T4 in this notebook's run log, where
# Unsloth reports "Bfloat16 = FALSE"). Exactly one of fp16 / bf16 is enabled.
use_bf16 = is_bfloat16_supported()

# Supervised fine-tuning on the 'text' column produced by `format_func`.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        # Effective batch size = 2 (per device) x 4 (accumulation steps) = 8.
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        num_train_epochs = 5,
        learning_rate = 2e-4,
        fp16 = not use_bf16,
        bf16 = use_bf16,
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        # NOTE(review): `report_to` is not set; the recorded run shows an
        # interactive wandb login prompt during training. Pass
        # report_to = "none" here if unattended "Run All" execution is needed.
    ),
)

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/126 [00:00<?, ? examples/s]

In [43]:
# Run the fine-tuning loop; the value returned by train() is kept in
# `trainer_stats` for later inspection.
trainer_stats = trainer.train()

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 126 | Num Epochs = 5 | Total steps = 80
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33madhikaridipak449[0m ([33madhikaridipak449-palm-mind[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Detected [huggingface_hub.inference, openai] in use.
[34m[1mwandb[0m: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
[34m[1mwandb[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/


Step,Training Loss
1,3.0318
2,2.7065
3,2.973
4,2.8573
5,2.4378
6,1.9542
7,1.6507
8,1.4787
9,1.7012
10,1.2896


In [51]:

# Push the fine-tuned model and tokenizer to your Hugging Face Hub account

# !huggingface-cli login

# model.push_to_hub("sigmoid484829/Llama-3-8b-chat-finetune-test", check_pr=True)

# tokenizer.push_to_hub("sigmoid484829/Llama-3-8b-chat-finetune-test",check_pr=True)