<a href="https://colab.research.google.com/github/ieea/Agrani_App/blob/main/Fine_tune_BNS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
%%capture
%pip install unsloth

In [11]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments, AutoTokenizer, AutoModelForCausalLM
import torch
import os
from peft import PeftModel

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [4]:
max_seq_length = 2048
dtype = None
load_in_4bit = True
quantized_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

In [5]:
lora_layers_and_quantized_model= FastLanguageModel.get_peft_model(
    quantized_model,
    r=16,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=True,
)

Unsloth 2024.12.4 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [6]:
from datasets import load_dataset

#Importing the dataset
#dataset = load_dataset('csv', data_files='/content/drive/MyDrive/QA_Public_data.xlsx')
#dataset["train"][0]

In [7]:
!pip install openpyxl



In [8]:
!pip install datasets --upgrade   # Upgrade the datasets library to the latest version



In [12]:
from datasets import load_dataset
from datasets import Dataset
import pandas as pd

# Load the Excel file using pandas
df = pd.read_excel('/content/sample_data/QA_Public_data.xlsx')
# Convert the pandas DataFrame to a Hugging Face Dataset
dataset = Dataset.from_pandas(df, split='train')

# Assuming your Excel file has columns for 'Question', 'Answer', and 'Type'
# Create a new 'conversations' column in the desired format
def create_conversation(row):
  return {"conversations": [{"from": "human", "value": row["Question"]}, {"from": "assistant", "value": row["Answer"]}]}

dataset = dataset.map(create_conversation, batched=False)

dataset = dataset.rename_column('Type', 'instruction') # optional: rename to align with the chat template function
# Instead of adding a new column, replace the existing 'conversations' column
dataset = dataset.remove_columns(['Question', 'Answer', 'instruction'])
#Now apply the standardize_sharegpt function
from unsloth.chat_templates import standardize_sharegpt
dataset = standardize_sharegpt(dataset)

# Access the data
print(dataset[0])

Map:   0%|          | 0/2694 [00:00<?, ? examples/s]

Standardizing format:   0%|          | 0/2694 [00:00<?, ? examples/s]

{'conversations': [{'content': 'Is the Bharatiya Nyaya Sanhita established in 2023?', 'role': 'user'}, {'content': 'Yes, the Bharatiya Nyaya Sanhita was established in 2023.', 'role': 'assistant'}]}


In [17]:
def formatting_prompts_func(examples):
  convos = examples["conversations"]
  texts = [tokenizer.apply_chat_template(convo, tokenize= False, add_generation_prompt=False) for convo in convos]
  return {"text": texts}
dataset = dataset.map(formatting_prompts_func, batched=True)

Map:   0%|          | 0/2694 [00:00<?, ? examples/s]

In [18]:
for i in dataset:
  print(i)
  break

{'conversations': [{'content': 'Is the Bharatiya Nyaya Sanhita established in 2023?', 'role': 'user'}, {'content': 'Yes, the Bharatiya Nyaya Sanhita was established in 2023.', 'role': 'assistant'}], 'text': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 17 Dec 2024\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nIs the Bharatiya Nyaya Sanhita established in 2023?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nYes, the Bharatiya Nyaya Sanhita was established in 2023.<|eot_id|>'}


In [19]:
print(dataset[10])

{'conversations': [{'content': 'Who is mentioned as incapable of judgment due to intoxication caused against their will?', 'role': 'user'}, {'content': 'A person incapable of judgment by reason of intoxication caused against their will is mentioned.', 'role': 'assistant'}], 'text': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 17 Dec 2024\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho is mentioned as incapable of judgment due to intoxication caused against their will?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nA person incapable of judgment by reason of intoxication caused against their will is mentioned.<|eot_id|>'}


In [21]:
from trl import SFTTrainer
from transformers import TrainingArguments,DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported
trainer = SFTTrainer (
    model=lora_layers_and_quantized_model,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=20,
        max_steps=300,
        learning_rate=1.5e-4,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=10,
        output_dir="outputs",
        optim="adamw_8bit",
        weight_decay=0.02,
        lr_scheduler_type="linear",
        seed=3407
       ),
)

Map (num_proc=2):   0%|          | 0/2694 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [22]:
from unsloth.chat_templates import train_on_responses_only
trainer = train_on_responses_only(trainer,
                                  instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
                                  response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n"
                                  )
trainer_stats= trainer.train()

Map:   0%|          | 0/2694 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 2,694 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 1 | Gradient Accumulation steps = 4
\        /    Total batch size = 4 | Total steps = 300
 "-____-"     Number of trainable parameters = 24,313,856
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
10,1.4645
20,1.27
30,1.2628
40,0.8944
50,0.89
60,1.1096
70,0.9417
80,1.0633
90,0.8233
100,0.94


In [24]:
import os
save_dir ="/content/drive/MyDrive/BNS_model_v1"
os.makedirs(save_dir, exist_ok=True)
print(f"Saving model to {save_dir}")

lora_layers_and_quantized_model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

print(f"Model saved to {save_dir}")

Saving model to /content/drive/MyDrive/BNS_model_v1
Model saved to /content/drive/MyDrive/BNS_model_v1


In [26]:
from unsloth import FastLanguageModel
from peft import PeftModel
import torch

#Base model setup
max_seq_length = 2048
dtype = None
load_in_4bit = True
finetuned_model= "/content/drive/MyDrive/BNS_model_v1"

#load the base model or quantized model and tokeniser

quantized_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

#Merge the LoRA weights into the base model
final_model= PeftModel.from_pretrained(quantized_model, finetuned_model)
final_model= final_model.merge_and_unload()
save_dir= "/content/drive/MyDrive/BNS_model_v2/merged_model"
final_model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

print(f"Merged Model saved to {save_dir}")


==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




Merged Model saved to /content/drive/MyDrive/BNS_model_v2/merged_model


In [41]:
from unsloth import FastLanguageModel

finetuned_model = FastLanguageModel.for_inference(lora_layers_and_quantized_model)
messages = [
    {"role": "user", "content": "Can you provide more details about the punishment for causing the death of a woman or causing her to be in a persistent vegetative state during the commission of an offence?"}
]
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt").to("cuda")

input_ids = inputs
attention_mask = (input_ids != tokenizer.pad_token_id).long()


In [42]:
outputs = finetuned_model.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    max_new_tokens=1024,
    use_cache=True,
    temperature=1.5,
    min_p=0.1
)

decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)

clean_output =[]
for response in decoded_output:
  response = response.split("user\n\n")[1] if "user\n\n" in response else response
  response = response.split("assistant\n\n")[1] if "assistant\n\n" in response else response
  clean_output.append(response)

print(clean_output)

["The text specifies that the punishment can include imprisonment for life, and if the woman's husband or any close relative of her husband was assaulted or otherwise harmed in the same offence, the punishment can extend to death, but no further details are provided."]
