In [None]:
%pip install \
    datasets \
    evaluate \
    rouge_score\
    loralib \
    evaluate \
    accelerate \
    bitsandbytes \
    trl \
    peft \
    -U --quiet

In [None]:
# Authenticate with the Hugging Face Hub. login() is called without arguments,
# so no access token is embedded in the notebook source.
# NOTE(review): presumably required because the Llama checkpoints are gated — confirm.
from huggingface_hub import login
login()

In [None]:
import pandas as pd
import torch
from transformers import AutoTokenizer, LlamaForCausalLM, Trainer, TrainingArguments,BitsAndBytesConfig
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
from datasets import Dataset
import os
import warnings
# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Expose only GPU 0 to this process and turn off the tokenizers library's
# internal thread pool.
os.environ.update({
    "CUDA_VISIBLE_DEVICES": "0",
    "TOKENIZERS_PARALLELISM": "false",
})

In [None]:
# Checkpoint to fine-tune and its matching tokenizer.
model_name = 'meta-llama/Llama-3.2-3B-Instruct'
tokenizer = AutoTokenizer.from_pretrained(model_name)

# dtype requested for the weights that are not quantized to int8.
compute_dtype = torch.float16

# 8-bit weight quantization through bitsandbytes. A compute-dtype option only
# exists for 4-bit loading (`bnb_4bit_compute_dtype`); the `bnb_8bit_compute_dtype`
# keyword used previously is not a BitsAndBytesConfig parameter and had no effect.
quant_config = BitsAndBytesConfig(load_in_8bit=True)

model = LlamaForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    torch_dtype=compute_dtype,
)
print(type(tokenizer))

# The key/value cache only helps generation, so disable it for training.
model.config.use_cache = False
# NOTE(review): pretraining_tp=1 presumably selects the regular (non-sliced)
# linear-layer code path — confirm against the LlamaConfig documentation.
model.config.pretraining_tp = 1

In [None]:
# Read the MedQuAD question/answer table and discard its 'source' column
# in a single chained expression.
data = pd.read_csv('/kaggle/input/medquad/medquad.csv').drop(columns=['source'])
def conc(data):
    """Return the question text with its focus area appended in parentheses.

    Args:
        data: A row-like mapping (dict or DataFrame row) providing the
            ``'question'`` and ``'focus_area'`` entries.

    Returns:
        ``"<question> (<focus_area>)"``, or only ``"<question>"`` when the focus
        area is missing (``None``/NaN), so that no literal ``"(nan)"`` ends up
        in the question text.
    """
    focus_area = data['focus_area']
    if pd.isna(focus_area):
        return f"{data['question']}"
    return f"{data['question']} ({focus_area})"

# Rows without a question or an answer would otherwise be stringified to "nan"
# further down and kept as training examples, so drop them first.
data = data.dropna(subset=['question', 'answer'])

# Fold the focus area into the question text, then drop the now-redundant column.
data = data.assign(question=data.apply(conc, axis=1)).drop(columns=['focus_area'])

# Keep only answers of at most 512 whitespace-separated words. The index is
# intentionally not reset: a later cell removes the `__index_level_0__` column
# that Dataset.from_pandas creates for a non-default index.
data = data[data['answer'].apply(lambda x: len(str(x).split())) <= 512]

def tokenize_function(row, question_max_length=128, answer_max_length=512):
    """Tokenize one question/answer pair as a single causal-LM training sequence.

    A decoder-only model needs ``labels`` with the same length as ``input_ids``
    (it is trained to predict the next token of its own input). The question and
    the answer are therefore encoded together instead of being placed in two
    tensors of different lengths.

    Args:
        row: Mapping-like row (DataFrame row or dataset example) with
            ``'question'`` and ``'answer'`` entries; it is updated in place.
        question_max_length: Token budget reserved for the question part.
        answer_max_length: Token budget reserved for the answer part.

    Returns:
        The same ``row`` with two 1-D tensors added: ``'input_ids'`` (padded to
        ``question_max_length + answer_max_length``) and ``'labels'`` (a copy of
        ``input_ids`` whose padding positions are set to -100 so the loss
        ignores them).
    """
    question = str(row['question'])
    answer = str(row['answer'])

    # Terminate the answer with EOS so the model learns where a reply ends.
    encoded = tokenizer(
        f"{question}\n{answer}{tokenizer.eos_token}",
        padding="max_length",
        truncation=True,
        max_length=question_max_length + answer_max_length,
        return_tensors="pt",
    )

    input_ids = encoded.input_ids[0]
    labels = input_ids.clone()
    # Mask by attention mask rather than by token id: the pad token is the EOS
    # token, and the real EOS at the end of the answer must stay in the loss.
    labels[encoded.attention_mask[0] == 0] = -100

    row['input_ids'] = input_ids
    row['labels'] = labels

    return row

# The tokenizer gets EOS as its padding token; pad on the right.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Tokenize every row, then turn the per-row tensors into plain Python lists.
tokenized_data = data.apply(tokenize_function, axis=1)
tokenized_data = tokenized_data.assign(
    input_ids=tokenized_data['input_ids'].apply(lambda ids: ids.tolist()),
    labels=tokenized_data['labels'].apply(lambda ids: ids.tolist()),
)
tokenized_data.head()

In [None]:
# `tokenized_data` already holds `input_ids` and `labels`, so the frame is
# converted as-is; running tokenize_function over the dataset a second time
# only repeated the same tokenization.
dataset = Dataset.from_pandas(tokenized_data)

# Token-only view of the dataset. Only columns that actually exist are removed:
# `__index_level_0__` is present only when the DataFrame index is not a default
# RangeIndex, and remove_columns raises on unknown names.
tokenized_datasets = dataset.remove_columns(
    [name for name in ('question', '__index_level_0__', 'answer') if name in dataset.column_names]
)
tokenized_datasets

In [None]:
def print_number_of_trainable_model_parameters(model):
    """Summarize how many of a model's parameters are trainable.

    Despite its name, this function does not print; it returns the report so the
    caller decides how to display it.

    Args:
        model: Any object whose ``named_parameters()`` yields ``(name, param)``
            pairs, where each ``param`` offers ``numel()`` and ``requires_grad``.

    Returns:
        A three-line string with the trainable count, the total count and the
        trainable share in percent (two decimals). A model without parameters
        reports ``0.00%`` instead of raising ``ZeroDivisionError``.
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_model_params += count
        if param.requires_grad:
            trainable_model_params += count

    # Guard the ratio: a parameter-free module has nothing to divide by.
    percentage = 100 * trainable_model_params / all_model_params if all_model_params else 0.0
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {percentage:.2f}%"

# Report the parameter counts of the model as loaded, i.e. before the LoRA
# adapters are attached in the next step.
print(print_number_of_trainable_model_parameters(model))

In [None]:
from peft import get_peft_model, TaskType

# LoRA hyper-parameters: rank 32 with alpha 64, 10% dropout, and bias handling
# set to "none", for a causal language-modelling task.
peft_args = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=64,
    lora_dropout=0.1,
    bias="none",
)

# Attach the adapters to the loaded model and report the resulting
# trainable-parameter counts.
peft_model = get_peft_model(model, peft_args)
print(print_number_of_trainable_model_parameters(peft_model))

In [None]:
# Set training parameters
training_params = TrainingArguments(
    output_dir="./Output",
    num_train_epochs=1,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=1000,
    logging_steps=1000,
    learning_rate=2e-5,
    weight_decay=0.001,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    logging_dir="./logs",
)

In [None]:
# Build the single "text" column that `dataset_text_field` points at. `dataset`
# carries the question/answer columns and pre-tokenized ids but no "text"
# column, so without this step the trainer never sees question and answer
# together as one training example. EOS marks where an answer ends.
sft_dataset = dataset.map(
    lambda row: {"text": f"{row['question']}\n{row['answer']}{tokenizer.eos_token}"},
    remove_columns=dataset.column_names,
)

# NOTE(review): `dataset_text_field`, `max_seq_length`, `packing` and `tokenizer`
# are accepted here by older TRL releases; newer ones expect them on SFTConfig /
# as `processing_class`. Confirm against the installed TRL version.
trainer = SFTTrainer(
    # `peft_model` is already wrapped by get_peft_model, so no `peft_config` is
    # passed; handing over both would ask the trainer to wrap the model again.
    model=peft_model,
    train_dataset=sft_dataset,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_params,
    packing=False,
)
trainer.train()

# Persist the final adapter weights. Periodic checkpoints are only written every
# `save_steps` steps, so the last steps of the run could otherwise be lost.
trainer.save_model(training_params.output_dir)

# Loading the Fine-Tuned Model

In [None]:
import os
import torch
from transformers import AutoTokenizer, LlamaForCausalLM
from peft import PeftModel

# A LoRA adapter has to be applied to the same checkpoint it was trained on.
# The training section fine-tunes the Instruct variant, so that variant is
# loaded here instead of the plain base model.
# NOTE(review): assumes the adapter under `adapter_model_path` was produced by
# the training section of this notebook — confirm.
base_model_name = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
# Reuse EOS as the padding token, as during training.
tokenizer.pad_token = tokenizer.eos_token

model = LlamaForCausalLM.from_pretrained(base_model_name)

# Location of the saved adapter and a scratch directory for offloaded weights.
adapter_model_path = "/kaggle/input/fine-tune-model/"
offload_dir = "/kaggle/temp_offload"
os.makedirs(offload_dir, exist_ok=True)

# Attach the fine-tuned adapter weights to the base model.
model = PeftModel.from_pretrained(model, adapter_model_path, offload_dir=offload_dir)

In [None]:
# Question to ask the fine-tuned model (a plain prompt, no system message).
prompt = "Who is at risk for Alkhurma Hemorrhagic Fever (AHF)"

# Tokenize the prompt and keep the attention mask: the pad token is the EOS
# token here, so generate() cannot reliably infer the mask from the ids alone.
encoded_prompt = tokenizer(prompt, return_tensors="pt").to(model.device)
input_ids = encoded_prompt.input_ids

# Generate a continuation. `temperature` and `top_p` only take effect when
# sampling is enabled, so `do_sample=True` is set explicitly instead of relying
# on the checkpoint's default generation config. With top_p=0.01 the sampler
# keeps little more than the single most likely token at each step.
output = model.generate(
    input_ids,
    attention_mask=encoded_prompt.attention_mask,
    max_length=64,  # total length, prompt tokens included
    num_return_sequences=1,
    use_cache=True,
    do_sample=True,
    temperature=1.5,
    top_p=0.01,
    pad_token_id=tokenizer.pad_token_id,
)

# Decode the first (only) returned sequence and print it without special tokens.
print(tokenizer.decode(output[0], skip_special_tokens=True))
