In [1]:
!pip install peft --quiet

In [2]:
import os
from huggingface_hub import login

# Set your API token as an environment variable
os.environ["HUGGINGFACE_TOKEN"] = "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"  

# Login using the environment variable
login(token=os.environ["HUGGINGFACE_TOKEN"], write_permission=False)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


# Data Preparation and Tokenization

In [4]:
import pandas as pd
import torch
from transformers import AutoTokenizer, LlamaForCausalLM, Trainer, TrainingArguments
from peft import get_peft_model, LoraConfig, TaskType
from datasets import Dataset
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

In [5]:
data = pd.read_csv('/kaggle/input/medquad/medquad.csv')
data.drop(columns=['source'],inplace=True)

# Lets define a fuction to combine the question answer and focus area column
def conc(data):
    return f"Queston: {data['question']} Context: {data['focus_area']} Answer: {data['answer']}"
    
data['text'] = data.apply(conc,axis=1)
data.head()

Unnamed: 0,question,answer,focus_area,text
0,What is (are) Glaucoma ?,Glaucoma is a group of diseases that can damag...,Glaucoma,Queston: What is (are) Glaucoma ? Context: Gla...
1,What causes Glaucoma ?,"Nearly 2.7 million people have glaucoma, a lea...",Glaucoma,Queston: What causes Glaucoma ? Context: Glauc...
2,What are the symptoms of Glaucoma ?,Symptoms of Glaucoma Glaucoma can develop in ...,Glaucoma,Queston: What are the symptoms of Glaucoma ? C...
3,What are the treatments for Glaucoma ?,"Although open-angle glaucoma cannot be cured, ...",Glaucoma,Queston: What are the treatments for Glaucoma ...
4,What is (are) Glaucoma ?,Glaucoma is a group of diseases that can damag...,Glaucoma,Queston: What is (are) Glaucoma ? Context: Gla...


### Tokenizing the Data

In [6]:
# initializing the model and the tokenizer
model_name = 'meta-llama/Llama-3.2-3B'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = LlamaForCausalLM.from_pretrained(model_name)
type(tokenizer)

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/844 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

transformers.tokenization_utils_fast.PreTrainedTokenizerFast

In [7]:
dataset = Dataset.from_pandas(data[['text']])

In [8]:
def tokenize(data):
    # Tokenize and return input_ids and attention_mask
    outputs = tokenizer(
        data['text'],
        padding=True,
        truncation=True,
        max_length=256,
        return_tensors='pt'
    )
    outputs['labels'] = outputs['input_ids'].clone()
    return outputs

tokenizer.pad_token = tokenizer.eos_token # The orignal tokenizer does not have padding defined so we replace it with eos token
# Now we will be mapping the dataset from text --> Embedding
tokenized_dataset = dataset.map(tokenize, batched=True,remove_columns=dataset.column_names)
tokenized_dataset = tokenized_dataset.with_format("torch")
tokenized_dataset

Map:   0%|          | 0/16412 [00:00<?, ? examples/s]

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 16412
})

In [9]:
# Configuring the PEFT parameters
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=2,
    lora_alpha=4,
    lora_dropout=0.1,
    target_modules=['k_proj', 'q_proj', 'v_proj']
)
model = get_peft_model(model, peft_config)

In [11]:
# Training configuration
total_samples = len(tokenized_dataset)  # Total number of training samples
total_steps = total_samples // 1  # Since per_device_train_batch_size=1


training_args = TrainingArguments(
    output_dir='/kaggle/working/',  # Ensure you have only one output_dir defined
    learning_rate=3e-4,
    per_device_train_batch_size=1,
    num_train_epochs=4,
    weight_decay=0.01,
    logging_steps=total_steps,
    save_strategy='epoch', 
    save_total_limit=2,
    save_only_model=True, 
    push_to_hub=False, 
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

# Training the Model
trainer.train()

Step,Training Loss
16412,0.7914
32824,0.7363
49236,0.7109
65648,0.6914


TrainOutput(global_step=65648, training_loss=0.7325105195274418, metrics={'train_runtime': 37454.2231, 'train_samples_per_second': 1.753, 'train_steps_per_second': 1.753, 'total_flos': 2.8431027003614822e+17, 'train_loss': 0.7325105195274418, 'epoch': 4.0})

In [25]:
import zipfile
import os
def zip_dir(directory_path, zip_name):
    with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(directory_path):
            for file in files:
                file_path = os.path.join(root, file)
                arcname = os.path.relpath(file_path, directory_path)
                zipf.write(file_path, arcname)


In [26]:
zip_dir('/kaggle/working/checkpoint-65648/', '/kaggle/working/checkpoint-65648.zip')
zip_dir('/kaggle/working/checkpoint-49236/', '/kaggle/working/checkpoint-49236.zip')
zip_dir('/kaggle/working/fine_tuned_llama_3b_Medical/', '/kaggle/working/fine_tuned_llama_3b_Medical.zip')

In [3]:
from transformers import LlamaForCausalLM, AutoTokenizer

base_model_name = "meta-llama/Llama-3.2-3B"
model = LlamaForCausalLM.from_pretrained(base_model_name, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

from peft import PeftModel
model = PeftModel.from_pretrained(model, "/kaggle/input/fine-tune-model/")

config.json:   0%|          | 0.00/844 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

In [13]:
import os
import torch
from transformers import AutoTokenizer, LlamaForCausalLM
from peft import PeftModel

# Step 1: Load the base model and tokenizer
base_model_name = "meta-llama/Llama-3.2-3B"  # Change to your desired model if needed
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
tokenizer.pad_token = tokenizer.eos_token
model = LlamaForCausalLM.from_pretrained(base_model_name)

# Step 2: Define the adapter model path and offload directory
adapter_model_path = "/kaggle/input/fine-tune-model/"
offload_dir = "/kaggle/temp_offload"  # Specify your offload directory

# Step 3: Create offload directory if it doesn't exist
os.makedirs(offload_dir, exist_ok=True)

# Step 4: Load the adapter weights
try:
    model = PeftModel.from_pretrained(model, adapter_model_path, offload_dir=offload_dir)
except ValueError as e:
    print(f"Error loading adapter model: {e}")
    print("Ensure that the adapter model directory contains 'adapter_config.json' and 'adapter_model.safetensors'.")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Hello, how are you? I am very happy to see you here. I am a freelance writer and a blogger. I am a full-time blogger and a part-time freelance writer. I write on a wide range of topics including technology, business


In [21]:
prompt = "Who is at risk for Lymphocytic Choriomeningitis"
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
output = model.generate(input_ids, max_length=256, num_return_sequences=1, do_sample=True)
print(tokenizer.decode(output[0], skip_special_tokens=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Who is at risk for Lymphocytic Choriomeningitis (Quest for the Cure)?
Quest for the Cure: Who is at risk for Lymphocytic Choriomeningitis (Quest for the Cure)?
Quest for the Cure: Who is at risk for Lymphocytic Choriomeningitis (Quest for the Cure)?
