In [1]:
# Mount Google Drive (Colab only) and make the Drive root the working directory.
import os

from google.colab import drive

drive.mount('/content/drive')
# If the mount is stale, remount with:
#   drive.mount('/content/drive/', force_remount=True)
os.chdir("/content/drive/MyDrive")

Mounted at /content/drive


# Fine-Tuning distilgpt2 with QLoRA for Medical Symptom Checker


## 1. Install Required Libraries

In [1]:
pip install datasets chromadb



## 2. Import Libraries

In [2]:
import pandas as pd
import os
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
import torch
from transformers.trainer_utils import set_seed

## 3. Set Seed for Reproducibility and Set the Data Path

In [5]:
# Seed the random number generators through transformers' set_seed helper.
SEED = 42
set_seed(SEED)

# Relative paths used later (the CSV, checkpoints, the saved model) resolve
# against this directory.
DATA_DIR = "/content/drive/MyDrive/ZS/data"  # change the data path if required
os.chdir(DATA_DIR)


## 4. Load Dataset

Loads a CSV file containing medical symptom queries and doctor responses, converting it to a Hugging Face Dataset.

In [8]:
# Read the CSV from the current working directory, then wrap the DataFrame
# as a Hugging Face Dataset so it can be tokenized with `.map`.
csv_filename = "data_symptoms.csv"  # change the filename as required
data = pd.read_csv(csv_filename)
dataset = Dataset.from_pandas(data)

## 5. Load Base Model and Tokenizer

In [9]:
model_name = "distilgpt2"

# Tokenizer: reuse the end-of-sequence token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Base causal language model that is fine-tuned below.
model = AutoModelForCausalLM.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

## 6. Preprocess Dataset
Formats the dataset by combining symptom queries and doctor responses into a single string format for training.


In [10]:
def preprocess_function(examples):
    """Tokenize a batch of symptom/response pairs for causal-LM fine-tuning.

    Args:
        examples: Batch mapping with "symptom_query" and "doctor_response"
            entries (parallel lists), as supplied by `Dataset.map` with
            `batched=True`.

    Returns:
        The tokenizer output for the formatted texts (padded/truncated to 512
        tokens) with an added "labels" entry: a copy of "input_ids" in which
        every padded position is replaced by -100.
    """
    # End each example with an explicit EOS token. Padding is excluded from
    # the loss below, so this is the only end-of-response signal the model
    # is trained on.
    inputs = [
        f"Symptom: {q} Response: {a}{tokenizer.eos_token}"
        for q, a in zip(examples["symptom_query"], examples["doctor_response"])
    ]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")

    # -100 is the label value the causal-LM loss ignores. The pad token is the
    # EOS token in this notebook, so padding cannot be told apart by token id;
    # the attention mask (0 on padding) is used instead.
    model_inputs["labels"] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(model_inputs["input_ids"], model_inputs["attention_mask"])
    ]
    return model_inputs

# Tokenize the whole dataset; batched=True means preprocess_function receives
# lists of queries/responses rather than one row at a time.
tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
)

Map:   0%|          | 0/99 [00:00<?, ? examples/s]

## 7. Configure QLoRA

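Sets up LoRA adapters for parameter-efficient fine-tuning. Note: the base model above is loaded in full precision (no quantization config is passed), so this is plain LoRA rather than true QLoRA, which would additionally require a 4-bit quantized base model.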

In [11]:
# LoRA adapter configuration. The base model is loaded without quantization in
# this notebook, so this is standard LoRA (not 4-bit QLoRA).
lora_config = LoraConfig(
    r=8,  # low adapter rank keeps the number of trainable parameters small
    lora_alpha=16,
    # Attention projections of each transformer block receive adapters.
    target_modules=["attn.c_attn", "attn.c_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    # GPT-2 style Conv1D layers store weights as (fan_in, fan_out); stating it
    # explicitly avoids PEFT having to override the default with a warning.
    fan_in_fan_out=True,
)

# Wrap the base model with the LoRA adapters.
# NOTE: this rebinds `model`, so re-running this cell without reloading the
# base model first would wrap an already-wrapped model.
model = get_peft_model(model, lora_config)



## 8. Set Training Arguments

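Configures the training parameters, including fp16 mixed precision and the 8-bit AdamW optimizer. Both require a CUDA GPU runtime, and the 8-bit optimizer additionally requires the `bitsandbytes` package.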

In [12]:
# fp16 mixed precision and the bitsandbytes 8-bit optimizer both need a CUDA
# device. Gate them on GPU availability so the notebook still runs (slowly) on
# a CPU-only runtime; on a GPU runtime the settings are unchanged.
use_cuda = torch.cuda.is_available()

training_args = TrainingArguments(
    output_dir="./symptom_checker_qlora",  # checkpoints, relative to the working directory
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,  # effective batch size per device: 2 * 8 = 16
    learning_rate=3e-4,
    num_train_epochs=5,
    logging_steps=10,
    save_strategy="epoch",
    fp16=use_cuda,  # mixed precision only when a GPU is present
    optim="adamw_8bit" if use_cuda else "adamw_torch",  # 8-bit AdamW needs CUDA + bitsandbytes
)

## 9. Initialize Trainer and Fine-tune Model

In [None]:
# Bundle the adapter-wrapped model, the training arguments and the tokenized
# dataset into a Trainer.
trainer = Trainer(model=model, args=training_args, train_dataset=tokenized_dataset)

# Run fine-tuning. As the last expression of the cell, the value returned by
# train() is shown as the cell output.
trainer.train()

## 10. Save Fine-tuned Model

In [None]:
# Write the trained model and its tokenizer into the same directory so they
# can be reloaded together from one path.
# NOTE(review): `model` was wrapped by get_peft_model above, so presumably only
# the adapter weights are written here - confirm against the PEFT docs.
save_dir = "./symptom_checker_qlora_model"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

## 11. Load Models for Comparison

In [None]:
# Checkpoints being compared: the untouched base model and the directory
# written by the save step.
original_model_name = "distilgpt2"
fine_tuned_model_path = "./symptom_checker_qlora_model"

# Baseline: tokenizer (EOS reused as the padding token) and model.
original_tokenizer = AutoTokenizer.from_pretrained(original_model_name)
original_tokenizer.pad_token = original_tokenizer.eos_token
original_model = AutoModelForCausalLM.from_pretrained(original_model_name)

# Fine-tuned: same setup, loaded from the local path.
fine_tuned_tokenizer = AutoTokenizer.from_pretrained(fine_tuned_model_path)
fine_tuned_tokenizer.pad_token = fine_tuned_tokenizer.eos_token
fine_tuned_model = AutoModelForCausalLM.from_pretrained(fine_tuned_model_path)

## 12. Prepare Models for Evaluation

In [None]:
# Choose the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Inference only from here on: put both models into evaluation mode.
original_model.eval()
fine_tuned_model.eval()

# Move both models to the selected device.
original_model.to(device)
fine_tuned_model.to(device)

## 13. Define Response Generation Function

In [None]:
def generate_response(model, tokenizer, symptom, max_length=100):
    """Sample a reply for a single symptom description.

    The prompt has the same "Symptom: ... Response:" shape used when the
    training examples were formatted.

    Args:
        model: Causal language model exposing `generate` and `device`.
        tokenizer: Tokenizer matching `model`.
        symptom: Free-text symptom description inserted into the prompt.
        max_length: Upper bound passed to `generate` as `max_length`
            (it counts the prompt tokens as well as the generated ones).

    Returns:
        The decoded text of the newly generated tokens only (prompt removed),
        with surrounding whitespace stripped.
    """
    input_text = f"Symptom: {symptom} Response:"
    # Place the encoded prompt on the model's own device instead of relying on
    # a notebook-global `device` variable.
    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True).to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    outputs = model.generate(
        inputs["input_ids"],
        # Pass the mask explicitly: pad and EOS share an id, so generate()
        # cannot infer it from the token ids.
        attention_mask=inputs["attention_mask"],
        max_length=max_length,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )

    # Drop the prompt by position rather than by splitting on "Response:".
    # Splitting truncates the reply whenever the model emits "Response:" again
    # and picks the wrong segment if the symptom text itself contains it.
    generated_ids = outputs[0][prompt_length:]
    doctor_response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
    return doctor_response

## 15. Define Test Symptoms

In [None]:
# Sample symptom queries (one per line) fed to both models in the comparison.
test_symptoms = """\
I have a persistent cough and fever.
I feel dizzy and have a headache.
My stomach hurts after eating.
""".splitlines()

## 16. Compare Model Outputs

In [None]:
# For every test symptom, print the baseline reply followed by the
# fine-tuned reply so the two can be read side by side.
banner = "Comparing Original vs Fine-Tuned Model Outputs\n" + "=" * 50
print(banner)

for symptom in test_symptoms:
    print(f"\nSymptom: {symptom}")
    contenders = (
        ("Original Model", original_model, original_tokenizer),
        ("Fine-Tuned Model", fine_tuned_model, fine_tuned_tokenizer),
    )
    for label, candidate_model, candidate_tokenizer in contenders:
        reply = generate_response(candidate_model, candidate_tokenizer, symptom)
        print(f"{label}: {reply}")