In [1]:
# mount
from google.colab import drive
drive.mount('/content/drive')
# drive.mount('/content/drive/', force_remount=True)
import os
os.chdir("/content/drive/MyDrive")

Mounted at /content/drive


# Fine-Tuning distilgpt2 with QLoRA for Medical Symptom Checker


## 1. Install Required Libraries

In [1]:
!pip install datasets chromadb
!pip install bitsandbytes

Collecting bitsandbytes
  Using cached bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.0->bitsandbytes)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.0->bitsandbytes)
  Using cached nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting n

## 2. Import Libraries

In [2]:
import pandas as pd
import os
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
import torch
from transformers.trainer_utils import set_seed

## 3. Set Seed for Reproducibility and Set path

In [3]:
set_seed(42)
os.chdir("/content/drive/MyDrive/ZS/data") # change the data path if required


## 4. Load Dataset

Loads a CSV file containing medical symptom queries and doctor responses, converting it to a Hugging Face Dataset.

In [4]:
data = pd.read_csv("data_symptoms.csv") # change the filename as required
dataset = Dataset.from_pandas(data)

## 5. Load Base Model and Tokenizer

In [5]:
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Set padding token
tokenizer.pad_token = tokenizer.eos_token

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

## 6. Preprocess Dataset
Formats the dataset by combining symptom queries and doctor responses into a single string format for training.


In [6]:
def preprocess_function(examples):
    inputs = [f"Symptom: {q} Response: {a}" for q, a in zip(examples["symptom_query"], examples["doctor_response"])]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
    model_inputs["labels"] = model_inputs["input_ids"].copy()
    return model_inputs

tokenized_dataset = dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/99 [00:00<?, ? examples/s]

## 7. Configure QLoRA

Sets up QLoRA for parameter-efficient fine-tuning with quantization.

In [7]:
lora_config = LoraConfig(
    r=8,  # Lower rank for QLoRA
    lora_alpha=16,
    target_modules=["attn.c_attn", "attn.c_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Apply QLoRA to the model
model = get_peft_model(model, lora_config)



## 8. Set Training Arguments

Configures training parameters optimized for QLoRA, including mixed precision and 8-bit optimization.

In [8]:
training_args = TrainingArguments(
    output_dir="./symptom_checker_qlora",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    learning_rate=3e-4,
    num_train_epochs=5,
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,  # Mixed precision for QLoRA
    optim="adamw_8bit",  # 8-bit optimizer for QLoRA
)

## 9. Initialize Trainer and Fine-tune Model

In [9]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

# Fine-tune the model
trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mdinker-m[0m ([33mirfan-a-atria-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
10,9.4934
20,6.9348
30,4.5123


TrainOutput(global_step=30, training_loss=6.980170822143554, metrics={'train_runtime': 26.5622, 'train_samples_per_second': 18.635, 'train_steps_per_second': 1.129, 'total_flos': 56208320888832.0, 'train_loss': 6.980170822143554, 'epoch': 4.32})

## 10. Save Fine-tuned Model

In [10]:
model.save_pretrained("./symptom_checker_qlora_model")
tokenizer.save_pretrained("./symptom_checker_qlora_model")

('./symptom_checker_qlora_model/tokenizer_config.json',
 './symptom_checker_qlora_model/special_tokens_map.json',
 './symptom_checker_qlora_model/vocab.json',
 './symptom_checker_qlora_model/merges.txt',
 './symptom_checker_qlora_model/added_tokens.json',
 './symptom_checker_qlora_model/tokenizer.json')

## 11. Load Models for Comparison

In [11]:
# Load the original and fine-tuned models
original_model_name = "distilgpt2"
fine_tuned_model_path = "./symptom_checker_qlora_model"

# Load tokenizers
original_tokenizer = AutoTokenizer.from_pretrained(original_model_name)
fine_tuned_tokenizer = AutoTokenizer.from_pretrained(fine_tuned_model_path)

# Set padding token
original_tokenizer.pad_token = original_tokenizer.eos_token
fine_tuned_tokenizer.pad_token = fine_tuned_tokenizer.eos_token

# Load models
original_model = AutoModelForCausalLM.from_pretrained(original_model_name)
fine_tuned_model = AutoModelForCausalLM.from_pretrained(fine_tuned_model_path)

## 12. Prepare Models for Evaluation

In [12]:
# Ensure models are in evaluation mode
original_model.eval()
fine_tuned_model.eval()

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
original_model.to(device)
fine_tuned_model.to(device)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-5): 6 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): lora.Linear(
            (base_layer): Conv1D(nf=2304, nx=768)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=768, out_features=8, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=8, out_features=2304, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (c_proj): lora.Linear(
            (base_layer): Conv1D(nf=768, nx=768)
            (l

## 13. Define Response Generation Function

In [13]:
def generate_response(model, tokenizer, symptom, max_length=100):
    input_text = f"Symptom: {symptom} Response:"
    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True).to(device)
    outputs = model.generate(
        inputs["input_ids"],
        max_length=max_length,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    doctor_response = response.split("Response:")[1].strip() if "Response:" in response else response
    return doctor_response

## 15. Define Test Symptoms

In [14]:
# Test symptom queries
test_symptoms = [
    "I have a persistent cough and fever.",
    "I feel dizzy and have a headache.",
    "My stomach hurts after eating."
]

## 16. Compare Model Outputs

In [15]:
# Compare outputs
print("Comparing Original vs Fine-Tuned Model Outputs\n" + "="*50)
for symptom in test_symptoms:
    print(f"\nSymptom: {symptom}")
    original_response = generate_response(original_model, original_tokenizer, symptom)
    print(f"Original Model: {original_response}")
    fine_tuned_response = generate_response(fine_tuned_model, fine_tuned_tokenizer, symptom)
    print(f"Fine-Tuned Model: {fine_tuned_response}")

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Comparing Original vs Fine-Tuned Model Outputs

Symptom: I have a persistent cough and fever.
Original Model: My cough is more frequent than normal. My fever is higher than usual. Your cough will get worse. If I am coughing, I may need to visit my doctor.

The cough starts in the middle of the night, when my cough does not stop. I will not sleep, but I do not feel sleepy. You will experience a rash. It may be caused by the cough. The cough can be a little longer
Fine-Tuned Model: It is hard to tell.

I have an intermittent cough in my abdomen, but it is not clear if it was an infection, and it may be a result of a cough. I do not know if the symptoms are serious, or if they are just an anorexia, as there is no evidence that I am a healthy person.

Symptom: I feel dizzy and have a headache.
Original Model: A little dizziness and a little confusion. I'm not sure if it's the same feeling as the one I experienced with the other, but I think it is the opposite.

I think that it was very int