## Install Required Libraries

In [1]:
!pip install transformers transformers_stream_generator datasets peft -U bitsandbytes trl accelerate tiktoken evaluate rouge_score

Collecting transformers
  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
Collecting transformers_stream_generator
  Downloading transformers-stream-generator-0.0.5.tar.gz (13 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting datasets
  Downloading datasets-3.5.1-py3-none-any.whl.metadata (19 kB)
Collecting peft
  Downloading peft-0.15.2-py3-none-any.whl.metadata (13 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting trl
  Downloading trl-0.17.0-py3-none-any.whl.metadata (12 kB)
Collecting accelerate
  Downloading accelerate-1.6.0-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-

## Load the base model with 4-bit

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch


model_name = "Qwen/Qwen2.5-3B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     load_in_4bit=True,  
#     device_map="auto",
#     trust_remote_code=True
# )



bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

2025-04-29 12:32:03.477781: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745929923.706511      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745929923.771177      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


model.safetensors.index.json:   0%|          | 0.00/35.6k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

In [3]:
## Prepare LoRA configuration
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],  # these layers are common but you can confirm with model.named_modules()
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 14,966,784 || all params: 3,100,905,472 || trainable%: 0.4827


## Load and Prepare Dataset

In [4]:
from datasets import load_dataset

dataset = load_dataset("lavita/ChatDoctor-HealthCareMagic-100k")
subset = dataset["train"].select(range(5000))
subset

README.md:   0%|          | 0.00/542 [00:00<?, ?B/s]

(…)-00000-of-00001-5e7cb295b9cff0bf.parquet:   0%|          | 0.00/70.5M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/112165 [00:00<?, ? examples/s]

Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 5000
})

In [5]:
# Manual split
train_dataset = subset.select(range(4000))                          # 0–3999
val_dataset = subset.select(range(4000, 4500))                      # 4000–4499
test_dataset = subset.select(range(4500, 5000))                     # 4500–4999

# Create DatasetDict
from datasets import DatasetDict
dataset = DatasetDict({
    "train": train_dataset,
    "validation": val_dataset,
    "test": test_dataset
})

dataset

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output'],
        num_rows: 4000
    })
    validation: Dataset({
        features: ['instruction', 'input', 'output'],
        num_rows: 500
    })
    test: Dataset({
        features: ['instruction', 'input', 'output'],
        num_rows: 500
    })
})

In [6]:
def generate_prompt(example):
    return f"""### Instruction:
{example['instruction']}

### Input:
{example['input']}

### Response:
{example['output']}
"""

dataset = dataset.map(lambda x: {"text": generate_prompt(x)})

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [7]:
dataset

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output', 'text'],
        num_rows: 4000
    })
    validation: Dataset({
        features: ['instruction', 'input', 'output', 'text'],
        num_rows: 500
    })
    test: Dataset({
        features: ['instruction', 'input', 'output', 'text'],
        num_rows: 500
    })
})

In [8]:
dataset['train'][0]

{'instruction': "If you are a doctor, please answer the medical questions based on the patient's description.",
 'input': 'I woke up this morning feeling the whole room is spinning when i was sitting down. I went to the bathroom walking unsteadily, as i tried to focus i feel nauseous. I try to vomit but it wont come out.. After taking panadol and sleep for few hours, i still feel the same.. By the way, if i lay down or sit down, my head do not spin, only when i want to move around then i feel the whole world is spinning.. And it is normal stomach discomfort at the same time? Earlier after i relieved myself, the spinning lessen so i am not sure whether its connected or coincidences.. Thank you doc!',
 'output': 'Hi, Thank you for posting your query. The most likely cause for your symptoms is benign paroxysmal positional vertigo (BPPV), a type of peripheral vertigo. In this condition, the most common symptom is dizziness or giddiness, which is made worse with movements. Accompanying naus

In [9]:
tokenizer.eos_token

'<|im_end|>'

In [10]:
# Set pad token if not already defined
tokenizer.pad_token = tokenizer.eos_token or "<|endoftext|>"

# Tokenize
def tokenize_function(example):
    return tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )

tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=dataset["train"].column_names
)

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

## Training Setup

In [15]:
import torch
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

model.enable_input_require_grads()
model.gradient_checkpointing_enable()
torch.cuda.empty_cache()

training_args = TrainingArguments(
    output_dir="./qwen2.5-medical-lora",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    learning_rate=2e-4,
    logging_steps=50,
    save_steps=100,
    num_train_epochs=3,
    report_to="none",
    save_total_limit=2,
    fp16=True,
    optim="paged_adamw_32bit",
    remove_unused_columns=False
)

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

In [16]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    data_collator=data_collator
)

trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
50,2.2871
100,2.2709
150,2.283
200,2.2324
250,2.2498
300,2.1482
350,2.1203
400,2.1171
450,2.1218
500,2.1174


TrainOutput(global_step=750, training_loss=2.1255455118815103, metrics={'train_runtime': 24610.8134, 'train_samples_per_second': 0.488, 'train_steps_per_second': 0.03, 'total_flos': 1.02840995414016e+17, 'train_loss': 2.1255455118815103, 'epoch': 3.0})

## Evaluation

In [17]:
eval_results = trainer.evaluate(eval_dataset=tokenized_dataset["validation"])
print(f"Evaluation loss: {eval_results['eval_loss']}")

Evaluation loss: 2.23408842086792


In [20]:
eval_results = trainer.evaluate(eval_dataset=tokenized_dataset["test"])
print(f"Evaluation loss: {eval_results['eval_loss']}")

Evaluation loss: 2.255925178527832


## Save fine-tuned LoRA model

In [19]:
model.save_pretrained("./qwen2.5-medical-lora")
tokenizer.save_pretrained("./qwen2.5-medical-lora")

('./qwen2.5-medical-lora/tokenizer_config.json',
 './qwen2.5-medical-lora/special_tokens_map.json',
 './qwen2.5-medical-lora/vocab.json',
 './qwen2.5-medical-lora/merges.txt',
 './qwen2.5-medical-lora/added_tokens.json',
 './qwen2.5-medical-lora/tokenizer.json')

## Inference

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel


# Load the base model (same as used in training)

model_name = "Qwen/Qwen2.5-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Quantization config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

# Load base and LoRA adapter
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

model = PeftModel.from_pretrained(base_model, "./qwen2.5-medical-lora")
model = model.merge_and_unload()

# Tokenizer settings
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [26]:
def clean_response(text):
    text = text.split("<|im_start|>assistant")[-1]
    text = text.split("<|im_end|>")[0].strip()
    if "system" in text:
        text = text.split("system")[-1].strip()
    if "user" in text:
        text = text.split("user")[-1].strip()
    return text



def generate_response(instruction, input_text=""):
    prompt = f"""<|im_start|>system
You are a helpful medical assistant.<|im_end|>
<|im_start|>user
{instruction}
{input_text}<|im_end|>
<|im_start|>assistant
"""
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return clean_response(decoded)

In [28]:
def print_result(instruction, response, input_text=None):
    print("\n" + "=" * 80)
    print("🧾  Instruction:")
    print(f"{instruction}")

    if input_text:
        print("\n📝  Input:")
        print(f"{input_text}")

    print("\n💬  Response:")
    print("-" * 80)
    print(response.strip())
    print("-" * 80)
    print("=" * 80 + "\n")



# Example 1
instruction1 = "What are the symptoms of diabetes?"
response1 = generate_response(instruction1)
print_result(instruction1, response1)

# Example 2
instruction2 = "What treatment would you recommend?"
input_text2 = "The patient has been diagnosed with type 2 diabetes and has a history of hypertension."
response2 = generate_response(instruction2, input_text2)
print_result(instruction2, response2, input_text2)


🧾  Instruction:
What are the symptoms of diabetes?

💬  Response:
--------------------------------------------------------------------------------
What are the symptoms of diabetes?

assistant
The symptoms of diabetes can vary, and they may be mild at first or not noticeable at all. However, some common symptoms include:

1. **Increased Thirst and Urination**: People with diabetes often drink more fluids than usual and go to the bathroom more frequently, especially at night.

2. **Frequent Feeding and Hunger Pangs**: Despite eating more food, you might feel hungry and experience more frequent episodes of hunger.

3. **Unusual Weight Loss**: Some people with diabetes lose weight even though their appetite is high. This is because the body starts burning fat for energy when it can't use glucose properly.

4. **Fatigue**: You might feel very tired or weak, especially after eating.

5. **Blurred Vision**: High blood sugar levels can cause your eyes to become cloudy, leading to blurred visi

## Upload model on huggingFace

In [29]:
! pip install huggingface_hub

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [35]:
# !huggingface-cli login

In [34]:
import getpass

# Prompt the user to enter the Hugging Face token (hidden input)
huggingface_token = getpass.getpass("Enter your Hugging Face token: ")

# Now use the token securely
print("Token has been securely captured!")

Enter your Hugging Face token:  ········


Token has been securely captured!


In [37]:
from huggingface_hub import login
import os

# Set the Hugging Face token in the environment for authentication
os.environ["HUGGINGFACE_TOKEN"] = huggingface_token



# Log in to Hugging Face with the token from environment variables
login(token=huggingface_token)

## Push Files to Hugging Face:

In [47]:
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="/kaggle/working/qwen2.5-medical-lora",
    repo_id="AbdullahAlnemr1/qwen2.5-medical-lora",
    repo_type="model",
    token="your_token_here"  # Or set as an environment variable
)

adapter_model.safetensors:   0%|          | 0.00/59.9M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/59.9M [00:00<?, ?B/s]

scaler.pt:   0%|          | 0.00/988 [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/120M [00:00<?, ?B/s]

Upload 16 LFS files:   0%|          | 0/16 [00:00<?, ?it/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.30k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/59.9M [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/120M [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

scaler.pt:   0%|          | 0.00/988 [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.30k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/AbdullahAlnemr1/qwen2.5-medical-lora/commit/51cf8308a73e396dadb831b166103f47d2937d96', commit_message='Upload folder using huggingface_hub', commit_description='', oid='51cf8308a73e396dadb831b166103f47d2937d96', pr_url=None, repo_url=RepoUrl('https://huggingface.co/AbdullahAlnemr1/qwen2.5-medical-lora', endpoint='https://huggingface.co', repo_type='model', repo_id='AbdullahAlnemr1/qwen2.5-medical-lora'), pr_revision=None, pr_num=None)

In [48]:
# !du -sh /kaggle/working/qwen2.5-medical-lora

In [49]:
# !git config --global user.name "AbdullahAlnemr1"
# !git config --global user.email "alnemrabdullah2@gmail.com"

In [50]:
# import getpass
# import os

# # Prompt user to enter Hugging Face token (this will stay hidden)
# token = getpass.getpass("Enter your Hugging Face token: ")

# # Format the repo URL with token (DON'T expose this!)
# repo_url = f"https://{token}@huggingface.co/AbdullahAlnemr1/qwen2.5-medical-lora"

# # Go to your model directory
# %cd /kaggle/working/qwen2.5-medical-lora

# # Initialize Git repo and push
# !git init
# !git remote remove origin || true  # remove old remote if exists
# !git remote add origin {repo_url}
# !git branch -M main
# !git add .
# !git commit -m "Initial upload of LoRA Qwen2.5 Medical"
# !git push -u origin main