In [1]:
pip install transformers peft datasets accelerate bitsandbytes

Collecting transformers
  Downloading transformers-4.53.1-py3-none-any.whl.metadata (40 kB)
Collecting peft
  Downloading peft-0.16.0-py3-none-any.whl.metadata (14 kB)
Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting accelerate
  Downloading accelerate-1.8.1-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting filelock (from transformers)
  Using cached filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.30.0 (from transformers)
  Using cached huggingface_hub-0.33.2-py3-none-any.whl.metadata (14 kB)
Collecting numpy>=1.17 (from transformers)
  Using cached numpy-2.3.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (62 kB)
Collecting regex!=2019.12.17 (from transformers)
  Using cached regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
Collecting tokenizers<0.22,>=0.21

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer

In [4]:
from peft import get_peft_model, PrefixTuningConfig, TaskType

In [5]:
model_name = "Qwen/Qwen2-1.5B"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_8bit=True,  # optional: saves memory
    device_map="auto",
    trust_remote_code=True
)

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

In [6]:
from peft import get_peft_model, PrefixTuningConfig

peft_config = PrefixTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    num_virtual_tokens=20,  # Tune as needed
)

model = get_peft_model(model, peft_config)

In [11]:
from datasets import load_dataset

dataset = load_dataset("json", data_files="discharge_summaries.json")
dataset = dataset["train"].train_test_split(test_size=0.1)

# print(dataset["train"].column_names)

def tokenize_function(example):
    inputs = tokenizer(
        example["summary"], 
        padding="max_length",
        truncation=True,
        max_length=512
    )
    inputs["labels"] = inputs["input_ids"].copy()
    return inputs

tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=dataset["train"].column_names
)

Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [14]:
pip install --upgrade transformers

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [16]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=10,
    save_strategy="epoch",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
)

trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,8.2563
20,7.9184
30,7.6371
40,7.4316
50,7.2021
60,7.0014
70,6.7132
80,6.4961
90,6.266
100,5.9699


TrainOutput(global_step=2700, training_loss=1.815503844861631, metrics={'train_runtime': 4048.2442, 'train_samples_per_second': 1.334, 'train_steps_per_second': 0.667, 'total_flos': 2.17369782779904e+16, 'train_loss': 1.815503844861631, 'epoch': 3.0})

In [17]:
model.save_pretrained("qwen2-prefix-tuned")
tokenizer.save_pretrained("qwen2-prefix-tuned")

('qwen2-prefix-tuned/tokenizer_config.json',
 'qwen2-prefix-tuned/special_tokens_map.json',
 'qwen2-prefix-tuned/chat_template.jinja',
 'qwen2-prefix-tuned/vocab.json',
 'qwen2-prefix-tuned/merges.txt',
 'qwen2-prefix-tuned/added_tokens.json',
 'qwen2-prefix-tuned/tokenizer.json')

In [18]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig

# === Step 1: Load PEFT config ===
peft_config = PeftConfig.from_pretrained("qwen2-prefix-tuned")

# === Step 2: Load base model (Qwen2) ===
base_model = AutoModelForCausalLM.from_pretrained(
    peft_config.base_model_name_or_path,
    device_map="auto",
    trust_remote_code=True,
)

# === Step 3: Load PEFT-tuned model ===
model = PeftModel.from_pretrained(base_model, "qwen2-prefix-tuned")

# === Step 4: Load tokenizer ===
tokenizer = AutoTokenizer.from_pretrained("qwen2-prefix-tuned", trust_remote_code=True)

In [27]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig

# === Load PEFT config and model ===
peft_config = PeftConfig.from_pretrained("qwen2-prefix-tuned")
base_model = AutoModelForCausalLM.from_pretrained(
    peft_config.base_model_name_or_path,
    device_map="auto",
    trust_remote_code=True
)
model = PeftModel.from_pretrained(base_model, "qwen2-prefix-tuned")

# === Load tokenizer ===
tokenizer = AutoTokenizer.from_pretrained("qwen2-prefix-tuned", trust_remote_code=True)

# === Prompt ===
prompt = (
    "Discharge Summary:\n"
    "Patient was admitted for chest pain and shortness of breath.\n"
    "Treatment included IV fluids, oxygen, and monitoring.\n"
    "The patient was discharged with instructions to"
)

# === Tokenize ===
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# === Generate ===
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        top_p=0.9,
        temperature=0.8,
        repetition_penalty=1.1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id
    )

# === Decode and print ===
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
print("\n--- Generated Discharge Instructions ---\n")
print(generated_text)




--- Generated Discharge Instructions ---

Discharge Summary:
Patient was admitted for chest pain and shortness of breath.
Treatment included IV fluids, oxygen, and monitoring.
The patient was discharged with instructions to
