Personalized Impression Generation for PET Reports Using Large Language Models
https://pmc.ncbi.nlm.nih.gov/articles/PMC11031527/
https://github.com/xtie97/PET-Report-Expert-Evaluation

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Hugging Face Hub id of the PEGASUS checkpoint used to generate PET impressions.
finetuned_model = "xtie/PEGASUS-PET-impression"

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without CUDA instead of raising.
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(finetuned_model)

# ignore_mismatched_sizes=True lets loading continue instead of raising when a
# checkpoint tensor's shape differs from the model's.
# NOTE(review): any tensor skipped this way is freshly initialised, so check the
# load-time warnings before trusting the generated text.
# .eval() switches the module to evaluation mode (e.g. dropout disabled).
model = AutoModelForSeq2SeqLM.from_pretrained(finetuned_model, ignore_mismatched_sizes=True).eval()

# Last expression of the cell: moves the weights to `device` and displays the
# module summary as the cell output.
model.to(device)

PegasusForConditionalGeneration(
  (model): PegasusModel(
    (shared): Embedding(96103, 1024, padding_idx=0)
    (encoder): PegasusEncoder(
      (embed_tokens): Embedding(96103, 1024, padding_idx=0)
      (embed_positions): PegasusSinusoidalPositionalEmbedding(1024, 1024)
      (layers): ModuleList(
        (0-15): 16 x PegasusEncoderLayer(
          (self_attn): PegasusAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): ReLU()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
          (final_layer_no

In [3]:
# Example report in the layout fed to the model: description, radiologist name,
# findings by body region, then the clinical indication. "xxx" are placeholders.
findings_info ="""
Description: PET CT WHOLE BODY
Radiologist: James
Findings:

Head/Neck: 
Brain: Symmetric cortical FDG uptake. No focal hyper- or hypometabolic lesions.
Nasopharynx/Oral cavity: Physiologic activity only.
Cervical nodes: No FDG-avid cervical lymphadenopathy; largest node level II < 5 mm short-axis.

Chest: 
Lungs:
Irregular spiculated mass in right upper lobe apex measuring 3.1 × 2.6 cm (previously 3.4 × 2.9 cm) with mildly decreased but persistent FDG uptake (SUVmax 4.2; prior 6.7).

No new pulmonary nodules.
Abdomen/Pelvis: xxx Extremities/Musculoskeletal: xxx
Indication:
The patient is a 60-year old male with a history of xxx
"""

# Newlines are replaced by spaces so the report is tokenised as one line.
# Inputs longer than 1024 tokens are truncated; shorter ones are padded up to
# 1024, with the attention mask marking which positions are real tokens.
inputs = tokenizer(
    findings_info.replace('\n', ' '),
    padding="max_length",
    truncation=True,
    max_length=1024,
    return_tensors="pt",
)

# Send the tensors to whichever device the model's weights live on rather than
# assuming "cuda"; a hard-coded device fails on CPU-only machines and whenever
# the model was placed somewhere else.
target_device = model.device
input_ids = inputs.input_ids.to(target_device)
attention_mask = inputs.attention_mask.to(target_device)

# Deterministic decoding: 4-beam search without sampling, a single beam group
# (so the diversity penalty of 0.0 is inert), one returned sequence.
# no_repeat_ngram_size=3 forbids repeating any trigram; length_penalty=2.0
# biases beam scoring towards longer outputs; at most 512 new tokens.
outputs = model.generate(
    input_ids,
    attention_mask=attention_mask,
    max_new_tokens=512,
    num_beam_groups=1,
    num_beams=4,
    do_sample=False,
    diversity_penalty=0.0,
    num_return_sequences=1,
    length_penalty=2.0,
    no_repeat_ngram_size=3,
    early_stopping=True,
)

# Only one sequence is requested, so decode the first row and drop special
# tokens such as padding / end-of-sequence markers.
output_str = tokenizer.decode(outputs[0], skip_special_tokens=True)

In [4]:
# Bare last expression: the notebook shows the decoded impression string as the
# cell output.
output_str

'[1] Irregular spiculated mass in right upper lobe apex with mildly decreased but persistent FDG uptake (SUVmax 4.2; prior 6.7). No new pulmonary nodules. [2] No FDG-avid metastatic disease.'