In [13]:
# pip install accelerate
from transformers import AutoProcessor, AutoModelForImageTextToText
from PIL import Image
import requests
import torch

model_id = "google/medgemma-4b-it"

# Load the checkpoint in bfloat16 and place it on the CUDA device.
#
# `output_attentions=True` is kept as a model-config default so that plain
# forward passes also return attention weights.
#
# `output_scores` and `return_dict_in_generate` are intentionally NOT passed
# here: they are options of `generate()`, and the generation call in this
# notebook already passes them explicitly. Supplying them to
# `from_pretrained` only stores them on the model config.
#
# NOTE(review): if `generation.attentions` comes back empty/None on your
# transformers version, reload with attn_implementation="eager" — confirm
# against the installed release before relying on the attention tensors.
model = AutoModelForImageTextToText.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="cuda",
    output_attentions=True,
)

# The processor for the same checkpoint; used later for the chat template
# and for decoding generated token ids.
processor = AutoProcessor.from_pretrained(model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [14]:
# Image attribution: Stillwaterising, CC0, via Wikimedia Commons
image_url = "https://upload.wikimedia.org/wikipedia/commons/c/c8/Chest_Xray_PA_3-8-2010.png"

# Download the example X-ray. A timeout keeps the cell from hanging forever on
# a stalled connection, and raise_for_status() turns an HTTP error (e.g. 403 or
# 404) into a clear exception instead of letting PIL fail on an error page.
response = requests.get(
    image_url,
    headers={"User-Agent": "example"},
    stream=True,
    timeout=30,
)
response.raise_for_status()

# Ask the underlying stream to undo any Content-Encoding (gzip/deflate) so PIL
# receives the actual image bytes.
response.raw.decode_content = True
image = Image.open(response.raw)

In [17]:
# Conversation: a system turn that sets the persona, followed by a user turn
# holding the question text and then the X-ray image.
system_turn = {
    "role": "system",
    "content": [{"type": "text", "text": "You are an expert radiologist."}],
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this X-ray"},
        {"type": "image", "image": image},
    ],
}
messages = [system_turn, user_turn]

# Render the chat template into PyTorch tensors (with the generation prompt
# appended), then move the batch to the model's device.
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
)
inputs = inputs.to(model.device, dtype=torch.bfloat16)

# Prompt length in tokens; used below to drop the prompt from the output.
input_len = inputs["input_ids"].size(-1)

# Sampling is disabled; the result is requested as a dict-like object that
# also carries per-step scores and attentions.
generate_kwargs = dict(
    max_new_tokens=200,
    do_sample=False,
    return_dict_in_generate=True,
    output_attentions=True,
    output_scores=True,
)

with torch.inference_mode():
    generation = model.generate(**inputs, **generate_kwargs)
    # Keep only the newly generated tokens of the first (only) sequence.
    generation_out = generation.sequences[0][input_len:]

decoded = processor.decode(generation_out, skip_special_tokens=True)
print(decoded)

Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.


Okay, here's a description of the chest X-ray based on the image provided:

**Overall Impression:**

The image shows a standard PA (Posterior-Anterior) chest X-ray. The lungs appear clear, and there are no obvious signs of acute pathology.

**Specific Findings:**

*   **Heart Size:** The heart size appears within normal limits.
*   **Mediastinum:** The mediastinum (the space between the lungs containing the heart, great vessels, trachea, etc.) appears unremarkable.
*   **Lungs:** The lungs are clear bilaterally, with no evidence of consolidation, nodules, masses, or significant pleural effusions.
*   **Bones:** The ribs, clavicles, and thoracic spine appear intact.
*   **Diaphragm:** The diaphragms are well-defined.
*   **Soft Tissues:** The soft tissues of the chest wall are unremarkable.

**Possible Considerations (Without Clinical Context


In [20]:
# Inspect which fields the object returned by generate() exposes.
generation.keys()

odict_keys(['sequences', 'scores', 'attentions', 'past_key_values'])