##### Copyright 2024 Google LLC.

In [1]:
# @title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# X-Ray Smoker Analysis

## Overview

A simple AI tool that uses MedGemma to analyze chest X-ray images and predict whether the person is more likely a smoker or a non-smoker, based on visible lung patterns and radiological signs.

<table align="left">
  <td>
    <a target="_blank" href="https://colab.research.google.com/drive/1JncmLeZOR1SP4cgC30M6xzjAbsl_fXhN?usp=sharing"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
</table>

## Setup

### Select the Colab runtime

To complete this tutorial, you'll need to have a Colab runtime with sufficient resources to run the MedGemma model. In this case, you can use a T4 GPU:

1. In the upper-right of the Colab window, select **▾ (Additional connection options)**.
2. Select **Change runtime type**.
3. Under **Hardware accelerator**, select **T4 GPU**.

### Install dependencies

Run the cell below to install all the required dependencies.

In [2]:
!pip install --upgrade --quiet accelerate bitsandbytes transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.2/40.2 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.5/10.5 MB[0m [31m111.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m102.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m76.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m63.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [14]:
import requests
from PIL import Image
from io import BytesIO
import torch
from transformers import pipeline, BitsAndBytesConfig

## Load the model.

Initialize the pre-trained MedGemma-4b-it model using a 4-bit bitsandbytes quantization configuration.

In [4]:
# Pipeline task name passed to `pipeline(...)` in a later cell.
task = "image-text-to-text"

# Hugging Face Hub identifier of the checkpoint to load.
model_id = "google/medgemma-4b-it"

In [5]:
# bitsandbytes settings used when loading the model weights in 4-bit form.
# NOTE(review): the compute dtype here is float16 while the model kwargs cell
# requests torch.bfloat16 — confirm the mismatch is intentional.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                 # load the weights in 4-bit precision
    bnb_4bit_quant_type="nf4",         # 4-bit quantization data type
    bnb_4bit_use_double_quant=True,    # enable double (nested) quantization
    bnb_4bit_compute_dtype="float16",  # dtype used for computation
)

In [6]:
# Keyword arguments forwarded to the model loader through `pipeline(...)`.
model_kwargs = {
    "torch_dtype": torch.bfloat16,
    "device_map": "cuda:0",  # place the model on the first CUDA device
    "quantization_config": quantization_config,
}

In [7]:
# Build the pipeline for the configured task, checkpoint and loading options.
pipe = pipeline(
    task=task,
    model=model_id,
    model_kwargs=model_kwargs,
)

config.json:   0%|          | 0.00/1.61k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/90.6k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.64G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/70.0 [00:00<?, ?B/s]

chat_template.jinja:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


tokenizer_config.json:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

Device set to use cuda:0


In [8]:
# Turn sampling off in the model's generation config.
generation_config = pipe.model.generation_config
generation_config.do_sample = False

## Get Image

You can either reference the image directly by its path (e.g., `image_url = "/path/image.png"`) or download it and load the bytes through a `BytesIO` buffer, as shown below.

In [22]:
# Sample image used for the demonstration.
image_url = "https://www.clinicaladvisor.com/wp-content/uploads/sites/11/2018/12/lungs_0913newsline_451310.jpg"

# Browser-like User-Agent sent with the request — presumably because the host
# rejects requests without one; TODO confirm.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
}

# The timeout keeps this cell from hanging indefinitely on an unresponsive host,
# and raise_for_status() surfaces HTTP errors here rather than as an opaque
# image-decoding failure in a later cell.
response = requests.get(image_url, headers=headers, timeout=30)
response.raise_for_status()

In [23]:
# Display the HTTP status code of the image download as the cell output.
response.status_code

200

In [24]:
# Decode the downloaded bytes into a PIL image via an in-memory buffer.
image_buffer = BytesIO(response.content)
image = Image.open(image_buffer)

# Normalise to RGB; an image that is already RGB is kept unchanged.
image = image if image.mode == 'RGB' else image.convert('RGB')

## Define the Prompt Template

In [10]:
# Question placed in the user turn next to the X-ray image.
query = (
    "Based on the attached chest X-ray,\n"
    "can you analyze and tell me whether the patient is more likely a smoker or a non-smoker?"
)

In [25]:
# System prompt: sets the assistant's role, lists the radiological signs to look
# for, and asks for step-by-step, likelihood-based reasoning with a confidence level.
system = """
You are a medical imaging expert AI trained to analyze chest X-rays and identify potential indicators of smoking history.
Given a chest X-ray image, you must determine whether the scan is more likely to belong to a smoker or non-smoker based on clinical radiological signs such as:

- Lung hyperinflation
- Bullae or blebs
- Increased lung lucency
- Irregular reticular or nodular patterns
- Upper lobe cystic changes
- Emphysematous changes or interstitial thickening

Always include a step-by-step explanation based on observed patterns in the image, and mention the level of confidence (e.g., high, moderate, low).
Be cautious not to make a definitive diagnosis but rather offer a likelihood-based reasoning.
"""

# System turn: a single text part holding the prompt above.
system_turn = {
    "role": "system",
    "content": [{"type": "text", "text": system}],
}

# User turn: the question text followed by the image.
user_turn = {
    "role": "user",
    "content": [
        {"type": "text", "text": query},
        {"type": "image", "image": image},
    ],
}

# Chat-style input handed to the pipeline: system turn first, then the user turn.
messages = [system_turn, user_turn]

## Execute the Prompt

In [None]:
# Run the pipeline on the chat messages, allowing up to 2048 new tokens.
output = pipe(text=messages, max_new_tokens=2048)

# Take the first result; the last entry of its "generated_text" list holds the
# reply under "content" (presumably the assistant's turn).
# NOTE(review): this rebinds `response`, which the image-download cell uses for
# the HTTP response, so re-running the image-loading cell afterwards would see
# a different object. Consider a distinct name.
first_result = output[0]
conversation = first_result["generated_text"]
response = conversation[-1]["content"]

In [27]:
# Print the generated reply as plain text (print renders its line breaks rather
# than showing the string's repr).
print(response)

Okay, I will analyze the chest X-ray and provide a likelihood assessment based on the provided clinical radiological signs.

**Analysis:**

1.  **Lung Hyperinflation:** The image shows a relatively hyperinflated appearance of the lungs, which can be a sign of emphysema, a condition often associated with smoking.
2.  **Increased Lung Lucency:** The lungs appear more translucent or "darker" than normal, indicating increased air in the lungs. This is a common finding in emphysema.
3.  **Bullae or Blebs:** There are some rounded, air-filled structures visible in the lungs, which could be bullae or blebs. These are also associated with emphysema.
4.  **Emphysematous Changes or Interstitial Thickening:** The overall appearance suggests some degree of emphysema, which can be associated with smoking.

**Conclusion:**

Based on the observed signs of lung hyperinflation, increased lung lucency, and possible bullae/blebs, the image is more likely to belong to a smoker.

**Confidence Level:** Mode