# Pixtral 12B Multimodal AI Queries on IBM watsonx.ai

This notebook demonstrates how to run multimodal AI queries with Mistral AI's Pixtral 12B model on IBM watsonx.ai.

In [None]:
# Install required packages (uncomment if needed)
# %pip install ibm_watsonx_ai requests Pillow


In [None]:
import base64
import os
import textwrap

import requests
from ibm_watsonx_ai import Credentials
from ibm_watsonx_ai.foundation_models import ModelInference
from PIL import Image

In [None]:
# IBM watsonx credentials are read from the environment so that real secrets
# never have to be saved inside the notebook. Set WATSONX_EU_APIKEY and
# WATSONX_EU_PROJECT_ID before starting the kernel; when a variable is unset
# the placeholder below is used instead (and requests will not authenticate).
WATSONX_EU_APIKEY = os.environ.get("WATSONX_EU_APIKEY", "YOUR_API_KEY_HERE")
WATSONX_EU_PROJECT_ID = os.environ.get("WATSONX_EU_PROJECT_ID", "YOUR_PROJECT_ID_HERE")
# Service endpoint handed to Credentials together with the API key.
URL = "https://eu-gb.ml.cloud.ibm.com"

credentials = Credentials(
    url=URL,
    api_key=WATSONX_EU_APIKEY
)

In [None]:
# Images to query. Both files live in the same directory, so the list is
# assembled from the shared prefix plus each file name.
_SKIN_ATLAS_ACNE_DIR = "https://hsc.unm.edu/medicine/departments/dermatology/_images/skin-atlas/acne/"
image_urls = [
    _SKIN_ATLAS_ACNE_DIR + file_name
    for file_name in ("acne-type-iv.jpg", "acne-type-i.jpg")
]

In [None]:
def encode_images_to_base64(urls, timeout=30):
    """Download each image and return its bytes as a base64 text string.

    Args:
        urls: Iterable of image URLs; each one is fetched with an HTTP GET.
        timeout: Seconds to wait on each request before giving up, so a
            stalled server cannot hang the notebook indefinitely.

    Returns:
        A list of base64 strings, one per URL, in the same order as ``urls``.
        An empty ``urls`` yields an empty list.

    Raises:
        requests.HTTPError: If a server responds with an error status.
        requests.RequestException: For connection failures or timeouts.
    """
    encoded_images = []
    for url in urls:
        response = requests.get(url, timeout=timeout)
        # Fail loudly on 4xx/5xx: otherwise the body of an error page would
        # be encoded and sent to the model as if it were the image.
        response.raise_for_status()
        encoded = base64.b64encode(response.content).decode("utf-8")
        encoded_images.append(encoded)
    return encoded_images

In [None]:
def augment_api_request_body(user_query, image_base64):
    """Build the chat message list for one text question about one image.

    Args:
        user_query: The user's question; it is appended to a fixed
            instruction asking for a 1-2 sentence answer.
        image_base64: Base64 text of the image, embedded in a JPEG data URL.

    Returns:
        A one-element list holding a ``user`` message whose content has a
        text part followed by an image_url part.
    """
    text_part = {
        "type": "text",
        "text": "You are a helpful assistant. Answer the following user query in 1 or 2 sentences: " + user_query,
    }
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
    }
    return [{"role": "user", "content": [text_part, image_part]}]

In [None]:
# Settings for the Pixtral 12B model handle; max_tokens is passed as 200.
model_settings = {
    "model_id": "mistralai/pixtral-12b",
    "credentials": credentials,
    "project_id": WATSONX_EU_PROJECT_ID,
    "params": {"max_tokens": 200},
}
model = ModelInference(**model_settings)

In [None]:
# Download every URL in image_urls once and keep the base64 strings for reuse.
encoded_images = encode_images_to_base64(image_urls)

In [None]:
# Question asked about each image; augment_api_request_body appends it to a
# fixed instruction requesting a 1-2 sentence answer.
user_query = "What kind of disease this person is having? Please suggest required medications."

In [None]:
# Ask the same question about each encoded image and print the model's reply.
for image_number, image_payload in enumerate(encoded_images, start=1):
    print(f"\n===> Querying on image #{image_number}")

    chat_messages = augment_api_request_body(user_query, image_payload)
    chat_result = model.chat(messages=chat_messages)

    print("\n<== Response by Pixtral 12B model ==>")
    # Pull out the first choice's text and re-wrap it to 100 columns.
    answer_text = chat_result['choices'][0]['message']['content']
    print(textwrap.fill(answer_text, width=100))