In [None]:
import os
import io
import json
import requests
import base64
from PIL import Image
from datasets import load_dataset
import matplotlib.pyplot as plt

# 1. Load dataset and get a single image
print("Loading dataset...")
dataset = load_dataset("eltorio/ROCOv2-radiology")
example = dataset["test"][2]  # Get the first image from the test set
image = example["image"]
caption = example["caption"]
image_id = example.get("image_id", "sample_image")

print(f"Got image with ID: {image_id}")
print(f"Original caption: {caption}")

# Display the original image
plt.figure(figsize=(10, 10))
plt.imshow(image)
plt.title("Original Image")
plt.axis('off')
plt.show()

# 2. Convert image to base64
buffer = io.BytesIO()
image.save(buffer, format="PNG")
buffer.seek(0)
image_bytes = buffer.getvalue()
base64_image = base64.b64encode(image_bytes).decode('utf-8')

# Decode the base64 image back to verify it's correct
decoded_image_bytes = base64.b64decode(base64_image)
decoded_image = Image.open(io.BytesIO(decoded_image_bytes))

# Display the decoded image to verify it matches the original
plt.figure(figsize=(10, 10))
plt.imshow(decoded_image)
plt.title("Decoded Base64 Image")
plt.axis('off')
plt.show()

# 3. Check how the API expects the image format
print(f"Length of base64 string: {len(base64_image)}")
print(f"First 100 characters of base64: {base64_image[:100]}...")

# The issue might be in how we're formatting the prompt
# Let's try a different format based on Ollama documentation
payload = {
    "model": "llava:13b",
    "messages": [
        {
            "role": "user",
            "content": "Please describe this medical image as a doctor would and include what kind of imaging modality is used such as for example ultrasound, X-ray, CT, MRI."
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "data": base64_image
                }
            ]
        }
    ],
    "stream": True
}

# Rest of your code remains the same...
try:
    print("Sending request with proper JSON format...")
    response = requests.post(
        LLAVA_URL,
        json=payload,
        stream=True
    )
    
    print(f"Response status: {response.status_code}")
    print(f"Response headers: {dict(response.headers)}")
    
    if response.status_code == 200:
        # Process streaming response
        print("Processing streaming response...")
        full_content = ""
        for line in response.iter_lines():
            if line:
                print(f"Line: {line.decode('utf-8')[:100]}...")  # Print first 100 chars
                try:
                    json_line = json.loads(line.decode('utf-8'))
                    if 'message' in json_line and 'content' in json_line['message']:
                        content_chunk = json_line['message']['content']
                        full_content += content_chunk
                        print(f"Chunk added: {content_chunk}")
                except json.JSONDecodeError:
                    print(f"Could not parse line: {line}")
        
        print("\nFinal generated caption:")
        print(full_content)
    else:
        print(f"Error: {response.status_code}, {response.text}")

except Exception as e:
    print(f"Request exception: {e}")

print("\nTroubleshooting complete!")

Sending text-only request to https://ollama.ux.uis.no/api/chat
Response status: 200
Response headers: {'Alt-Svc': 'h3=":443"; ma=2592000', 'Content-Type': 'application/x-ndjson', 'Date': 'Thu, 24 Apr 2025 11:19:27 GMT', 'Server': 'Caddy', 'Transfer-Encoding': 'chunked'}
Response content: {"model":"llava:13b","created_at":"2025-04-24T11:19:27.382097312Z","message":{"role":"assistant","content":" The"},"done":false}
{"model":"llava:13b","created_at":"2025-04-24T11:19:27.408744123Z","message":{"role":"assistant","content":" color"},"done":false}
{"model":"llava:13b","created_at":"2025-04-24T11:19:27.419902541Z","message":{"role":"assistant","content":" of"},"done":false}
{"model":"llava:13b","created_at":"2025-04-24T11:19:27.431275566Z","message":{"role":"assistant","content":" an"

Success! Full response:
Error occurred: Extra data: line 2 column 1 (char 129)


In [None]:
# Step 1: Load the dataset
from datasets import load_dataset
ds = load_dataset("eltorio/ROCOv2-radiology")

# Step 2: Extract images and captions
images = [example["image"] for example in ds["train"]]
captions = [example["caption"] for example in ds["train"]]

In [3]:
datasets load eltorio/ROCOv2-radiology --download_mode=force

SyntaxError: invalid syntax (4104181462.py, line 1)

In [2]:
import requests
import json
from datasets import load_dataset
from tqdm import tqdm
from PIL import Image
import os
from nltk.translate.bleu_score import sentence_bleu
import wandb

In [None]:
# Load the saved features and captions
features = np.load("features.npy")  # Load features from the .npy file
with open("captions.json", "r") as f:
    captions = json.load(f)  # Load captions from the .json file