In [1]:
import dspy

In [2]:

# Empty DSPy's global disk cache when this DSPy build exposes one, and print
# a marker so the cell output shows that the clear actually happened.
dspy_cache = getattr(dspy, "cache", None)
if hasattr(dspy_cache, "disk_cache"):
    dspy_cache.disk_cache.clear()
    print("clear")

clear


# Ollama solution

In [3]:
from package.base import DriverLM, ModelResponse, Usage
import httpx

# One shared HTTP client for all Ollama calls in this notebook.
# NOTE(review): the 600 s timeout is presumably there to tolerate slow local
# vision-model inference — confirm it is still the intended ceiling.
ollama_client = httpx.Client(timeout=600.0)
def ollama_request_fn(prompt: str | None = None, messages: list[dict] | None = None, temperature: float = 0.0, max_tokens: int = 256):
    if messages is None:
        messages = [{"role": "user", "content": prompt}]
    
    # Ollama expects images in a specific format
    processed_messages = []
    for msg in messages:
        content = msg["content"]
        
        # Handle multi-part content (text + images)
        if isinstance(content, list):
            # Extract text and images
            text_parts = [part["text"] for part in content if part.get("type") == "text"]
            image_parts = [part["image_url"]["url"] for part in content if part.get("type") == "image_url"]
            
            processed_msg = {
                "role": msg["role"],
                "content": " ".join(text_parts)
            }
            
            # Ollama uses "images" field for base64 data
            if image_parts:
                processed_msg["images"] = [
                    img.split(",")[1] if "base64," in img else img  # Extract base64 part
                    for img in image_parts
                ]
            
            processed_messages.append(processed_msg)
        else:
            # Simple text message
            processed_messages.append(msg)
    
    response = ollama_client.post(
        'http://localhost:11434/api/chat',
        json={
            "model": "llama3.2-vision:11b",
            "messages": processed_messages,
            "stream": False,
            "options": {"temperature": temperature}
        }
    )
    response.raise_for_status()
    return response.json()

def ollama_output_fn(response: dict) -> ModelResponse:
    """Build a ModelResponse from the JSON body of an Ollama chat reply.

    Reads the reply text from ``message.content`` (stripped of surrounding
    whitespace), the model name from ``model`` (default ``"custom"``), and
    the token counts from ``prompt_eval_count`` / ``eval_count`` (default 0).
    The total is the sum of those two counts.
    """
    prompt_tokens = response.get("prompt_eval_count", 0)
    completion_tokens = response.get("eval_count", 0)
    usage = Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )

    message = response.get("message", {})
    return ModelResponse.from_text(
        text=message.get("content", "").strip(),
        usage=usage,
        model=response.get("model", "custom"),
    )

In [4]:
import dspy
from package.base import DriverLM

# Setup
# Wrap the Ollama request/response adapters in a DriverLM with caching on,
# then register it as the LM that DSPy uses for the predictors below.
lm = DriverLM(
    request_fn=ollama_request_fn,  # Updated to handle images
    output_fn=ollama_output_fn,
    cache=True
)
lm.clear_cache()  # Clear old cache entries
dspy.configure(lm=lm)

# Use it
# Signature for single-image captioning: one image in, one free-text description out.
# NOTE(review): DSPy presumably builds the prompt from the class docstring and
# the `desc` strings below, so treat them as runtime text — confirm before rewording.
class SceneDescription(dspy.Signature):
    """Describe the contents of an image in detail."""
    image: dspy.Image = dspy.InputField(desc="Image to describe")
    scene_description: str = dspy.OutputField(desc="Detailed description")

# Build a predictor for the signature, load the sample image from a file path,
# and print the description returned by the currently configured LM.
describe = dspy.Predict(SceneDescription)
img = dspy.Image("./images/beach.jpg")
result = describe(image=img)
print(result.scene_description)

A serene beach scene with a palm tree on the left side, a single lounge chair with a beige umbrella to its right, and a small patch of green foliage behind it. The background features a vast expanse of white sand leading up to a tranquil turquoise ocean, set against a bright blue sky with a few wispy clouds.


In [5]:
# Echo the Image object; its repr shows the base64 data-URL form it holds.
img

Image(url=data:image/jpeg;base64,<IMAGE_BASE_64_ENCODED(49224)>)

In [6]:
# Same predictor, but the image is supplied as raw bytes instead of a path.
# Read image file as bytes
with open("images/lake_mountain.jpg", "rb") as f:
    image_bytes = f.read()
# Create Image from bytes
img = dspy.Image(image_bytes)
result = describe(image=img)
print(result.scene_description)

The image depicts a serene and picturesque scene, with a majestic mountain looming in the background, its snow-capped peak glistening in the sunlight. The mountain's rugged terrain is accentuated by the surrounding landscape, which features lush greenery and a tranquil lake that reflects the mountain's majestic form. The sky above is a brilliant blue, with only a few wispy clouds scattered across it, adding to the sense of tranquility and peacefulness that pervades the scene. The overall atmosphere is one of natural beauty and wonder, inviting the viewer to step into the idyllic world captured in the image.


In [7]:
# Echo the bytes-built Image; its repr shows the base64 data-URL form it holds.
img

Image(url=data:image/jpeg;base64,<IMAGE_BASE_64_ENCODED(17836)>)

# Bedrock solution

In [8]:
import boto3
import base64
from typing import Any

def bedrock_request_fn(prompt: str | None = None, messages: list[dict] | None = None, temperature: float = 0.0, max_tokens: int = 2048):
    client = boto3.client('bedrock-runtime', region_name='us-east-1')
    
    if messages is None:
        messages = [{"role": "user", "content": prompt}]
    
    system_messages = []
    conversation_messages = []
    
    for msg in messages:
        if msg["role"] == "system":
            system_messages.append({"text": msg["content"]})
        else:
            content = msg["content"]
            
            # Handle multi-part content (text + images)
            if isinstance(content, list):
                bedrock_content = []
                
                for part in content:
                    if part.get("type") == "text":
                        bedrock_content.append({"text": part["text"]})
                    
                    elif part.get("type") == "image_url":
                        image_url = part["image_url"]["url"]
                        
                        if image_url.startswith("data:"):
                            # Parse: "data:image/jpeg;base64,..." -> format + bytes
                            header, data = image_url.split(",", 1)
                            format_type = header.split(";")[0].split("/")[1]  # "jpeg"
                            image_bytes = base64.b64decode(data)
                            
                            bedrock_content.append({
                                "image": {
                                    "format": format_type,
                                    "source": {"bytes": image_bytes}
                                }
                            })
                
                conversation_messages.append({
                    "role": msg["role"],
                    "content": bedrock_content
                })
            else:
                # Simple text message
                conversation_messages.append({
                    "role": msg["role"],
                    "content": [{"text": content}]
                })
    
    request_params = {
        "modelId": "us.amazon.nova-lite-v1:0",
        "messages": conversation_messages,
        "inferenceConfig": {
            "temperature": temperature,
            "maxTokens": max_tokens,
        }
    }
    
    if system_messages:
        request_params["system"] = system_messages
    
    response = client.converse(**request_params)
    return response

def bedrock_output_fn(response: dict) -> ModelResponse:
    """Build a ModelResponse from a Bedrock Converse response dict.

    The reply text is the concatenation of every ``text`` block in
    ``output.message.content``. Blocks without a ``text`` key are skipped, so
    a non-text first block no longer raises. A response missing
    ``output.message.content`` still raises ``KeyError``.

    The model name is read from the ``x-amzn-bedrock-model-id`` entry of
    ``ResponseMetadata.HTTPHeaders`` (default ``"bedrock-model"``), and token
    counts come from ``usage`` (default 0 each).
    """
    content_blocks = response["output"]["message"]["content"]
    # Don't assume block 0 is the text block; gather all text blocks instead.
    content = "".join(block["text"] for block in content_blocks if "text" in block)

    model = response.get("ResponseMetadata", {}).get("HTTPHeaders", {}).get("x-amzn-bedrock-model-id", "bedrock-model")

    usage_data = response.get("usage", {})
    usage = Usage(
        prompt_tokens=usage_data.get("inputTokens", 0),
        completion_tokens=usage_data.get("outputTokens", 0),
        total_tokens=usage_data.get("totalTokens", 0)
    )

    return ModelResponse.from_text(text=content, usage=usage, model=model)

In [9]:
# Create Bedrock LM
# Rebinds `lm` to a DriverLM backed by the Bedrock adapters and makes it the
# LM DSPy uses from here on, replacing the one configured earlier.
lm = DriverLM(
    request_fn=bedrock_request_fn,
    output_fn=bedrock_output_fn,
    cache=True
)
dspy.configure(lm=lm)

# Same calling code as the Ollama section; only the configured LM differs.
# NOTE(review): "creak" in the file name may be a typo for "creek" — the path
# must match the file on disk, so confirm before renaming anything.
describe = dspy.Predict(SceneDescription)
img = dspy.Image("./images/forest_creak.jpeg")
result = describe(image=img)
print(result.scene_description)

The image depicts a stunning waterfall cascading down a rocky cliff in a lush, green forest. The waterfall is divided into two main streams that flow over the rocks, creating a series of small waterfalls and pools of water. The surrounding area is densely covered with green foliage, and the rocks are partially covered in moss and greenery. The sunlight filters through the canopy, casting dappled light on the scene, enhancing the natural beauty of the environment.


In [10]:
# Echo the Image object; its repr shows the base64 data-URL form it holds.
img

Image(url=data:image/jpeg;base64,<IMAGE_BASE_64_ENCODED(812056)>)