In [2]:
import dspy

In [3]:

# Empty DSPy's global disk cache, but only if this DSPy install exposes
# both `dspy.cache` and `dspy.cache.disk_cache`.
if hasattr(dspy, "cache"):
    if hasattr(dspy.cache, "disk_cache"):
        dspy.cache.disk_cache.clear()
        print("clear")

clear


In [4]:
from package.base import DriverLM, ModelResponse, Usage
import httpx

# Single module-level HTTP client shared by every call to ollama_request_fn.
# The timeout is 600 seconds; presumably generous because local model
# inference can be slow — adjust if requests should fail faster.
ollama_client = httpx.Client(timeout=600.0)
def ollama_request_fn(prompt: str | None = None, messages: list[dict] | None = None, temperature: float = 0.0, max_tokens: int = 256):
    if messages is None:
        messages = [{"role": "user", "content": prompt}]
    
    # Ollama expects images in a specific format
    processed_messages = []
    for msg in messages:
        content = msg["content"]
        
        # Handle multi-part content (text + images)
        if isinstance(content, list):
            # Extract text and images
            text_parts = [part["text"] for part in content if part.get("type") == "text"]
            image_parts = [part["image_url"]["url"] for part in content if part.get("type") == "image_url"]
            
            processed_msg = {
                "role": msg["role"],
                "content": " ".join(text_parts)
            }
            
            # Ollama uses "images" field for base64 data
            if image_parts:
                processed_msg["images"] = [
                    img.split(",")[1] if "base64," in img else img  # Extract base64 part
                    for img in image_parts
                ]
            
            processed_messages.append(processed_msg)
        else:
            # Simple text message
            processed_messages.append(msg)
    
    response = ollama_client.post(
        'http://localhost:11434/api/chat',
        json={
            "model": "llama3.2-vision:11b",
            "messages": processed_messages,
            "stream": False,
            "options": {"temperature": temperature}
        }
    )
    response.raise_for_status()
    return response.json()


In [5]:
def ollama_output_fn(response: dict) -> ModelResponse:
    """Build a ModelResponse from the JSON dict returned by the chat request.

    Reads the reply text from ``response["message"]["content"]``, the model
    name from ``response["model"]`` (falling back to ``"custom"``), and token
    counts from ``prompt_eval_count`` / ``eval_count``. Any missing field
    falls back to an empty string or zero.
    """
    reply_text = response.get("message", {}).get("content", "")
    model_name = response.get("model", "custom")

    # Read each counter once; the total is simply their sum.
    prompt_tokens = response.get("prompt_eval_count", 0)
    completion_tokens = response.get("eval_count", 0)
    token_usage = Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )

    return ModelResponse.from_text(
        text=reply_text.strip(),
        usage=token_usage,
        model=model_name,
    )


# native_lm = DriverLM(
#     request_fn=ollama_request_fn,
#     output_fn=ollama_output_fn,
#     cache=True
# )


In [None]:
import dspy
from package.base import DriverLM

# Setup: wrap the two Ollama adapter functions in a DriverLM and register it
# as the language model DSPy uses for the rest of the notebook.
lm = DriverLM(
    request_fn=ollama_request_fn,  # Builds and sends the chat request (handles image parts)
    output_fn=ollama_output_fn,  # Turns the raw JSON response into a ModelResponse
    cache=True
)
lm.clear_cache()  # Clear old cache entries
dspy.configure(lm=lm)

# Use it
# Signature for the image-description task: one image in, one text field out.
# NOTE(review): DSPy signatures use the class docstring and the `desc` strings
# when building the prompt, so rewording them changes model behaviour —
# confirm against the DSPy Signatures docs before editing.
class SceneDescription(dspy.Signature):
    """Describe the contents of an image in detail."""
    image: dspy.Image = dspy.InputField(desc="Image to describe")
    scene_description: str = dspy.OutputField(desc="Detailed description")

# Build a predictor for the signature (`describe` is reused by a later cell),
# then run it on an image created from a relative file path.
describe = dspy.Predict(SceneDescription)
img = dspy.Image("./images/beach.jpg")
result = describe(image=img)
print(result.scene_description)

The image depicts a serene beach scene, featuring a palm tree leaning slightly to the left, its trunk visible from the base to the top, where it splits into multiple fronds. A single sun lounger is positioned directly in front of the tree, with a white umbrella providing shade above. The background is a clear blue sky with a few clouds, and the overall atmosphere is one of tranquility and relaxation.


In [None]:
# Load the image file's raw bytes from disk
with open("images/lake_mountain.jpg", "rb") as image_file:
    image_bytes = image_file.read()
# Build the Image from the in-memory bytes instead of a path
img = dspy.Image(image_bytes)
# `describe` is the predictor created in an earlier cell
result = describe(image=img)
print(result.scene_description)

The image depicts a serene lake scene with a majestic mountain in the background. The lake's surface is calm and peaceful, reflecting the vibrant colors of the surrounding environment. A few trees with red leaves are visible on the right side of the image, adding a pop of color to the otherwise monochromatic scene. In the distance, a mountain rises, its snow-capped peak glistening in the light. The sky above is a brilliant blue, with a few wispy clouds scattered across it. The overall atmosphere of the image is one of tranquility and natural beauty, inviting the viewer to step into the peaceful world it presents.
