In [48]:
from ollama import chat
from pydantic import BaseModel
import json
from typing import Literal, List, Optional

# Ollama model tag handed to every chat() call in this notebook. Each request
# attaches an image, so this presumably has to be a vision-capable model that
# is already available to the local Ollama install (TODO confirm).
model = "llama3.2-vision"

In [9]:
import base64

def get_file_as_base64(file_path: str) -> str:
    """Read a file in binary mode and return its contents base64-encoded.

    Args:
        file_path: Path of the file to read.

    Returns:
        The base64 encoding of the file's raw bytes, as a text string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, mode='rb') as handle:
        encoded = base64.b64encode(handle.read())
    # b64encode yields bytes; decode them so callers receive a plain str.
    return encoded.decode('utf-8')

In [47]:
# Encode the sample photo once; the chat requests below reuse this string.
# The path is relative, so it resolves against the kernel's working directory.
img_base64 = get_file_as_base64(
    "../../assets/images/bird_in_flight.jpg",
)

In [18]:
# Sample call to ollama via chat to ask about image.
# The picture is attached as a base64 string in the message's "images" list,
# and temperature 0 is requested through `options`.

response = chat(
    model=model,
    messages=[
        {
            "role": "user",
            "content": "Describe this image.",
            "images": [img_base64],
        }
    ],
    options={'temperature': 0},
)

# Read the text straight off the reply rather than rebinding `response` to the
# nested message, so `response` always refers to the full chat reply.
print(response.message.content)

This photograph captures a bird in mid-flight, its wings outstretched as it soars through the air.

The bird's plumage is predominantly dark gray, with subtle hints of lighter gray on its head and tail feathers. Its beak appears to be black or dark brown, adding a striking contrast to its overall appearance. The bird's posture conveys a sense of effortless flight, with its wings spread wide and its body angled slightly downward.

The background of the image is a soft, muted blue-gray hue, which provides a subtle yet effective backdrop for the bird's dynamic pose. Overall, this photograph presents a captivating snapshot of a bird in motion, showcasing its agility and beauty as it glides through the air.


In [42]:
# Structured output with image analysis
# See Example: https://ollama.com/blog/structured-outputs

# One object reported for an image; used as the element type of
# ImageInfo.objects. Described with comments instead of a class docstring
# because pydantic copies a docstring into the generated JSON schema.
class ImageObject(BaseModel):
    name: str              # label of the object
    confidence: float      # presumably a 0-1 score; no range is enforced here
    attributes: List[str]  # free-form descriptors of the object

# Structured description of a whole image. Its JSON schema is handed to chat()
# as the response format, and the reply text is validated back into this model.
# Described with comments instead of a class docstring because pydantic copies
# a docstring into the generated JSON schema.
class ImageInfo(BaseModel):
    summary: str
    image_type: Literal['Photo', 'Painting', 'Digital', '3D_Render', 'Unknown']
    image_style: Literal['Realistic', 'Impressionist', 'Abstract', 'PixelArt', 'Other']
    objects: List[ImageObject]
    scene: str
    colors: List[str]
    # 'Unknown' gives the model an honest answer when the image offers no
    # time-of-day cue (indoor shots, renders, abstract art), matching the
    # escape values already offered by image_type and setting.
    time_of_day: Literal['Morning', 'Afternoon', 'Evening', 'Night', 'Unknown']
    setting: Literal['Indoor', 'Outdoor', 'Unknown']
    # Optional: stays None when no text is reported for the image.
    text_content: Optional[str] = None

In [49]:
# Instruction text sent with the image in the structured-output request.
# "text content" (not "text context") matches the schema's text_content field.
prompt = """Examine the image, give me a summary, and details
about each object in the image. Include time of day, key colors,
the setting, any text content, and what type of image it is
(photo, painting, digital, 3D render, ...) and what style it is in (realistic, impressionist, pixel art, ...).
"""

# Structured request: `format` carries ImageInfo's JSON schema, and the reply
# text is then validated straight back into an ImageInfo instance.
response = chat(
    model=model,
    messages=[dict(role="user", content=prompt, images=[img_base64])],
    format=ImageInfo.model_json_schema(),
    options=dict(temperature=0),
)

# model_validate_json raises if the reply text does not satisfy the schema.
image_data = ImageInfo.model_validate_json(response.message.content)
print(image_data)

summary='A bird in flight against a gray sky.' image_type='Photo' image_style='Realistic' objects=[ImageObject(name='Bird', confidence=1.0, attributes=['Gray', 'Wings spread']), ImageObject(name='Sky', confidence=1.0, attributes=['Gray'])] scene='Outdoor' colors=['Gray'] time_of_day='Morning' setting='Outdoor' text_content=None
