In [None]:
# NOTE: this script takes hours to run on the full thumbnail set, but it works.

import os
import glob
import base64
import json
import csv
import requests

# Directory that main() scans (non-recursively) for thumbnail image files.
IMAGE_DIR   = "thumbnails_travel"
# CSV file that main() writes the filename/path/description rows to.
OUTPUT_CSV  = "thumbnail_descriptions_llava.csv"
# HTTP endpoint that describe_with_llava() POSTs its JSON payload to.
OLLAMA_URL  = "http://localhost:11434/api/generate"
# Value sent as the "model" field of the request payload.
MODEL_NAME  = "llava:7b"
# Value sent as the "prompt" field of the request payload, once per image.
PROMPT      = "Describe this YouTube thumbnail in one detailed sentence, focusing on what is visually shown and the style."


def encode_image_to_base64(path: str) -> str:
    """Return the contents of the file at *path* as a Base64 text string.

    The file is read in binary mode in one go, Base64-encoded, and the
    resulting bytes are decoded as UTF-8 so they can be embedded in JSON.
    """
    with open(path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


def describe_with_llava(image_path: str, timeout=(10, 600)) -> str:
    """Ask the configured Ollama model for a caption of one image.

    The image file is Base64-encoded and POSTed to ``OLLAMA_URL`` together
    with ``MODEL_NAME`` and ``PROMPT``. The caption is printed and returned.

    Args:
        image_path: Path of the image file to describe.
        timeout: Passed straight to ``requests.post``; either a single number
            of seconds or a ``(connect, read)`` tuple. Without a timeout,
            ``requests`` waits indefinitely, so one stalled request would
            block an entire batch run.

    Returns:
        The ``"response"`` field of the JSON reply with surrounding
        whitespace stripped, or ``""`` if that field is missing or null.

    Raises:
        OSError: If the image file cannot be read.
        requests.exceptions.RequestException: On connection errors, timeouts,
            or a non-2xx HTTP status (via ``raise_for_status``).
    """
    payload = {
        "model": MODEL_NAME,
        "prompt": PROMPT,
        # Request a single JSON reply rather than a streamed one; the code
        # below parses the body with one resp.json() call.
        "stream": False,
        "images": [encode_image_to_base64(image_path)],
    }

    resp = requests.post(OLLAMA_URL, json=payload, timeout=timeout)
    resp.raise_for_status()

    data = resp.json()
    # `or ""` guards against an explicit null "response" value, which would
    # otherwise raise AttributeError on .strip().
    caption = (data.get("response") or "").strip()
    print(caption)
    return caption


def main():
    """Caption every thumbnail in ``IMAGE_DIR`` and save the results as CSV.

    Collects ``*.jpg``, ``*.jpeg``, ``*.png`` and ``*.webp`` files directly
    inside ``IMAGE_DIR``, captions them in sorted path order with
    ``describe_with_llava``, and writes one ``filename, path, description``
    row per image to ``OUTPUT_CSV``.

    Each row is written and flushed as soon as its caption is available, so
    an interrupted run (e.g. Ctrl-C / kernel interrupt, which is not caught
    by ``except Exception``) still leaves every finished caption on disk
    instead of losing hours of work held only in memory.

    If no images are found, a warning is printed and no CSV is created.
    A failure on a single image is logged and recorded as an empty
    description; the run continues with the next image.
    """
    exts = ("*.jpg", "*.jpeg", "*.png", "*.webp")
    img_paths = []
    for ext in exts:
        img_paths.extend(glob.glob(os.path.join(IMAGE_DIR, ext)))

    if not img_paths:
        print(f"[WARN] No images found in {IMAGE_DIR}")
        return

    img_paths.sort()
    total = len(img_paths)
    print(f"[INFO] Found {total} thumbnails.")

    # Open the output before the long loop: an unwritable path fails
    # immediately rather than after all captions have been generated.
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["filename", "path", "description"])
        writer.writeheader()

        for i, path in enumerate(img_paths, start=1):
            print(f"[{i}/{total}] Captioning {path} ...")
            try:
                caption = describe_with_llava(path)
            except Exception as e:
                # Best-effort batch job: record the failure and keep going.
                print(f"   [ERROR] {path}: {e}")
                caption = ""

            writer.writerow({
                "filename": os.path.basename(path),
                "path": path,
                "description": caption,
            })
            # Push the row to disk now so a crash or interrupt cannot lose it.
            f.flush()

    print(f"[DONE] Saved descriptions to {OUTPUT_CSV}")


# Run the captioning job only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


[INFO] Found 18444 thumbnails.
[1/18444] Captioning thumbnails_travel/--27C2-_56U.jpg ...
The image features a woman holding a cardboard box while standing in front of a house with a sign that reads "apartment hunting alone" in bold text. The thumbnail has an informative design style, including bullet points highlighting different aspects such as "18", "DIY", "moving diaries", and "2 moving diaries". There's also a playful tone to the image with the inclusion of cartoon elements like pizza boxes and a broken heart. The overall aesthetic is vibrant and engaging, designed to catch viewers' attention with its mix of text and visual elements.
[2/18444] Captioning thumbnails_travel/--9KQM9LsU8.jpg ...
The image is a composite of two separate photographs with text overlaying both. On the left side, there's a photo of a woman eating food from a bowl while making a humorous facial expression; she is seated at a table and appears to be in a restaurant or similar setting. On the right, there's a

KeyboardInterrupt: 