In [4]:
from transformers import BlipProcessor, BlipForQuestionAnswering

# Initialize model
# Single source of truth for the checkpoint id, so the processor and the model
# are always loaded from the same pretrained weights/config.
BLIP_VQA_CHECKPOINT = "Salesforce/blip-vqa-base"

# `processor` prepares (image, question) pairs and decodes generated ids;
# `model` is the BLIP question-answering network that produces those ids.
processor = BlipProcessor.from_pretrained(BLIP_VQA_CHECKPOINT)
model = BlipForQuestionAnswering.from_pretrained(BLIP_VQA_CHECKPOINT)

  from .autonotebook import tqdm as notebook_tqdm
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [5]:
# Resolve the image locations used by the examples below.
from pathlib import Path

# The project root sits three directory levels above the notebook's working directory.
base_path = Path.cwd().parent.parent.parent

# Folder holding the sample images, and the single image (img_17.jpg) used as
# the running example in this notebook.
image_path = base_path.joinpath("resource", "images")
image_17_path = image_path.joinpath("img_17.jpg")

# Echo the resolved locations so a wrong working directory is obvious at a glance.
print(f"Base Path: {base_path}")
print(f"Database Path: {image_path}")
print(f"Image Path: {image_17_path}")

Base Path: /home/afiq/fyp/fafa-repo/backend/app
Database Path: /home/afiq/fyp/fafa-repo/backend/app/resource/images
Image Path: /home/afiq/fyp/fafa-repo/backend/app/resource/images/img_17.jpg


In [13]:
import os
from io import BytesIO
from pathlib import Path
from urllib.parse import urlparse
from urllib.request import url2pathname

import requests
import torch
from langchain.tools import tool
from PIL import Image

@tool("image_qna_tool", description="Use this tool when you want to answer questions that needs visual information from an image. The image should be a URL or a local file path.")
def image_qna_tool(img_url: str, question: str) -> str:
    """
    Answer a natural-language question about an image using the BLIP VQA model.

    The image is loaded with `load_image`, paired with the question by the
    module-level `processor`, and the answer is generated by the module-level
    `model`.

    Args:
        img_url: The URL/URI or path of the image
        question: The question to answer about the image

    Returns:
        The answer to the question, decoded with special tokens removed.

    Raises:
        ValueError: If `img_url` is neither a supported URL nor an existing local path.
        FileNotFoundError: If a file:// URI points at a file that does not exist.
        requests.HTTPError: If a remote image request returns an error status.
    """
    image = load_image(img_url)

    # Put the input tensors on the same device as the model; otherwise
    # generation fails with a device mismatch once the model is moved to GPU.
    inputs = processor(image, question, return_tensors="pt").to(model.device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        output_ids = model.generate(**inputs)

    # Only the first generated sequence is decoded (one image/question pair goes in).
    return processor.decode(output_ids[0], skip_special_tokens=True)


# Helper
def load_image(img_url: str, timeout: float = 30.0) -> Image.Image:
    """
    Load an image and return it converted to RGB.

    Supported sources:
    - http/https URL
    - local file path
    - file:// URI

    Args:
        img_url: URL, file:// URI, or local filesystem path of the image.
        timeout: Seconds to wait for a remote server before giving up
            (only used for http/https sources).

    Returns:
        The image as an RGB `PIL.Image.Image`.

    Raises:
        requests.HTTPError: If the remote server answers with an error status.
        FileNotFoundError: If a file:// URI points at a file that does not exist.
        ValueError: If `img_url` is not a supported URL and not an existing local path.
    """
    parsed = urlparse(img_url)

    # Remote URL
    if parsed.scheme in ("http", "https"):
        # A timeout keeps a stalled server from hanging the tool forever.
        response = requests.get(img_url, timeout=timeout)
        response.raise_for_status()
        # `response.content` is the fully read, content-decoded body, whereas
        # `response.raw` would hand Pillow the still-compressed stream for
        # gzip/deflate responses.
        return Image.open(BytesIO(response.content)).convert("RGB")

    # file:// URI
    if parsed.scheme == "file":
        # URI paths are percent-encoded (e.g. "%20" for a space); decode them
        # into a real filesystem path before touching the disk.
        path = Path(url2pathname(parsed.path))
        if not path.exists():
            raise FileNotFoundError(f"File not found: {path}")
        # convert() returns an independent image, so the file handle can be
        # closed as soon as the conversion is done.
        with Image.open(path) as img:
            return img.convert("RGB")

    # Local path (no scheme)
    if os.path.exists(img_url):
        with Image.open(img_url) as img:
            return img.convert("RGB")

    raise ValueError(f"Unsupported image URL or path: {img_url}")

