In [None]:
pip install transformers Pillow torch torchvision torchaudio gradio langchain-experimental psutil

# Import statements

In [36]:
import io

import gradio as gd
import psutil
import requests
from langchain_community.llms import Ollama
from langchain_core.prompts import PromptTemplate
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Define the processor and model from Hugging Face

In [None]:
# The preprocessor and the captioning model are loaded from the same checkpoint,
# so the identifier is spelled out once and shared by both calls.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

# Connect to a locally running LLM using Ollama

In [38]:
# This assumes that ollama is running on localhost at port 11434 and has "mistral:instruct" downloaded

# Leave two CPUs free for the rest of the system, but never request fewer than
# one thread: psutil.cpu_count() can return None when the count is undetermined,
# and on machines with two or fewer CPUs the subtraction would yield 0 or less.
llm_threads = max(1, (psutil.cpu_count() or 1) - 2)

llm = Ollama(
    model="mistral:instruct",
    num_thread=llm_threads,
    # Per Ollama's documentation a negative keep_alive keeps the model loaded
    # between requests instead of unloading it after an idle period.
    keep_alive=-1,
    # NOTE(review): 4098 looks like a typo for 4096 - confirm before changing.
    num_ctx=4098,
)
# llm.invoke("Hi")  # uncomment for a quick connectivity test

# Generate image description from image

In [39]:
def get_image_description(image):
    """Produce a short text description of an image.

    The module-level ``processor`` turns the image into PyTorch tensors,
    the module-level ``model`` generates token ids from them, and the first
    generated sequence is decoded back to text with special tokens removed.

    Args:
        image: The image to describe, in a form accepted by ``processor``.

    Returns:
        The decoded description string.
    """
    model_inputs = processor(image, return_tensors="pt")
    generated_ids = model.generate(**model_inputs)
    first_sequence = generated_ids[0]
    return processor.decode(first_sequence, skip_special_tokens=True)

# Create chain for generating caption from image description

In [40]:
# Prompt text sent to the LLM; `{topic}` is filled in with the image description.
# The leading and trailing newlines are part of the template.
captionGenerationPromptStr = (
    "\n"
    "You are an expert at writing catchy captions for social media posts. "
    "Suggest a catchy caption for my social media post about {topic}"
    "\n"
)

captionGenerationPrompt = PromptTemplate(
    input_variables=["topic"],
    template=captionGenerationPromptStr,
)

# Piping the prompt into the LLM yields a runnable invoked with {"topic": ...}.
captionChain = captionGenerationPrompt | llm

# Driver method

In [41]:
# Seconds to wait on the remote server before abandoning an image download.
IMAGE_DOWNLOAD_TIMEOUT = 30


def generate_caption(image):
    """Generate a social-media caption for an uploaded image or an image URL.

    Args:
        image: Either an image object (upload tab), a string holding the URL
            of an image (URL tab), or ``None`` when nothing was provided.

    Returns:
        For an image object: the caption string.
        For a URL: a ``(caption, image)`` tuple so the fetched image can be
        shown next to the caption.
        When nothing usable was provided: an explanatory message, shaped like
        the success value of the same mode (plain string for ``None``,
        ``(message, None)`` for an empty or blank URL).

    Raises:
        requests.RequestException: If the URL cannot be fetched, times out,
            or the server answers with an error status.
    """
    missing_input_message = "You have to specify either images or image url."

    if image is None:
        return missing_input_message

    is_url = isinstance(image, str)
    if is_url:
        url = image.strip()
        if not url:
            # The URL tab declares two outputs, so keep the two-element shape.
            return missing_input_message, None

        # A timeout prevents a stalled server from hanging the UI, and the
        # status check surfaces HTTP errors instead of parsing an error page.
        response = requests.get(url, timeout=IMAGE_DOWNLOAD_TIMEOUT)
        response.raise_for_status()
        # response.content is fully read and content-decoded, unlike .raw.
        image = Image.open(io.BytesIO(response.content))

    description = get_image_description(image)
    caption = captionChain.invoke({"topic": description})

    if is_url:
        return caption, image
    return caption

# Create UI using gradio

In [42]:
# Tagline shown under the title on both tabs.
project_description = "Transform your photos into shareable moments with AI-generated, attention-grabbing captions"

# Tab 1: the user uploads an image, which is passed to generate_caption as a
# PIL image; only the caption text is shown.
image_upload = gd.Interface(
    fn=generate_caption,
    inputs=gd.Image(type="pil"),
    outputs=["text"],
    title="CaptionCraft",
    description=project_description,
)

# Tab 2: the user supplies an image URL as text; generate_caption returns the
# caption together with the fetched image, hence the two outputs.
image_url = gd.Interface(
    fn=generate_caption,
    inputs=["text"],
    outputs=["text", "image"],
    title="CaptionCraft",
    description=project_description,
)

# Combine both interfaces into a single tabbed app and start it.
gd.TabbedInterface(
    [image_upload, image_url], ["Upload an image.", "Provide image url."]
).launch()

Running on local URL:  http://127.0.0.1:7872

To create a public link, set `share=True` in `launch()`.


