In [2]:
# Install or upgrade the `openai` package in the running kernel's environment
# (quiet mode). If the package was upgraded, restart the kernel before running
# the cells below so the new version is the one that gets imported.
%pip install --upgrade openai --quiet

Note: you may need to restart the kernel to use updated packages.


In [24]:
from dotenv import load_dotenv
import os
# Load environment variables before the API client is created below.
# NOTE(review): presumably a local .env file provides OPENAI_API_KEY, which is
# read later via os.getenv — confirm the .env setup.
load_dotenv() 

True

In [25]:
from openai import OpenAI

# Model name passed to every chat-completion request in this notebook.
MODEL = "gpt-4o"

# The API key is read from the environment rather than written into the notebook.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [26]:
from IPython.display import Image, display
import base64

def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file's bytes, decoded to ``str`` as UTF-8.
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

In [27]:
# File extension -> MIME type used in the data URL sent to the model.
_IMAGE_MIME_TYPES = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
}


def _image_mime_type(image_path):
    """Return the MIME type for ``image_path`` based on its extension (default: image/png)."""
    extension = os.path.splitext(image_path)[1].lower()
    # Unknown or missing extensions keep the previous behaviour of labelling the data as PNG.
    return _IMAGE_MIME_TYPES.get(extension, "image/png")


def generate_caption(image_path, summary_length="small"):
    """Request a caption for one image from the chat model.

    The image file is base64-encoded with ``encode_image`` and sent inline as a
    data URL, together with a text prompt, using the module-level ``client``
    and ``MODEL``.

    Args:
        image_path: Path to the image file on disk.
        summary_length: Text interpolated into the prompt
            ("Generate a {summary_length} caption for the image"). Callers use
            it to pick the caption style, e.g. "small" or "short".

    Returns:
        The message content of the first choice in the API response.
    """
    base64_image = encode_image(image_path)
    # Label the payload with the file's actual image type instead of always
    # claiming PNG, so non-PNG files are described correctly to the API.
    mime_type = _image_mime_type(image_path)
    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": "You are a helpful assistant that responds in Markdown. Help me with my image caption generation!"},
            {"role": "user", "content": [
                {"type": "text", "text": f"Generate a {summary_length} caption for the image"},
                {"type": "image_url", "image_url": {
                    "url": f"data:{mime_type};base64,{base64_image}"}
                }
            ]}
        ],
        # Temperature 0 keeps the captions as repeatable as the API allows.
        temperature=0.0,
    )
    return response.choices[0].message.content

In [21]:
import os
import json

# Define the directory where the images are stored
image_dir = './images/'

# Key written into each image record -> text inserted into the caption prompt.
# NOTE(review): "seperated" is misspelled; the prompt text is intentionally left
# byte-for-byte unchanged so re-runs send the same requests as before.
caption_requests = {
    'gpt_small': "small",
    'gpt_short': "short",
    'gpt_keywords': "comma-seperated keywords",
}

# JSON text is UTF-8; do not depend on the platform's default encoding.
with open('data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

for image_id, image_info in data['images'].items():
    # Images are looked up as <image_dir>/<image_id>.png
    image_path = os.path.join(image_dir, f"{image_id}.png")

    # One API call per caption style; each result is stored on the image's record.
    for caption_key, summary_length in caption_requests.items():
        image_info[caption_key] = generate_caption(image_path, summary_length)

print(data)

{'info': {'description': 'This is a NCERT Dataset containing the images from the book with their captions', 'version': '1.0', 'year': 2024, 'date_created': '2024-05-15'}, 'images': {'001': {'ncert': 'Bacteria', 'gpt_small': 'Microscopic view of chromosomes stained in pink against a dark background.', 'gpt_short': 'Microscopic view of bacteria stained in pink.', 'gpt_keywords': 'bacteria, microscopic, pink, black background, microorganisms, spiral shape, microbiology, pathogens, magnified, scientific image'}, '002': {'ncert': 'Bacteria', 'gpt_small': 'Microscopic view of rod-shaped bacteria.', 'gpt_short': 'Microscopic view of rod-shaped bacteria.', 'gpt_keywords': 'bacteria, microscopic, blue background, rod-shaped, microbiology, cells, magnified, microorganisms, science, laboratory'}, '003': {'ncert': 'Chlamydomonas Algae', 'gpt_small': 'Diagram of a plant cell showing various organelles.', 'gpt_short': 'Diagram of a plant cell showing its internal structures.', 'gpt_keywords': 'cell,

In [22]:
# Write the updated records back to data.json, pretty-printed with a 4-space indent.
with open('data.json', 'w') as f:
    serialized = json.dumps(data, indent=4)
    f.write(serialized)