In [None]:
!pip install google-generativeai

In [None]:
from dotenv import load_dotenv
import os, time
import google.generativeai as genai
from PIL import Image
import pandas as pd

# Read variables from a .env file via python-dotenv.
load_dotenv()

# Pass the value of GOOGLE_API_KEY to the Gemini client library.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# Generation settings handed to every GenerativeModel created below.
generation_config = dict(
    temperature=0,
    top_p=0.95,
    top_k=5,
    response_mime_type="text/plain",
)

# Gemini model names the captioning loop iterates over.
models = [
    'gemini-2.0-flash',
    'gemini-1.5-flash',
    'gemini-2.0-flash-lite',
]

# Supported image extensions mapped to the MIME type sent to the API.
_IMAGE_MIME_TYPES = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
}


def get_gemini_response(image, prompt):
    """Generate a caption for one image with the currently selected model.

    Args:
        image: Image payload passed to ``generate_content``;
            ``process_images`` supplies the dict built by ``encode_image``.
        prompt: Instruction text sent alongside the image.

    Returns:
        The ``text`` attribute of the model response.
    """
    # ``model`` is the module-level GenerativeModel bound by the loop at the
    # bottom of this cell, so it must be set before this function is called.
    response = model.generate_content([image, prompt])
    return response.text


def encode_image(image_path):
    """Read an image file and pair its raw bytes with a MIME type.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A dict with the keys ``"mime_type"`` and ``"data"`` (the file bytes).
    """
    with open(image_path, "rb") as image_file:
        bytes_data = image_file.read()

    # Pick the MIME type from the extension so PNG files are not labelled as
    # JPEG; unknown extensions keep the previous "image/jpeg" default.
    extension = os.path.splitext(image_path)[1].lower()
    return {
        "mime_type": _IMAGE_MIME_TYPES.get(extension, "image/jpeg"),
        "data": bytes_data,
    }


def process_images(folder_path, delay_seconds=6):
    """Caption every supported image in a folder and save the results as CSV.

    The output file is ``"<model_name>-caption.csv"`` in the current working
    directory, where ``model_name`` is the module-level name set by the loop
    at the bottom of this cell. Images that fail are still written to the
    CSV, with an error message in place of the caption, so file names and
    captions always stay aligned.

    Args:
        folder_path: Directory containing the images (.jpg, .jpeg, .png).
        delay_seconds: Pause after each image, in seconds, to space out
            successive API requests.
    """
    file_names = []
    captions = []

    # Input prompt for the model. The indentation inside the literal is part
    # of the text sent to the model and is kept unchanged on purpose.
    input_prompt = """
        Act as an expert in Image Captioning. Your task is to generate high quality caption in Bengali Language.

        1. Generate accurate and precise caption to describe the image properly
        2. Generate caption in one sentence within 10 words
        """

    # Sorted so the processing order (and CSV row order) is reproducible.
    for image_name in sorted(os.listdir(folder_path)):
        image_path = os.path.join(folder_path, image_name)
        extension = os.path.splitext(image_name)[1].lower()

        if not os.path.isfile(image_path) or extension not in _IMAGE_MIME_TYPES:
            continue  # Skip directories and non-image files

        print(f"Processing: {image_name}")
        try:
            image_data = encode_image(image_path)
            caption = get_gemini_response(image_data, input_prompt)
        except Exception as e:
            # Best effort: report the failure and keep going with the rest.
            print(f"Error processing {image_name}: {e}")
            caption = f"{image_name}: Error generating caption"

        # Always record both values so the two lists never drift apart.
        file_names.append(image_name)
        captions.append(caption)

        # Pause after every attempt, including failed ones.
        time.sleep(delay_seconds)

    # Save captions to a file named after the model being evaluated.
    df = pd.DataFrame({"FileName": file_names, "Caption": captions})
    df.to_csv(f"{model_name}-caption.csv", index=False)


# Build the path from components: avoids the invalid "\i" escape and works
# on every operating system.
folder_path = os.path.join("..", "image_dataset", "images")

# Run the captioning once per model. The name string is kept separate from
# the model object so the CSV file name stays readable.
for model_name in models:
    model = genai.GenerativeModel(model_name, generation_config=generation_config)
    process_images(folder_path)