In [4]:
# Import necessary libraries
import cv2  # OpenCV library for image processing
import torch  # PyTorch library for machine learning and deep learning tasks
from PIL import Image  # Python Imaging Library (PIL) for handling and processing images
from transformers import BlipProcessor, BlipForConditionalGeneration  # Hugging Face Transformers for BLIP model
import matplotlib.pyplot as plt  # Matplotlib for image visualization
import os  # OS module to handle file paths and system operations

# Render Matplotlib figures directly in the notebook output area.
# NOTE: `%matplotlib` is an IPython line magic, not Python syntax, so this cell
# only runs under IPython/Jupyter.
%matplotlib inline

# Signal that every import in this cell completed without raising
print("Libraries imported successfully.")


Libraries imported successfully.


In [3]:
# Set up the BLIP (Bootstrapped Language-Image Pretraining) captioning model.
# Leaves three notebook-level names behind for later cells: processor, model, device.
try:
    # Both objects come from the same pre-trained image-captioning checkpoint:
    # the processor prepares model inputs and decodes output tokens, while the
    # model generates the caption tokens themselves.
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

    # Prefer the GPU whenever CUDA is usable; otherwise stay on the CPU
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    # Place the model weights on the chosen device
    model.to(device)
except Exception as e:
    # Report what went wrong, then re-raise so the notebook stops here instead
    # of continuing without a usable model
    print(f"An error occurred during model initialization: {str(e)}")
    raise
else:
    # Only reached when nothing above raised
    print(f"Model loaded successfully. Using device: {device}")


Model loaded successfully. Using device: cpu


In [None]:
def generate_caption(image_path, max_length=50, show=True):
    """
    Generate a caption for an image using the BLIP (Bootstrapped Language-Image Pretraining) model.

    Relies on the notebook-level objects `processor`, `model` and `device`
    created by the model-initialization cell, so that cell must have run first.

    Parameters:
        image_path (str): The file path of the input image.
        max_length (int): Value forwarded to `model.generate(max_length=...)`
            to cap the generated sequence. Defaults to 50.
        show (bool): When True (default), display the image with the caption
            as its title using Matplotlib. Pass False to skip plotting, e.g.
            when captioning many images in a loop.

    Returns:
        str: The generated caption for the image, or an error message if an issue occurs.
            Failures are reported through the returned string rather than raised,
            so callers that need to tell the two apart must inspect the text.
    """
    try:
        # Guard: nothing to do if the path does not exist
        if not os.path.exists(image_path):
            return "The image file does not exist."

        # cv2.imread signals failure by returning None rather than raising
        img = cv2.imread(image_path)
        if img is None:
            return "The image cannot be read. It might be corrupted or not a valid image file."

        # OpenCV loads pixels in BGR order; convert to RGB for PIL, the model and Matplotlib
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # The processor is given a PIL image, so wrap the NumPy array
        pil_image = Image.fromarray(img_rgb)

        # Build the model inputs and move them to the same device as the model
        inputs = processor(pil_image, return_tensors="pt").to(device)

        # Generate caption tokens, bounded by the caller-supplied max_length
        output = model.generate(**inputs, max_length=max_length)

        # Decode the first generated sequence into plain text
        caption = processor.decode(output[0], skip_special_tokens=True)

    except Exception as e:
        # Best-effort contract: report any failure as text instead of raising
        return f"An error occurred while generating the caption: {str(e)}"

    if show:
        fig = None
        try:
            # Visualize the image with the generated caption as the title
            fig = plt.figure(figsize=(10, 8))
            plt.imshow(img_rgb)
            plt.axis('off')
            plt.title(caption)
            plt.show()
        except Exception as e:
            # A plotting problem must not cost the caller the caption
            print(f"Error occurred while visualizing the result: {str(e)}")
            # Discard the half-built figure so it is neither rendered later
            # nor left open when the function is called repeatedly
            if fig is not None:
                plt.close(fig)

    return caption


In [None]:
# Demo: run generate_caption on one sample image

# Sample image location
image_path = "./images/image.jpg"

# generate_caption loads the image, runs the model and plots the result.
# It returns either the caption or an error message, both as plain strings.
caption = generate_caption(image_path)

# Echo whatever string came back
print(f"\nGenerated Caption: {caption}")



Generated Caption: An error occurred while generating the caption: name 'os' is not defined
