# InstructBLIP Image Captioning Notebook
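This notebook loads the `Salesforce/instructblip-vicuna-7b` model, generates a detailed text description for each uploaded image, and exports the results as a CSV file. It uses `google.colab` for file upload and download, so it is intended to run in Google Colab.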

In [None]:
# 1. Install dependencies
!pip install -q transformers accelerate bitsandbytes
!pip install -q git+https://github.com/huggingface/transformers.git
!pip install -q git+https://github.com/huggingface/peft.git
!pip install -q git+https://github.com/huggingface/huggingface_hub.git

In [None]:
# 2. Import modules and upload images
import os
from PIL import Image
from datetime import datetime
import torch
import pandas as pd
from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration
from google.colab import files

# Ask the user for image files; the keys of the returned mapping are used
# as the image paths in the cells that follow.
uploaded = files.upload()
image_paths = list(uploaded)

In [None]:
# 3. Check device
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# 4. Load InstructBLIP model and processor
# One constant for the checkpoint so the processor and the model always come
# from the same repository.
MODEL_ID = "Salesforce/instructblip-vicuna-7b"

processor = InstructBlipProcessor.from_pretrained(MODEL_ID)
model = InstructBlipForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,  # load the weights in half precision
    device_map="auto",  # automatic device placement of the weights
)
# Put the model in evaluation mode; the trailing `;` stops the notebook from
# printing the full module repr as the cell output.
model.eval();

In [None]:
# 5. Generate image descriptions
# The instruction and the generation budget are the same for every image,
# so they are defined once, outside the loop.
prompt = "You are a helpful AI assistant. Describe this image in detail."
MAX_NEW_TOKENS = 100
# Explicit column order so that an empty run still produces a table (and a CSV) with headers.
RESULT_COLUMNS = ["Filename", "Capture Time", "Description"]

results = []

for image_path in image_paths:
    image_name = os.path.basename(image_path)
    try:
        # The context manager closes the file handle; convert() returns a fully
        # loaded RGB copy that remains usable after the file is closed.
        # No manual resize here: the processor resizes the image itself.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")
    except Exception as e:
        # Skip files that cannot be read as images, but keep processing the rest.
        print(f"Failed to open image {image_name}: {e}")
        continue

    # NOTE(review): this is the time the image was processed, not the time the
    # picture was taken, even though it is stored under "Capture Time".
    creation_datetime = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # Move the inputs to the model's device and cast them to the model's own dtype
    # so they stay consistent with however the model was loaded.
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device, model.dtype)

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)
    # Decoded text can carry surrounding whitespace; strip it before storing.
    description = processor.batch_decode(output, skip_special_tokens=True)[0].strip()

    results.append({
        "Filename": image_name,
        "Capture Time": creation_datetime,
        "Description": description
    })

# Display as DataFrame
df = pd.DataFrame(results, columns=RESULT_COLUMNS)
df

In [None]:
# 6. Save to CSV and download
# Write the results table to disk (without the index column), then hand the
# same file to the browser for download.
csv_filename = "instructblip_results.csv"
df.to_csv(csv_filename, index=False)
files.download(csv_filename)