In [1]:
!pip install transformers torchvision tqdm




In [5]:
import os
import json
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Input locations: dataset 1 is organised into one sub-folder per landscape
# category, dataset 2 is walked as a single directory tree.
dataset_1_dir = "/kaggle/input/landscape-recognition-image-dataset-12k-images/Landscape Classification/Landscape Classification/Training Data"
dataset_2_dir = "/kaggle/input/landscape-pictures/"
categories = ["Coast", "Desert", "Forest", "Glacier", "Mountain"]  # For Dataset 1

# Destination of the combined {image path: caption} JSON mapping.
output_file = "/kaggle/working/combined_landscape_captions.json"

# Load the BLIP captioning processor and model from the same checkpoint,
# then move the model weights to the GPU.
_blip_checkpoint = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(_blip_checkpoint)
model = BlipForConditionalGeneration.from_pretrained(_blip_checkpoint)
model = model.cuda()

# Function to generate caption
def generate_caption(image_path):
    """Generate a BLIP caption for a single image file.

    Args:
        image_path: Path of the image to caption.

    Returns:
        The decoded caption string, or None when the image could not be
        opened or captioned (the error is printed instead of raised).
    """
    try:
        # Image.open is lazy and keeps the file handle open. The context
        # manager closes it as soon as convert() has produced an in-memory
        # RGB copy, so captioning many files does not leak descriptors.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")
        # Follow the model's own device instead of hard-coding "cuda" so the
        # inputs always land wherever the weights were placed.
        inputs = processor(image, return_tensors="pt").to(model.device)
        caption = model.generate(**inputs)
        return processor.decode(caption[0], skip_special_tokens=True)
    except Exception as e:
        # Deliberately broad: one unreadable or corrupt image must not abort
        # the whole batch run, so the failure is reported and None returned.
        print(f"Error processing {image_path}: {e}")
        return None

# Process Dataset 1 by Categories
def process_dataset_1(image_caption_pairs):
    """Caption every image in each category folder of dataset 1.

    Walks ``dataset_1_dir/<category>`` for each entry of ``categories`` and
    stores a caption for every .jpg/.jpeg/.png file (case-insensitive).
    Categories whose folder does not exist are reported and skipped.

    Args:
        image_caption_pairs: Dict updated in place, mapping image path to
            caption. Images for which no caption was produced are omitted.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    for category in categories:
        category_dir = os.path.join(dataset_1_dir, category)
        if not os.path.exists(category_dir):
            print(f"Category directory {category_dir} not found.")
            continue

        print(f"Processing category: {category}")
        for folder, _subdirs, filenames in os.walk(category_dir):
            for filename in filenames:
                if not filename.lower().endswith(image_suffixes):
                    continue
                picture_path = os.path.join(folder, filename)
                text = generate_caption(picture_path)
                if text:
                    image_caption_pairs[picture_path] = text

# Process Dataset 2
def process_dataset_2(image_caption_pairs):
    """Caption every image found anywhere under ``dataset_2_dir``.

    Stores a caption for each .jpg/.jpeg/.png file (case-insensitive) in the
    directory tree. If the dataset directory is missing, a message is printed
    and nothing is added.

    Args:
        image_caption_pairs: Dict updated in place, mapping image path to
            caption. Images for which no caption was produced are omitted.
    """
    print("Processing Dataset 2...")
    # os.walk yields nothing for a missing directory, which would silently
    # drop the whole dataset; report it the way dataset 1 reports a missing
    # category folder.
    if not os.path.isdir(dataset_2_dir):
        print(f"Dataset directory {dataset_2_dir} not found.")
        return

    image_suffixes = ('.jpg', '.jpeg', '.png')
    for root, _, files in os.walk(dataset_2_dir):
        for file in files:
            if not file.lower().endswith(image_suffixes):
                continue
            image_path = os.path.join(root, file)
            caption = generate_caption(image_path)
            if caption:
                image_caption_pairs[image_path] = caption

# Combine both datasets
def combine_datasets():
    """Caption both datasets and write the combined mapping to ``output_file``.

    Dataset 1 is processed first, then dataset 2, both filling the same
    {image path: caption} dict, which is then saved as indented JSON.
    """
    captions_by_path = {}

    # Each processing function adds its entries to the shared dict in place.
    for fill_captions in (process_dataset_1, process_dataset_2):
        fill_captions(captions_by_path)

    with open(output_file, "w") as json_out:
        json.dump(captions_by_path, json_out, indent=4)
    print(f"Combined captions saved to {output_file}")

# Run the full pipeline: caption dataset 1 and dataset 2, then write the
# combined captions to output_file as JSON.
combine_datasets()


Processing category: Coast
Processing category: Desert
Processing category: Forest
Processing category: Glacier
Processing category: Mountain
Processing Dataset 2...
Combined captions saved to /kaggle/working/combined_landscape_captions.json
