In [2]:
import os
import sys
import time
import logging
import random
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from IPython.display import display, HTML
from typing import List, Dict, Tuple, Optional, Union, Any

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('hdr_grouping_demo')

# Import our pipeline modules
from ingest import ImageIngestor
from embedding import SceneEmbedder
from clustering import SceneClusterer
from exposure_sorting import ExposureSorter
from validation import ClusterValidator
from output import ResultsExporter
from utils import estimate_exposure_value, normalize_image_size, enhance_local_contrast

# Set matplotlib parameters for better visualization
plt.rcParams['figure.figsize'] = (15, 10)
plt.rcParams['figure.dpi'] = 100

## Config

In [4]:
config = {
    'ingest': {
        'target_size': (512, 512),
        'max_workers': 16
    },
    'embedding': {
        'model_name': "facebook/dinov2-small",  # Can be changed to a lighter model if needed
        'use_gpu': True,
        'use_traditional_features': True,
        'dimensionality_reduction': "pca",
        'target_dims': 128,
        'cache_dir': "./embedding_cache",  # Cache embeddings for faster re-runs
        'max_workers': 2
    },
    'clustering': {
        'min_cluster_size': 2,
        'min_samples': 1,
        'cluster_selection_epsilon': 0.1,
        'metric': 'cosine',
        'cluster_selection_method': 'leaf'
    },
    'exposure_sorting': {
        'ssim_threshold': 0.7,
        'hash_threshold': 10,
        'min_ev_difference': 0.3
    },
    'validation': {
        'similarity_threshold': 0.7,
        'min_hdr_score': 0.4,
        'geometry_threshold': 0.6,
        'min_cluster_size': 2,
        'max_cluster_size': 20
    },
    'output': {
        'create_thumbnails': True,
        'thumbnail_size': (256, 256),
        'export_json': True,
        'copy_images': True
    }
}

# Path to the directory containing HDR images
input_dir = "./hdr_images"  # Change this to your input directory
output_dir = "./hdr_output"  # Change this to your output directory

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

### Vizualisation helper functions

In [5]:
def display_image_pair(original, processed, titles=None):
    """Display an original image beside its processed version."""
    if titles is None:
        titles = ["Original Image", "Processed Image"]

    fig, axes = plt.subplots(1, 2, figsize=(15, 7))

    # Convert BGR to RGB for display
    if len(original.shape) == 3 and original.shape[2] == 3:
        original_rgb = original[..., ::-1]  # BGR to RGB
    else:
        original_rgb = original

    if len(processed.shape) == 3 and processed.shape[2] == 3:
        processed_rgb = processed[..., ::-1]  # BGR to RGB
    else:
        processed_rgb = processed

    # Display images
    axes[0].imshow(original_rgb, cmap='gray' if len(original.shape) == 2 else None)
    axes[0].set_title(titles[0])
    axes[0].axis('off')

    axes[1].imshow(processed_rgb, cmap='gray' if len(processed.shape) == 2 else None)
    axes[1].set_title(titles[1])
    axes[1].axis('off')

    plt.tight_layout()
    plt.show()

def display_cluster_samples(clusters, images_data, max_samples=5, max_clusters=5):
    """Display sample images from each cluster."""
    # Filter out noise cluster
    valid_clusters = {k: v for k, v in clusters.items() if k != -1}

    # Limit number of clusters to display
    cluster_ids = list(valid_clusters.keys())[:max_clusters]

    for cluster_id in cluster_ids:
        print(f"\nCluster {cluster_id} - {len(valid_clusters[cluster_id])} images:")

        # Get random samples from this cluster
        cluster_items = valid_clusters[cluster_id]
        sample_count = min(max_samples, len(cluster_items))
        samples = random.sample(cluster_items, sample_count)

        # Create a grid for displaying images
        fig, axes = plt.subplots(1, sample_count, figsize=(15, 5))
        if sample_count == 1:
            axes = [axes]  # Ensure axes is always a list

        for i, (img_id, confidence) in enumerate(samples):
            img_data = next((img for img in images_data if img['id'] == img_id), None)
            if img_data:
                # Convert BGR to RGB for display
                img_rgb = img_data['original_image'][..., ::-1]
                axes[i].imshow(img_rgb)
                axes[i].set_title(f"Confidence: {confidence:.2f}\nEV: {estimate_exposure_value(img_data['original_image']):.1f}")
                axes[i].axis('off')

        plt.tight_layout()
        plt.show()

def display_exposure_sequence(sequence, images_data):
    """Display a sorted exposure sequence for a cluster."""
    # Filter out duplicates and accidental shots
    valid_sequence = [info for info in sequence if not info.get('is_duplicate', False) and not info.get('is_accidental', False)]

    n_images = len(valid_sequence)
    if n_images == 0:
        print("No valid images in sequence")
        return

    fig, axes = plt.subplots(1, n_images, figsize=(15, 5))
    if n_images == 1:
        axes = [axes]  # Ensure axes is always a list

    for i, info in enumerate(valid_sequence):
        img_id = info['id']
        img_data = next((img for img in images_data if img['id'] == img_id), None)
        if img_data:
            # Convert BGR to RGB for display
            img_rgb = img_data['original_image'][..., ::-1]
            axes[i].imshow(img_rgb)
            axes[i].set_title(f"EV: {info['ev']:.1f}")
            axes[i].axis('off')

    plt.tight_layout()
    plt.show()

# Pipeline process

**Ingest data**

In [6]:
print(f"Ingesting images from: {input_dir}")
ingestor = ImageIngestor(**config['ingest'])

start_time = time.time()
images_data = ingestor.ingest_directory(input_dir)
ingest_time = time.time() - start_time

print(f"Ingested {len(images_data)} images in {ingest_time:.2f} seconds")

2025-04-10 11:49:55,421 - hdr_grouping.ingest - ERROR - Directory does not exist or is not a directory: ./hdr_images


Ingesting images from: ./hdr_images
Ingested 0 images in 0.00 seconds
