In [9]:
import os
import time
import logging
import random
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# Named logger used by this demo notebook
logger = logging.getLogger('hdr_grouping_demo')

# Root logging setup: INFO and above, one timestamped line per record
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Import our pipeline modules
from ingest import ImageIngestor
from embedding import SceneEmbedder
from clustering import SceneClusterer
from exposure_sorting import ExposureSorter
from validation import ClusterValidator
from output import ResultsExporter
from utils import estimate_exposure_value

# Default figure size (inches) and resolution for every plot in this notebook
plt.rcParams.update({
    'figure.figsize': (15, 10),
    'figure.dpi': 100,
})

## Config

# HDR Image Grouping Pipeline Configuration Parameters

## Ingest Module Parameters

| Parameter | Description |
|-----------|-------------|
| `target_size` | Determines the dimensions (width, height) to which all input images are resized during preprocessing. A value of `(512, 512)` provides a good balance between preserving important details and computational efficiency. Larger sizes preserve more details but require more memory and processing time. |
| `max_workers` | Controls the number of parallel processes used during image ingestion. Setting to `16` allows processing multiple images simultaneously, significantly speeding up the ingestion phase for large collections. Should be adjusted based on your CPU's capabilities. |

## Embedding Module Parameters

| Parameter | Description |
|-----------|-------------|
| `model_name` | Specifies the deep learning model used to generate image embeddings. `facebook/dinov2-small` is a vision transformer model that produces high-quality representations that are robust to exposure variations. Can be changed to a lighter model if processing speed is a concern. |
| `use_gpu` | When `True`, the embedding generation will utilize GPU acceleration if available, which can be significantly faster, especially for deep learning models. |
| `use_traditional_features` | When `True`, traditional computer vision features (SIFT/ORB) are used alongside deep learning features, providing complementary information that can improve robustness, especially for extreme exposures. |
| `dimensionality_reduction` | Method used to reduce the dimensionality of embeddings. `pca` applies Principal Component Analysis, which preserves major variations while reducing computation for subsequent steps. Other options include `umap` (for better cluster separation) or `None` (no reduction). |
| `target_dims` | Target dimensionality for the reduced embeddings. `128` dimensions typically preserve enough information while making clustering more efficient. |
| `cache_dir` | Directory where computed embeddings are cached. Using `./embedding_cache` allows faster re-runs as previously processed images won't need to be re-embedded. |
| `max_workers` | Number of parallel workers for embedding generation. Set to `2`, lower than the ingest worker count, to avoid GPU memory issues when using deep learning models. |

## Clustering Module Parameters

| Parameter | Description |
|-----------|-------------|
| `min_cluster_size` | Minimum number of images required to form a cluster. Setting to `1` allows single-image "clusters" which may be useful for unique shots, though usually 2+ is recommended for HDR grouping. |
| `min_samples` | Determines how conservative the clustering algorithm is. With `1`, a point can be a core point with just one neighbor, making clustering less restrictive. Higher values create more robust but fewer clusters. |
| `cluster_selection_epsilon` | Distance threshold for expanding clusters. `0.1` allows moderately similar points to join existing clusters. Higher values create larger, potentially less coherent clusters. |
| `metric` | Distance measure between embeddings. `cosine` is typically best for high-dimensional embeddings as it focuses on the direction rather than magnitude of feature vectors. |
| `cluster_selection_method` | Algorithm for extracting flat clusters from the hierarchical structure. `leaf` extracts clusters at leaf nodes, which tends to produce smaller, more numerous clusters compared to the alternative `eom` (Excess of Mass). |

## Exposure Sorting Parameters

| Parameter | Description |
|-----------|-------------|
| `ssim_threshold` | Structural Similarity Index threshold for identifying duplicate images. Images with similarity above `0.7` might be considered duplicates. Higher values require greater similarity to flag duplicates. |
| `hash_threshold` | Perceptual hash difference threshold for duplicate detection. Images with hash differences below `10` might be duplicates. Lower values are more stringent, requiring greater visual similarity. |
| `min_ev_difference` | Minimum exposure value (EV) difference required between images to consider them distinct exposure levels. `0.3` EV represents a noticeable but not dramatic exposure change. Lower values create more granular exposure sequences. |

## Validation Parameters

| Parameter | Description |
|-----------|-------------|
| `similarity_threshold` | Minimum average feature similarity required within a cluster for it to be considered valid. `0.7` ensures that images in a cluster share significant visual content. |
| `min_hdr_score` | Minimum HDR quality score required for a valid exposure sequence. `0.4` ensures reasonable exposure diversity for HDR purposes. Higher values require more ideal HDR brackets. |
| `geometry_threshold` | Minimum geometric consistency score based on homography matching. `0.6` ensures that images in a cluster share the same physical scene structure despite exposure differences. |
| `min_cluster_size` | Minimum number of images required for a cluster to be considered valid. `1` allows single-image clusters to be valid, though typically 2+ is better for HDR applications. |
| `max_cluster_size` | Maximum images allowed in a single cluster before considering splitting it. `20` prevents overly large clusters that might combine multiple scenes. |

## Output Parameters

| Parameter | Description |
|-----------|-------------|
| `create_thumbnails` | When `True`, generates thumbnail previews of all processed images for easier visual inspection. |
| `thumbnail_size` | Dimensions (width, height) for generated thumbnails. `(256, 256)` provides a good balance between visibility and file size. |
| `export_json` | When `True`, exports detailed metadata about clusters and images in JSON format for potential downstream processing. |
| `copy_images` | When `True`, copies the original images to the output directory structure. If `False`, only metadata is exported, which can save disk space. |

In [None]:
# Per-stage settings. Each section is later unpacked as keyword arguments
# into the matching pipeline class (e.g. ImageIngestor(**config['ingest'])).
ingest_config = {
    'target_size': (512, 512),
    'max_workers': 16,
}

embedding_config = {
    'model_name': "facebook/dinov2-small",  # Can be changed to a lighter model if needed
    'use_gpu': True,
    'use_traditional_features': True,
    'dimensionality_reduction': "pca",
    'target_dims': 128,
    'cache_dir': "./embedding_cache",  # Cache embeddings for faster re-runs
    'max_workers': 2,
}

clustering_config = {
    'min_cluster_size': 1,
    'min_samples': 1,
    'cluster_selection_epsilon': 0.1,
    'metric': 'cosine',
    'cluster_selection_method': 'leaf',
}

exposure_sorting_config = {
    'ssim_threshold': 0.7,
    'hash_threshold': 10,
    'min_ev_difference': 0.3,
}

validation_config = {
    'similarity_threshold': 0.7,
    'min_hdr_score': 0.4,
    'geometry_threshold': 0.6,
    'min_cluster_size': 1,
    'max_cluster_size': 20,
}

output_config = {
    'create_thumbnails': True,
    'thumbnail_size': (256, 256),
    'export_json': True,
    'copy_images': True,
}

config = {
    'ingest': ingest_config,
    'embedding': embedding_config,
    'clustering': clustering_config,
    'exposure_sorting': exposure_sorting_config,
    'validation': validation_config,
    'output': output_config,
}

# Source and destination directories for this run
input_dir = "images/shoot_3"  # Change this to your input directory
output_dir = "output/shoot_3"  # Change this to your output directory

# Make sure the destination exists before any stage writes to it
os.makedirs(output_dir, exist_ok=True)

### Visualization helper functions

In [None]:
def display_image_pair(original, processed, titles=None):
    """Show two images next to each other in a single-row figure.

    Three-channel inputs have their last axis reversed before display
    (treated as BGR -> RGB). Two-dimensional inputs are drawn with the
    'gray' colormap.

    Args:
        original: Image array for the left panel.
        processed: Image array for the right panel.
        titles: Optional pair of panel titles; defaults to
            ["Original Image", "Processed Image"].
    """
    def _as_displayable(image):
        # Only arrays shaped (h, w, 3) get their channel order flipped
        if len(image.shape) == 3 and image.shape[2] == 3:
            return image[..., ::-1]  # BGR to RGB
        return image

    if titles is None:
        titles = ["Original Image", "Processed Image"]

    _, panels = plt.subplots(1, 2, figsize=(15, 7))

    sources = (original, processed)
    displayable = [_as_displayable(image) for image in sources]

    for idx, (source, shown) in enumerate(zip(sources, displayable)):
        panel = panels[idx]
        colormap = 'gray' if len(source.shape) == 2 else None
        panel.imshow(shown, cmap=colormap)
        panel.set_title(titles[idx])
        panel.axis('off')

    plt.tight_layout()
    plt.show()

def display_cluster_samples(clusters, images_data, max_samples=5, max_clusters=5):
    """Display randomly sampled images from the first few non-noise clusters.

    Args:
        clusters: Mapping of cluster id to a list of (image id, confidence)
            pairs. The id -1 is treated as the noise cluster and skipped.
        images_data: Iterable of image records, each indexed by 'id' and
            'original_image'.
        max_samples: Upper bound on the images drawn per cluster.
        max_clusters: Upper bound on the clusters shown (taken in mapping order).

    Clusters with nothing to sample only get their header line printed.
    """
    # Filter out noise cluster
    valid_clusters = {cid: members for cid, members in clusters.items() if cid != -1}

    # Index the records once instead of rescanning images_data for every sample.
    # setdefault keeps the first record seen for an id, like a first-match scan.
    images_by_id = {}
    for record in images_data:
        images_by_id.setdefault(record['id'], record)

    # Limit number of clusters to display
    for cluster_id in list(valid_clusters)[:max_clusters]:
        cluster_items = valid_clusters[cluster_id]
        print(f"\nCluster {cluster_id} - {len(cluster_items)} images:")

        sample_count = min(max_samples, len(cluster_items))
        if sample_count <= 0:
            # plt.subplots cannot build a grid with zero columns
            continue

        samples = random.sample(cluster_items, sample_count)

        fig, axes = plt.subplots(1, sample_count, figsize=(15, 5))
        if sample_count == 1:
            axes = [axes]  # Ensure axes is always iterable

        for axis, (img_id, confidence) in zip(axes, samples):
            # Hide ticks up front so a missing record leaves a clean blank panel
            axis.axis('off')

            img_data = images_by_id.get(img_id)
            if not img_data:
                continue

            source_image = img_data['original_image']
            axis.imshow(source_image[..., ::-1])  # Convert BGR to RGB for display
            axis.set_title(f"Confidence: {confidence:.2f}\nEV: {estimate_exposure_value(source_image):.1f}")

        plt.tight_layout()
        plt.show()

def display_exposure_sequence(sequence, images_data):
    """Display the images of an exposure sequence side by side, in sequence order.

    Entries flagged 'is_duplicate' or 'is_accidental' are left out. If nothing
    remains, a message is printed and no figure is created.

    Args:
        sequence: Iterable of dicts with at least 'id' and 'ev', plus the
            optional boolean flags 'is_duplicate' and 'is_accidental'.
        images_data: Iterable of image records, each indexed by 'id' and
            'original_image'.
    """
    # Filter out duplicates and accidental shots
    valid_sequence = [
        info for info in sequence
        if not (info.get('is_duplicate', False) or info.get('is_accidental', False))
    ]

    if not valid_sequence:
        print("No valid images in sequence")
        return

    # Index the records once instead of rescanning images_data for every entry.
    # setdefault keeps the first record seen for an id, like a first-match scan.
    images_by_id = {}
    for record in images_data:
        images_by_id.setdefault(record['id'], record)

    n_images = len(valid_sequence)
    fig, axes = plt.subplots(1, n_images, figsize=(15, 5))
    if n_images == 1:
        axes = [axes]  # Ensure axes is always iterable

    for axis, info in zip(axes, valid_sequence):
        # Hide ticks up front so a missing record leaves a clean blank panel
        axis.axis('off')

        img_data = images_by_id.get(info['id'])
        if not img_data:
            continue

        axis.imshow(img_data['original_image'][..., ::-1])  # Convert BGR to RGB for display
        axis.set_title(f"EV: {info['ev']:.1f}")

    plt.tight_layout()
    plt.show()

# Pipeline process

**Ingest data**

In [None]:
# Run the ingest stage on input_dir with the settings from config['ingest']
print(f"Ingesting images from: {input_dir}")
ingestor = ImageIngestor(**config['ingest'])

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way a time.time() difference can
start_time = time.perf_counter()
images_data = ingestor.ingest_directory(input_dir)
ingest_time = time.perf_counter() - start_time

print(f"Ingested {len(images_data)} images in {ingest_time:.2f} seconds")

**Preview image transforms**

In [None]:
if images_data:
    # Pick at most three ingested images at random for a before/after preview
    sample_images = random.sample(images_data, min(3, len(images_data)))

    for img_data in sample_images:
        meta = img_data['metadata']
        print(f"\nImage: {meta['filename']}")
        print(f"Original size: {meta['original_width']}x{meta['original_height']}")

        exposure_value = estimate_exposure_value(img_data['original_image'])
        print(f"Estimated exposure value: {exposure_value:.2f}")

        # (left image key, right image key, panel titles) for each comparison figure
        comparisons = (
            ('original_image', 'normalized_image',
             ["Original Image", "Normalized (Resized) Image"]),
            ('grayscale_image', 'enhanced_image',
             ["Grayscale Image", "Enhanced Contrast (CLAHE)"]),
        )
        for left_key, right_key, pair_titles in comparisons:
            display_image_pair(img_data[left_key], img_data[right_key], pair_titles)

### Create image embeddings

In [None]:
# Create cache directory if it doesn't exist
os.makedirs(config['embedding']['cache_dir'], exist_ok=True)

embedder = SceneEmbedder(**config['embedding'])

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way a time.time() difference can
start_time = time.perf_counter()
embeddings = embedder.create_embeddings(images_data)
embedding_time = time.perf_counter() - start_time

print(f"Created {len(embeddings)} embeddings in {embedding_time:.2f} seconds")
if embeddings:
    # Report the shape of the first embedding as a quick sanity check
    sample_id = next(iter(embeddings))
    print(f"Sample embedding shape: {embeddings[sample_id].shape}")

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE


def _scatter_embeddings(points, title, xlabel, ylabel):
    """Draw a 2-D scatter plot of projected embeddings with labelled axes."""
    plt.figure(figsize=(10, 8))
    plt.scatter(points[:, 0], points[:, 1], alpha=0.7)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid(alpha=0.3)
    plt.show()


n_embeddings = len(embeddings)

if n_embeddings > 2:  # Need at least 3 points for meaningful visualization
    # One row per embedding, in the mapping's iteration order
    embedding_matrix = np.vstack(list(embeddings.values()))
    image_ids = list(embeddings.keys())

    # Quick linear projection onto the first two principal components
    pca = PCA(n_components=2)
    pca_result = pca.fit_transform(embedding_matrix)

    _scatter_embeddings(
        pca_result,
        f"PCA Visualization of {n_embeddings} Image Embeddings",
        f"PCA Component 1 ({pca.explained_variance_ratio_[0]:.2%} variance)",
        f"PCA Component 2 ({pca.explained_variance_ratio_[1]:.2%} variance)",
    )

    # t-SNE is more computationally intensive, so only run it for moderate dataset sizes
    if 5 < n_embeddings < 1000:
        tsne = TSNE(
            n_components=2,
            perplexity=min(30, n_embeddings - 1),
            max_iter=1000,
            random_state=42,
        )
        tsne_result = tsne.fit_transform(embedding_matrix)

        _scatter_embeddings(
            tsne_result,
            f"t-SNE Visualization of {n_embeddings} Image Embeddings",
            "t-SNE Component 1",
            "t-SNE Component 2",
        )

**Cluster images**

In [None]:
clusterer = SceneClusterer(**config['clustering'])

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way a time.time() difference can
start_time = time.perf_counter()
clusters = clusterer.cluster_embeddings(embeddings, images_data)
clustering_time = time.perf_counter() - start_time

# Count valid clusters (the id -1 is the noise cluster and is excluded)
valid_cluster_count = sum(1 for cid in clusters if cid != -1)
noise_count = len(clusters.get(-1, []))

print(f"Found {valid_cluster_count} clusters in {clustering_time:.2f} seconds")
print(f"Noise points (unclustered images): {noise_count}")

# Print cluster sizes in ascending id order, so the noise cluster (-1) comes first
print("\nCluster sizes:")
for cluster_id, items in sorted(clusters.items()):
    if cluster_id == -1:
        print(f"  Noise cluster: {len(items)} images")
    else:
        print(f"  Cluster {cluster_id}: {len(items)} images")

In [None]:
if len(embeddings) > 2 and len(clusters) > 1:  # Need at least 3 points and 2 clusters for meaningful visualization
    import matplotlib.cm as cm

    # Create mapping from image ID to cluster ID
    id_to_cluster = {}
    for cluster_id, items in clusters.items():
        for img_id, _ in items:
            id_to_cluster[img_id] = cluster_id

    # One matrix row per embedding; labels are built in the same iteration order.
    # Images missing from every cluster default to the noise label (-1).
    embedding_matrix = np.vstack(list(embeddings.values()))
    cluster_labels = np.array([id_to_cluster.get(img_id, -1) for img_id in embeddings])

    # Use PCA for visualization
    pca = PCA(n_components=2)
    pca_result = pca.fit_transform(embedding_matrix)

    plt.figure(figsize=(12, 10))

    # One colour per distinct label, including the noise label if present
    unique_clusters = np.unique(cluster_labels)
    colors = cm.rainbow(np.linspace(0, 1, len(unique_clusters)))

    for cluster_id, color in zip(unique_clusters, colors):
        mask = cluster_labels == cluster_id
        if cluster_id == -1:
            # Plot noise points as black X markers
            plt.scatter(pca_result[mask, 0], pca_result[mask, 1], c='black', marker='x', label="Noise", alpha=0.6)
        else:
            plt.scatter(pca_result[mask, 0], pca_result[mask, 1], c=[color], label=f"Cluster {cluster_id}", alpha=0.7)

    # Only subtract the noise label from the count (and mention noise) when it
    # is actually present; otherwise the title would under-report by one.
    has_noise = bool(np.any(unique_clusters == -1))
    real_cluster_count = len(unique_clusters) - (1 if has_noise else 0)
    noise_suffix = " + noise" if has_noise else ""

    plt.title(f"PCA Visualization of Clusters ({real_cluster_count} clusters{noise_suffix})")
    plt.xlabel(f"PCA Component 1 ({pca.explained_variance_ratio_[0]:.2%} variance)")
    plt.ylabel(f"PCA Component 2 ({pca.explained_variance_ratio_[1]:.2%} variance)")
    plt.legend()
    plt.grid(alpha=0.3)
    plt.show()

In [None]:
# Show up to 5 randomly sampled images for each of the first 5 non-noise clusters
display_cluster_samples(clusters, images_data, max_clusters=5, max_samples=5)

In [None]:
# Try to move noise points (cluster id -1) into existing clusters; skip if there are none
noise_before = len(clusters.get(-1, []))

if noise_before > 0:
    # perf_counter() is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way a time.time() difference can
    start_time = time.perf_counter()
    updated_clusters = clusterer.assign_outliers(clusters, embeddings, threshold=0.8)
    assign_time = time.perf_counter() - start_time

    noise_after = len(updated_clusters.get(-1, []))
    assigned_count = noise_before - noise_after

    print(f"Reassigned {assigned_count} out of {noise_before} noise points in {assign_time:.2f} seconds")
    print(f"Remaining noise points: {noise_after}")

    # Later cells work with the updated assignment
    clusters = updated_clusters
else:
    print("No noise points to assign")

In [None]:
cluster_stats = clusterer.get_cluster_statistics(clusters, embeddings)

# Float-valued statistics printed with four decimals: (label, key in stats)
float_rows = (
    ("Average confidence", 'avg_confidence'),
    ("Minimum confidence", 'min_confidence'),
    ("Coherence", 'coherence'),
)

print("Cluster Statistics:")
for cluster_id in sorted(cluster_stats):
    # The noise cluster (-1) is not reported
    if cluster_id != -1:
        stats = cluster_stats[cluster_id]

        print(f"\nCluster {cluster_id}:")
        print(f"  Size: {stats['size']} images")
        for label, key in float_rows:
            print(f"  {label}: {stats[key]:.4f}")

In [None]:
# Index image records by id so later stages can look them up directly
images_dict = {record['id']: record for record in images_data}

exposure_sorter = ExposureSorter(**config['exposure_sorting'])

# Per-cluster results, filled in below and extended by later cells
processed_clusters = {}

print("Processing exposure sequences:")
for cluster_id, cluster_items in clusters.items():
    if cluster_id == -1:
        continue  # the noise cluster is not processed

    exposure_info = exposure_sorter.process_cluster(cluster_items, images_dict)
    processed_clusters[cluster_id] = {
        'items': cluster_items,
        'exposure_info': exposure_info,
    }

    print(f"\nCluster {cluster_id} - {len(cluster_items)} images:")
    print(f"  HDR score: {exposure_info['hdr_score']:.4f}")

    flags = exposure_info['flags']
    print(f"  Has duplicates: {flags['has_duplicates']}")
    print(f"  Has accidental shots: {flags['has_accidental_shots']}")

    # Exposure range across every entry of the sequence
    sequence = exposure_info['exposure_sequence']
    evs = [info['ev'] for info in sequence]
    if evs:
        lowest_ev, highest_ev = min(evs), max(evs)
        print(f"  Exposure range: {lowest_ev:.1f} to {highest_ev:.1f} EV (span: {highest_ev - lowest_ev:.1f})")

    # Report how many entries were flagged as duplicates or accidental shots
    duplicates = [info for info in sequence if info.get('is_duplicate', False)]
    accidentals = [info for info in sequence if info.get('is_accidental', False)]

    if duplicates:
        print(f"  Found {len(duplicates)} duplicate images")
    if accidentals:
        print(f"  Found {len(accidentals)} accidental shots")

In [None]:
for cluster_id, cluster_data in processed_clusters.items():
    print(f"\nCluster {cluster_id} - Exposure Sequence:")
    exposure_sequence = cluster_data['exposure_info']['exposure_sequence']

    # Plot the sequence (flagged entries are filtered out by the helper)
    display_exposure_sequence(exposure_sequence, images_data)

    # List every entry, flagged ones included, with a 1-based position
    print("Details:")
    for position, info in enumerate(exposure_sequence, start=1):
        if info.get('is_duplicate', False):
            status = "[DUPLICATE]"
        elif info.get('is_accidental', False):
            status = "[ACCIDENTAL]"
        else:
            status = ""

        print(f"  {position}. EV: {info['ev']:.1f} {status}")

In [None]:
validator = ClusterValidator(**config['validation'])

# Score rows printed with four decimals: (label, score key, validity key)
score_rows = (
    ("Feature similarity", 'similarity_score', 'similarity_valid'),
    ("Geometric consistency", 'geometry_score', 'geometry_valid'),
    ("HDR score", 'hdr_score', 'hdr_valid'),
)

print("Validating clusters:")
for cluster_id, cluster_data in processed_clusters.items():
    cluster_items = cluster_data['items']
    exposure_info = cluster_data['exposure_info']

    validation_result = validator.validate_cluster(
        cluster_items,
        images_dict,
        embeddings,
        exposure_info['hdr_score'],
    )

    # Keep the outcome next to the cluster's other data for the later cells
    cluster_data['validation'] = validation_result

    print(f"\nCluster {cluster_id}:")
    print(f"  Valid: {validation_result['is_valid']}")
    print(f"  Confidence: {validation_result['confidence']:.4f}")

    details = validation_result['details']
    print("  Validation details:")
    for label, score_key, valid_key in score_rows:
        print(f"    {label}: {details[score_key]:.4f} (valid: {details[valid_key]})")
    print(f"    Size: {details['size']} (valid: {details['size_valid']})")

In [None]:
# Look for clusters that might be split: only those holding more than twice
# the configured minimum cluster size, or those that failed validation
split_size_limit = config['validation']['min_cluster_size'] * 2

split_candidates = {}
for cluster_id, cluster_data in processed_clusters.items():
    worth_checking = (
        len(cluster_data['items']) > split_size_limit
        or not cluster_data['validation']['is_valid']
    )
    if not worth_checking:
        continue

    subclusters = validator.suggest_split(cluster_data['items'], images_dict, embeddings)
    if len(subclusters) > 1:
        split_candidates[cluster_id] = subclusters

# Merge suggestions are computed over the clusters that passed validation
valid_clusters = {}
for cid, data in processed_clusters.items():
    if data['validation']['is_valid']:
        valid_clusters[cid] = data['items']
merge_candidates = validator.suggest_merge(valid_clusters, embeddings)

# Report results
if not split_candidates:
    print("\nNo clusters need splitting")
else:
    print("\nClusters that could be split:")
    for cluster_id, subclusters in split_candidates.items():
        print(f"  Cluster {cluster_id}: could be split into {len(subclusters)} subclusters")
        for number, sc in enumerate(subclusters, start=1):
            print(f"    Subcluster {number}: {len(sc)} images")

if not merge_candidates:
    print("\nNo clusters should be merged")
else:
    print("\nClusters that could be merged:")
    for primary_id, secondary_ids in merge_candidates.items():
        print(f"  Cluster {primary_id} could be merged with: {secondary_ids}")

In [None]:
# Create the exporter
exporter = ResultsExporter(
    output_base_dir=output_dir,
    **config['output']
)

# Only clusters that passed validation are exported as groups
valid_clusters = {
    cid: data for cid, data in processed_clusters.items()
    if data['validation']['is_valid']
}

# perf_counter() is monotonic, so the measured durations cannot be skewed by
# system clock adjustments the way time.time() differences can
start_time = time.perf_counter()
export_stats = exporter.export_clusters(valid_clusters, images_dict)
export_time = time.perf_counter() - start_time

print(f"Exported {export_stats['exported_clusters']} valid clusters in {export_time:.2f} seconds")
print(f"Exported {export_stats['exported_images']} images")

# Unassigned images: everything in the noise cluster (-1) ...
unassigned_images = [img_id for img_id, _ in clusters.get(-1, [])]
# ... plus every image from clusters that failed validation
for data in processed_clusters.values():
    if not data['validation']['is_valid']:
        unassigned_images.extend(img_id for img_id, _ in data['items'])

# Export unassigned images
start_time = time.perf_counter()
unassigned_count = exporter.export_unassigned_images(unassigned_images, images_dict)
unassigned_time = time.perf_counter() - start_time

print(f"Exported {unassigned_count} unassigned images in {unassigned_time:.2f} seconds")

In [None]:
# Timings gathered by the earlier cells; validation was not timed, so it is reported as 0
total_images = len(images_data)
pipeline_stats = {
    'total_images': total_images,
    'ingest_time': ingest_time,
    'embedding_time': embedding_time,
    'clustering_time': clustering_time,
    'validation_time': 0,
    'export_time': export_time + unassigned_time,
    'total_time': ingest_time + embedding_time + clustering_time + export_time + unassigned_time,
}

# Hand the collected numbers to the exporter's report generator
exporter.generate_report(export_stats, unassigned_count, pipeline_stats)

# Console summary of the run
report_lines = [
    "\nFinal Report:",
    f"Total images processed: {total_images}",
    f"Valid clusters found: {len(valid_clusters)}",
    f"Images in valid clusters: {export_stats['total_images']}",
    f"Unassigned images: {unassigned_count}",
    f"Total processing time: {pipeline_stats['total_time']:.2f} seconds",
    f"\nOutput directory: {output_dir}",
]
for line in report_lines:
    print(line)

In [None]:
import json

# Location where this cell looks for the exported metadata file
metadata_path = Path(output_dir) / "grouped_output" / "metadata.json"

if not metadata_path.exists():
    print(f"Metadata file not found at {metadata_path}")
else:
    metadata = json.loads(metadata_path.read_text())

    print("Sample from generated metadata.json:")
    print("\nCluster count:", len(metadata.get('clusters', {})))

    # Show the first cluster entry as a sample, if there is one
    cluster_entries = metadata.get('clusters')
    if cluster_entries:
        cluster_id = next(iter(cluster_entries))
        cluster_meta = cluster_entries[cluster_id]

        print(f"\nSample Cluster {cluster_id}:")
        print(f"  Confidence: {cluster_meta.get('confidence', 0):.4f}")
        print(f"  HDR Score: {cluster_meta.get('hdr_score', 0):.4f}")
        print(f"  Image Count: {cluster_meta.get('image_count', 0)}")

        sequence_images = cluster_meta.get('images')
        if sequence_images:
            print("\n  First 3 images in sequence:")
            for img in sequence_images[:3]:
                print(f"    Position {img['sequence_position']} - EV: {img['exposure_value']:.1f} - {Path(img['source_path']).name}")