In [1]:
try:
    import google.colab  # noqa: F401

    # specify the version of DataEval (==X.XX.X) for versions other than the latest
    %pip install -q dataeval[all] maite-datasets
except Exception:
    pass

In [2]:
from maite_datasets.object_detection import SeaDrone

from dataeval.core import calculate
from dataeval.core.flags import ImageStats
from dataeval.data import Select
from dataeval.data.selections import Limit

In [3]:
# Build the SeaDrone object-detection dataset from its "val" image set
sd_dataset = SeaDrone(
    root="./data",
    image_set="val",
    download=True,
)

# Wrap it in a selection capped at 50 items to keep the demonstration short
dataset = Select(sd_dataset, Limit(50))

# Summarise the selection: item count, first image shape, first target's box count
print(
    f"Dataset size: {len(dataset)} images",
    f"Sample image shape: {dataset[0][0].shape}",
    f"Sample targets (boxes): {len(dataset[0][1].boxes)} boxes in first image",
    sep="\n",
)

Dataset size: 50 images
Sample image shape: (3, 2160, 3840)
Sample targets (boxes): 7 boxes in first image


In [4]:
# Image-level run: three hand-picked statistics computed on whole images only,
# with per-target processing switched off (per_image=True, per_target=False)
results_image_only = calculate(
    data=dataset,
    stats=(
        ImageStats.PIXEL_MEAN
        | ImageStats.DIMENSION_ASPECT_RATIO
        | ImageStats.VISUAL_SHARPNESS
    ),
    per_image=True,
    per_target=False,
)

# Report which statistics came back and how many entries were produced
print(
    f"Computed statistics: {list(results_image_only['stats'])}",
    f"\nNumber of results: {len(results_image_only['source_index'])}",
    f"Total images processed: {results_image_only['image_count']}",
    sep="\n",
)

Processing images for DimensionStat, PixelStat, VisualStat:   0%|          | 0/50 [00:00<?, ?it/s]

Computed statistics: ['aspect_ratio', 'mean', 'sharpness']

Number of results: 50
Total images processed: 50


In [5]:
# Peek at the leading SourceIndex records of the image-only run (at most five)
print("First 5 SourceIndex entries (image-level only):")
for i, src in zip(range(5), results_image_only["source_index"]):
    print(f"  {i}: item={src.item}, target={src.target}, channel={src.channel}")

# Confirm that no record produced by this run carries a target index
print(f"\nAll entries have target=None: {all(src.target is None for src in results_image_only['source_index'])}")

First 5 SourceIndex entries (image-level only):
  0: item=0, target=None, channel=None
  1: item=1, target=None, channel=None
  2: item=2, target=None, channel=None
  3: item=3, target=None, channel=None
  4: item=4, target=None, channel=None

All entries have target=None: True


In [6]:
# Target-level run: basic pixel statistics requested for targets only, with
# whole-image and per-channel entries switched off
# (per_image=False, per_target=True, per_channel=False)
results_target_only = calculate(
    data=dataset,
    stats=ImageStats.PIXEL_BASIC,
    per_image=False,
    per_target=True,
    per_channel=False,
)

# Report the statistic names, the number of records, and the summed object count
print(
    f"Computed statistics: {list(results_target_only['stats'])}",
    f"Number of target-level results: {len(results_target_only['source_index'])}",
    f"Total targets processed: {sum(results_target_only['object_count'])}",
    sep="\n",
)

# Show the leading SourceIndex records of the target-level run (at most five)
print("\nSourceIndex entries for targets in first few images:")
for i, src in zip(range(5), results_target_only["source_index"]):
    print(f"  {i}: image={src.item}, target={src.target}, channel={src.channel}")

Processing images for PixelStat:   0%|          | 0/50 [00:00<?, ?it/s]

Computed statistics: ['mean', 'std', 'var']
Number of target-level results: 300
Total targets processed: 300

SourceIndex entries for targets in first few images:
  0: image=0, target=0, channel=None
  1: image=0, target=1, channel=None
  2: image=0, target=2, channel=None
  3: image=0, target=3, channel=None
  4: image=0, target=4, channel=None


In [7]:
# Combined run: basic dimension statistics for whole images and for each box
# (per_image=True, per_target=True)
results_both = calculate(
    data=dataset,
    stats=ImageStats.DIMENSION_BASIC,
    per_image=True,
    per_target=True,
)

# Report record count, image count, summed object count, and statistic names
print(
    f"Number of results (images + boxes): {len(results_both['source_index'])}",
    f"Total images processed: {results_both['image_count']}",
    f"Total boxes processed: {sum(results_both['object_count'])}",
    f"Statistics calculated for each image: {list(results_both['stats'])}",
    sep="\n",
)

# Split result positions by level: records whose target is None are
# image-level, every other record is box-level
image_indices = [
    position
    for position, entry in enumerate(results_both["source_index"])
    if entry.target is None
]
target_indices = [
    position
    for position, entry in enumerate(results_both["source_index"])
    if entry.target is not None
]

print(
    f"\nImage-level results: {len(image_indices)}",
    f"Target-level results: {len(target_indices)}",
    sep="\n",
)

Processing images for DimensionStat:   0%|          | 0/50 [00:00<?, ?it/s]

Number of results (images + boxes): 350
Total images processed: 50
Total boxes processed: 300
Statistics calculated for each image: ['width', 'height', 'channels']

Image-level results: 50
Target-level results: 300
