In [1]:
import sys
from pathlib import Path
from collections import Counter

import numpy as np
from PIL import Image
from hydra import compose, initialize
from hydra.core.global_hydra import GlobalHydra
from hydra.utils import instantiate

In [5]:
# Add project root to path.
# Path() is the kernel's current working directory, so this assumes the notebook
# is executed from a directory located directly under the project root.
project_root = str(Path().absolute().parent)
# Guarded so that re-running this cell does not keep appending duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)

# Config selections
DATASET = "cityscapes"  # hydra dataset_evaluation name
MODEL = "jafar"
TASK = "seg"

# Initialize Hydra configuration (only once per kernel; the guard makes the
# cell safe to re-run without re-initializing the global Hydra instance)
if not GlobalHydra.instance().is_initialized():
    initialize(config_path="../config", version_base=None)
cfg = compose(
    config_name="eval",
    overrides=[
        f"dataset_evaluation={DATASET}",
        f"eval.task={TASK}",
        f"model={MODEL}",
        "backbone.name='vit_small_patch16_dinov3.lvd1689m'",
        f"project_root={project_root}",
    ],
)

# Instantiate datasets WITHOUT transforms to read original sizes.
# NOTE: the split is selected by mutating cfg in place, so after this cell
# cfg.dataset_evaluation.split is left at "val".
cfg.dataset_evaluation.split = "train"
train_ds = instantiate(cfg.dataset_evaluation, transform=None, target_transform=None)

cfg.dataset_evaluation.split = "val"
val_ds = instantiate(cfg.dataset_evaluation, transform=None, target_transform=None)

# Helper to compute sizes/ratios; supports datasets without explicit image_files by indexing

def collect_sizes(dataset):
    """Collect the width, height and aspect ratio of every image in ``dataset``.

    Two access strategies are used:

    * If ``dataset.image_files`` exists and is a list or tuple, every entry is
      passed to ``Image.open`` and only its ``size`` is read.
    * Otherwise the dataset is indexed from ``0`` to ``len(dataset) - 1``. The
      image is taken from the sample when the sample is a PIL image, a
      tuple/list whose first element is a PIL image, or an object with a
      ``get`` method whose ``"image"`` entry is a PIL image. If none of these
      apply and the dataset exposes ``cityscapes_dataset``, the image is taken
      from ``dataset.cityscapes_dataset[item]`` instead.

    Args:
        dataset: Dataset object supporting either ``image_files`` or
            ``__len__``/``__getitem__`` as described above.

    Returns:
        Tuple ``(widths, heights, ratios)`` of 1-D NumPy arrays with one entry
        per image. ``widths`` and ``heights`` are integer arrays, ``ratios``
        is a float array holding ``width / height``. Empty datasets yield
        empty arrays with these same dtypes.

    Raises:
        RuntimeError: If no untransformed image can be obtained for an item.
    """
    # Imported locally so the helper stays self-contained within this cell.
    from tqdm import tqdm

    def image_from_sample(sample):
        """Return the PIL image carried by ``sample``, or ``None`` if there is none."""
        if isinstance(sample, Image.Image):
            return sample
        if isinstance(sample, (list, tuple)):
            # Tuple-style samples, e.g. (image, target).
            candidate = sample[0] if sample else None
        else:
            # Dict-style samples; anything without ``get`` has no image entry.
            getter = getattr(sample, "get", None)
            candidate = getter("image") if callable(getter) else None
        return candidate if isinstance(candidate, Image.Image) else None

    has_files = hasattr(dataset, "image_files") and isinstance(dataset.image_files, (list, tuple))
    if has_files:
        iterable = dataset.image_files

        def open_from_item(item):
            return Image.open(item)
    else:
        iterable = range(len(dataset))

        def open_from_item(item):
            image = image_from_sample(dataset[item])
            if image is not None:
                return image
            # A non-PIL sample (e.g. a tensor) only carries the post-transform
            # size, so fall back to the wrapped dataset when one is exposed.
            if hasattr(dataset, "cityscapes_dataset"):
                img, _ = dataset.cityscapes_dataset[item]
                return img
            raise RuntimeError("Cannot access original image for this dataset")

    widths, heights, ratios = [], [], []
    for item in tqdm(iterable, desc="Processing images"):
        # The context manager closes the image as soon as its size is read.
        with open_from_item(item) as im:
            w, h = im.size
        widths.append(w)
        heights.append(h)
        ratios.append(w / h)

    # Explicit dtypes keep empty results integer-typed instead of NumPy's
    # float64 default for empty lists.
    return (
        np.array(widths, dtype=int),
        np.array(heights, dtype=int),
        np.array(ratios, dtype=float),
    )

# Measure every image of both splits
train_w, train_h, train_r = collect_sizes(train_ds)
val_w, val_h, val_r = collect_sizes(val_ds)

# Pool train and val measurements into one array per quantity
all_w, all_h, all_r = (
    np.concatenate(pair)
    for pair in ((train_w, val_w), (train_h, val_h), (train_r, val_r))
)

# Report counts and min/median/max statistics
width_stats = (int(all_w.min()), int(np.median(all_w)), int(all_w.max()))
height_stats = (int(all_h.min()), int(np.median(all_h)), int(all_h.max()))
ratio_stats = (
    round(all_r.min(), 4),
    round(float(np.median(all_r)), 4),
    round(all_r.max(), 4),
)
print("Counts — train/val/all:", len(train_w), len(val_w), len(all_w))
print("Width — min/median/max:", *width_stats)
print("Height — min/median/max:", *height_stats)
print("Aspect ratio (W/H) — min/median/max:", *ratio_stats)

# Ten most frequent (width, height) pairs across both splits
res_pairs = list(zip(all_w.tolist(), all_h.tolist()))
common_res = Counter(res_pairs).most_common(10)
print("Top resolutions (W,H,count):")
for resolution, count in common_res:
    print((*resolution, count))


Processing images: 100%|██████████| 2975/2975 [08:22<00:00,  5.92it/s]
Processing images: 100%|██████████| 500/500 [01:24<00:00,  5.89it/s]

Counts — train/val/all: 2975 500 3475
Width — min/median/max: 2048 2048 2048
Height — min/median/max: 1024 1024 1024
Aspect ratio (W/H) — min/median/max: 2.0 2.0 2.0
Top resolutions (W,H,count):
(2048, 1024, 3475)



