In [2]:
import os
import shutil

# VOC image ID of the single sample used as "item A".
item_A_id = '2008_000336'

# Source files for item A: the JPEG image and its SegmentationClass PNG.
img_src  = f"/home/scai/phd/aiz218323/scratch/abhishek_rl/scv/a5/data/VOC2012_train_val/VOC2012_train_val/JPEGImages/{item_A_id}.jpg"
mask_src = f"/home/scai/phd/aiz218323/scratch/abhishek_rl/scv/a5/data/VOC2012_train_val/VOC2012_train_val/SegmentationClass/{item_A_id}.png"

# Output folder for item A; created on first run, reused afterwards.
dest_dir = "/home/scai/phd/aiz218323/scratch/abhishek_rl/scv/a5/processed_data/A"
os.makedirs(dest_dir, exist_ok=True)

# Each file keeps its original file name inside dest_dir.
img_dst = os.path.join(dest_dir, os.path.basename(img_src))
mask_dst = os.path.join(dest_dir, os.path.basename(mask_src))

# Copy the image first, then the mask.
for src_path, dst_path in ((img_src, img_dst), (mask_src, mask_dst)):
    shutil.copy(src_path, dst_path)

print("Item A image and mask copied to:", dest_dir)

Item A image and mask copied to: /home/scai/phd/aiz218323/scratch/abhishek_rl/scv/a5/processed_data/A


In [3]:
from pathlib import Path
from collections import defaultdict

# Root of the VOC2012 train/val data and the folder holding the per-class
# image-set lists.
voc_base = Path("/home/scai/phd/aiz218323/scratch/abhishek_rl/scv/a5/data/VOC2012_train_val/VOC2012_train_val")
main_dir = voc_base.joinpath("ImageSets", "Main")

# Item A is kept out of every class list built below.
item_A_id = "2008_000336"

# Only these classes are scanned.
selected_classes = ["dog", "bus", "chair", "tvmonitor", "sofa"]

# class name -> image IDs whose label column in "<class>_train.txt" is "1".
class_to_images = defaultdict(list)

for cls in selected_classes:
    with open(main_dir / f"{cls}_train.txt", "r") as f:
        # Every line is expected to hold exactly two whitespace-separated
        # fields: "<image id> <label>".
        rows = (line.strip().split() for line in f)
        class_to_images[cls].extend(
            img_id
            for img_id, label in rows
            if label == "1" and img_id != item_A_id
        )

# De-duplicated, sorted IDs over all selected classes (an ID may be listed
# under more than one class).
all_image_ids = sorted(
    {img_id for ids in class_to_images.values() for img_id in ids}
)

# Per-class counts followed by the overall unique total.
for cls in selected_classes:
    n_images = len(class_to_images[cls])
    print(f"{cls:<7} → {n_images} images")

print(f"\n Total unique images across all selected classes: {len(all_image_ids)}")


dog     → 631 images
bus     → 213 images
chair   → 566 images
tvmonitor → 290 images
sofa    → 257 images

 Total unique images across all selected classes: 1704


In [4]:
import random

# Candidate pools for set C, taken from the previously built class_to_images map.
dog_images = class_to_images["dog"]

# An image ID can be listed under several classes (the per-class counts add up
# to more than the number of unique IDs), so the union of the other classes
# must have the dog-positive IDs removed. Without this, the "non-dog" pool can
# contain dog images and the two samples below can share IDs, leaving
# duplicates in C_ids.
non_dog_images = sorted(
    set().union(*[class_to_images[c] for c in selected_classes if c != "dog"])
    - set(dog_images)
)

# Randomly sample the same number of images from each pool.
# random.sample raises ValueError if a pool holds fewer IDs than requested.
SAMPLES_PER_POOL = 400
random.seed(42)  # fixed seed so the draw is reproducible
dog_sample = random.sample(dog_images, SAMPLES_PER_POOL)
non_dog_sample = random.sample(non_dog_images, SAMPLES_PER_POOL)

# The pools are disjoint by construction; fail loudly if that ever breaks.
assert not set(dog_sample) & set(non_dog_sample), "dog and non-dog samples overlap"

# Combined C set
C_ids = sorted(dog_sample + non_dog_sample)
print(f"Total images in C: {len(C_ids)} ({len(dog_sample)} dog + {len(non_dog_sample)} non-dog)")

Total images in C: 800 (400 dog + 400 non-dog)


In [5]:
import os
import shutil
from pathlib import Path

# Folder the images are read from, and the folder that receives set C.
voc_img_dir = Path("/home/scai/phd/aiz218323/scratch/abhishek_rl/scv/a5/data/VOC2012_train_val/VOC2012_train_val/JPEGImages")
dst_dir = Path("/home/scai/phd/aiz218323/scratch/abhishek_rl/scv/a5/processed_data/C")

# Create the target folder (and any missing parents); a no-op if it exists.
dst_dir.mkdir(parents=True, exist_ok=True)

# Copy "<id>.jpg" for every ID in C_ids, keeping the file name unchanged.
for img_id in C_ids:
    file_name = f"{img_id}.jpg"
    shutil.copy(voc_img_dir / file_name, dst_dir / file_name)

print("All C images copied to:", dst_dir)

All C images copied to: /home/scai/phd/aiz218323/scratch/abhishek_rl/scv/a5/processed_data/C


In [7]:
from pathlib import Path
import os
# Folder holding the C′ images, and the VOC list file with the dog labels.
C_prime_dir = "/home/scai/phd/aiz218323/scratch/abhishek_rl/scv/a5/processed_data/C_prime"
dog_list_file = "/home/scai/phd/aiz218323/scratch/abhishek_rl/scv/a5/data/VOC2012_train_val/VOC2012_train_val/ImageSets/Main/dog_train.txt"

# image ID -> integer label, one "<image id> <label>" pair per line.
with open(dog_list_file, "r") as f:
    dog_id_map = {
        img_id: int(label)
        for img_id, label in (line.strip().split() for line in f)
    }

# IDs of the images in C′: every "*.jpg" file name with the 4-character
# extension stripped.
C_prime_ids = []
for file_name in os.listdir(C_prime_dir):
    if file_name.endswith(".jpg"):
        C_prime_ids.append(file_name[:-4])

# An image counts as "dog" only when its label is exactly 1; IDs missing from
# the list file count as non-dog.
dog_count = len([img_id for img_id in C_prime_ids if dog_id_map.get(img_id) == 1])
non_dog_count = len(C_prime_ids) - dog_count

print(f"Total images in C′: {len(C_prime_ids)}")
print(f"Dog images: {dog_count}")
print(f"Non-dog images: {non_dog_count}")


Total images in C′: 300
Dog images: 298
Non-dog images: 2


In [10]:
from pathlib import Path

# VOC root plus the two list files used to pick test set D.
base_path = Path("/home/scai/phd/aiz218323/scratch/abhishek_rl/scv/a5/data/VOC2012_train_val/VOC2012_train_val")
val_ids_path = base_path.joinpath("ImageSets/Segmentation/val.txt")
dog_val_path = base_path.joinpath("ImageSets/Main/dog_val.txt")

# IDs listed in the segmentation validation split, one per line.
with open(val_ids_path, "r") as f:
    val_ids = {line.strip() for line in f}

# Read the "<image id> <label>" pairs of the dog validation list.
with open(dog_val_path, "r") as f:
    entries = [line.strip().split() for line in f]

# Keep IDs labelled "1" that are also in the segmentation validation split,
# in file order.
dog_ids = [
    img_id
    for img_id, label in entries
    if label == "1" and img_id in val_ids
]

print(f"Selected {len(dog_ids)} dog images for test set D")


Selected 101 dog images for test set D


In [None]:
import shutil
import os

# Output root for test set D; images and masks go into separate sub-folders.
D_dir = "/home/scai/phd/aiz218323/scratch/abhishek_rl/scv/a5/processed_data/D"
for subdir in ("/images", "/masks"):
    os.makedirs(D_dir + subdir, exist_ok=True)

# For every selected ID copy the JPEG and its SegmentationClass PNG.
# base_path and dog_ids come from the cell that selects test set D.
for img_id in dog_ids:
    img_src = base_path.joinpath(f"JPEGImages/{img_id}.jpg")
    mask_src = base_path.joinpath(f"SegmentationClass/{img_id}.png")

    copy_jobs = (
        (img_src, f"{D_dir}/images/{img_id}.jpg"),
        (mask_src, f"{D_dir}/masks/{img_id}.png"),
    )
    for source, target in copy_jobs:
        shutil.copy(source, target)

print(f"Copied {len(dog_ids)} images and masks to {D_dir}")


✅ Copied 101 images and masks to /home/scai/phd/aiz218323/scratch/abhishek_rl/scv/a5/processed_data/D
