In [None]:
import os, shutil
from tqdm import tqdm
from torchvision.datasets import CelebA
from google.colab import files

# --- Step 1: Load CelebA dataset ---
# split="all" requests every partition, and target_type="identity" makes the
# per-image identity label the dataset target. download=True lets torchvision
# fetch the archives into `root` (see the torchvision CelebA docs for details).
ds = CelebA(
    root="/content/celeba_data",
    split="all",
    target_type="identity",
    download=True,
)

# Folder holding the image files; the copy step joins ds.filename entries onto it.
img_dir = os.path.join(ds.root, ds.base_folder, "img_align_celeba")


🔎 Reference file 202178.jpg belongs to identity 8945


Copying identity 8945: 100%|██████████| 202599/202599 [00:00<00:00, 236256.52it/s]

✅ Copied 30 images for identity 8945 to /content/celebrity_8945_images





<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# --- Step 2: Map reference image number to identity ---
reference_index = 202178     # the image number
ref_fname = f"{reference_index:06d}.jpg"  # zero-padded to six digits, e.g., "202178.jpg"

# Find the position of the reference file in the dataset's filename list; the
# same position is then used to look up its entry in ds.identity.
try:
    idx = ds.filename.index(ref_fname)
except ValueError as err:
    # Chain explicitly so the traceback presents the failed lookup as the direct
    # cause rather than as an unrelated error raised during handling.
    raise ValueError(f"File {ref_fname} not found in CelebA dataset!") from err

# Convert to a plain int so it can be compared against other labels and
# embedded in directory / archive names by the later steps.
ref_identity = int(ds.identity[idx])
print(f"🔎 Reference file {ref_fname} belongs to identity {ref_identity}")

In [None]:
# --- Step 3: Copy all images of that identity ---
# Destination folder is named after the identity; creating it is a no-op when
# it already exists.
out_dir = f"/content/celebrity_{ref_identity}_images"
os.makedirs(out_dir, exist_ok=True)

# Walk every dataset entry, skipping anything that is not the target identity
# or whose image file is absent on disk. `count` tallies the files actually copied.
count = 0
for i in tqdm(range(len(ds)), desc=f"Copying identity {ref_identity}"):
    if int(ds.identity[i]) != ref_identity:
        continue

    src = os.path.join(img_dir, ds.filename[i])
    if not os.path.isfile(src):
        # Best-effort: a listed file that is missing is skipped, not an error.
        continue

    shutil.copy2(src, out_dir)
    count += 1

print(f"✅ Copied {count} images for identity {ref_identity} to {out_dir}")

In [None]:
# --- Step 4: Zip and download ---
zip_filename = f"celebrity_{ref_identity}_images.zip"

# make_archive appends the ".zip" suffix itself, so pass it the name without
# the extension. splitext removes only the trailing extension, whereas
# str.replace would remove every ".zip" occurrence in the name.
archive_path = shutil.make_archive(os.path.splitext(zip_filename)[0], "zip", out_dir)

# Download the path make_archive reports having written instead of assuming
# it matches zip_filename.
files.download(archive_path)