In [1]:
# Resize every image in the archive to 244x244 so it can be fed to our EfficientNet-B7 model.
# NOTE(review): 244 may be a typo for the common 224x224 input size, and EfficientNet-B7 is
# normally run at 600x600 — confirm that 244x244 is really the resolution the model expects.
# 1) Unzip archive.zip into a fresh directory (previous unzipped/resized folders are deleted first)
!rm -rf /content/archive_unzipped /content/archive_244
!unzip -q /content/archive.zip -d /content/archive_unzipped

# 2) Install dependencies (quietly) into the running kernel's environment
%pip -q install pillow tqdm

from pathlib import Path
from PIL import Image, ImageOps
from tqdm import tqdm

# File suffixes (lower-case, leading dot included) that are treated as images.
# NOTE(review): the short TIFF suffix ".tif" is not listed — confirm that is intentional.
IMAGE_EXTS = {
    ".jpg",
    ".jpeg",
    ".png",
    ".webp",
    ".bmp",
    ".tiff",
}

# Input tree: the unzipped archive (holds nontoxic_images/, toxic_images/, etc.).
ROOT_IN = Path("/content/archive_unzipped")

# Output tree: resized copies are written here; create it up front if it is missing.
ROOT_OUT = Path("/content/archive_244")
ROOT_OUT.mkdir(exist_ok=True, parents=True)

def iter_images_recursive(root: Path, exts=None):
    """Yield every image file found under ``root``, searching all subfolders.

    Args:
        root: Directory to search recursively.
        exts: Optional iterable of file suffixes (with the leading dot, e.g.
            ``".png"``) to accept. Matching is case-insensitive. When omitted,
            the module-level ``IMAGE_EXTS`` set is used.

    Yields:
        ``Path`` objects for regular files whose suffix is accepted, in sorted
        path order so repeated runs visit files in the same sequence.
    """
    wanted = IMAGE_EXTS if exts is None else {e.lower() for e in exts}
    # rglob order depends on the filesystem; sorting makes the traversal reproducible.
    for p in sorted(root.rglob("*")):
        # is_file() filters out directories that happen to have an image-like suffix.
        if p.is_file() and p.suffix.lower() in wanted:
            yield p

# Output size (width, height) in pixels for every resized image.
target_size = (244, 244)

# Image modes that Pillow's JPEG writer accepts; any other mode is converted to RGB before saving.
JPEG_WRITABLE_MODES = {"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"}

files = list(iter_images_recursive(ROOT_IN))
print(f"Found {len(files)} images under {ROOT_IN}")

for src in tqdm(files, desc="Resizing to 244x244"):
    # preserve subfolder structure (e.g. nontoxic_images/000/img.png)
    rel = src.relative_to(ROOT_IN)
    dst = ROOT_OUT / rel
    dst.parent.mkdir(parents=True, exist_ok=True)

    with Image.open(src) as im:
        # Apply the EXIF orientation so the pixels are stored upright.
        im = ImageOps.exif_transpose(im)

        # Pillow forces NEAREST resampling for palette images, which would silently
        # ignore LANCZOS below; expand to true colour first (keeping transparency).
        if im.mode == "P":
            im = im.convert("RGBA" if "transparency" in im.info else "RGB")

        # Scale and centre-crop to exactly target_size without distorting the aspect ratio.
        im = ImageOps.fit(im, target_size, method=Image.LANCZOS, centering=(0.5, 0.5))

        # Pillow picks the output format from dst's suffix. A file named .jpg may hold
        # non-JPEG data (alpha, 16-bit, ...) that the JPEG writer rejects, so convert it.
        if dst.suffix.lower() in {".jpg", ".jpeg"} and im.mode not in JPEG_WRITABLE_MODES:
            im = im.convert("RGB")

        im.save(dst)

print("Done. Resized images are in:", ROOT_OUT.resolve())

# 3) Optional: zip the resized dataset to download
!zip -qr /content/archive_244.zip /content/archive_244
print("Zipped resized dataset at /content/archive_244.zip")


Found 9952 images under /content/archive_unzipped


Resizing to 244x244: 100%|██████████| 9952/9952 [01:27<00:00, 113.28it/s]


Done. Resized images are in: /content/archive_244
Zipped resized dataset at /content/archive_244.zip
