In [1]:
import os
import shutil
import zipfile
from io import BytesIO

from PIL import Image
from tqdm import tqdm  # Import tqdm for progress bar

In [2]:
# Locations used by the whole notebook: the input archive, the scratch
# directory it is unpacked into, and the archive that is written at the end.
# Edit the literals below if your files live somewhere else.
zip_path, extract_folder, output_zip_path = map(
    os.path.expanduser,
    (
        "~/Downloads/compressed_images.zip",
        "~/Downloads/extracted_images_2",
        "~/Downloads/twice_compressed_images.zip",
    ),
)

# Create the extraction directory up front (a no-op when it is already there)
os.makedirs(extract_folder, exist_ok=True)

In [3]:
# Start every run from an empty extraction folder so that files left over from
# a previous run cannot leak into the new archive.
if os.path.exists(extract_folder):
    shutil.rmtree(extract_folder)

# Recreate the folder right away: after this cell it always exists and is empty
os.makedirs(extract_folder)

In [4]:
# Step 1: Extract images.zip
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_folder)

# Get the top-level "images" folder inside extracted_images
root_images_folder = os.path.join(extract_folder, "images")

# Fail fast if the archive is laid out differently: walking a missing folder
# yields nothing, which would otherwise end in an empty output ZIP with no error.
if not os.path.isdir(root_images_folder):
    raise FileNotFoundError(
        f"Expected a top-level 'images' folder in {zip_path}, "
        f"but {root_images_folder} does not exist after extraction"
    )

In [5]:
# Check the structure of the extracted files: for every directory below
# extract_folder, print its path, its sub-directories and a sample of its files.
# (os is already imported in the first cell, so it is not re-imported here.)
for root, dirs, files in os.walk(extract_folder):
    print(f"Root: {root}")
    print(f"Directories: {dirs}")
    print(f"Files: {files[:5]}")  # Show first 5 files to confirm

Root: C:\Users\varko/Downloads/extracted_images_2
Directories: ['images']
Files: []
Root: C:\Users\varko/Downloads/extracted_images_2\images
Directories: ['argentina', 'austria', 'canada', 'chile', 'france', 'iceland', 'italy', 'japan', 'new_zealand', 'norway', 'peru', 'switzerland']
Files: []
Root: C:\Users\varko/Downloads/extracted_images_2\images\argentina
Directories: []
Files: ['1741691006_-30.9927065_-68.8548332.jpg', '1741691007_-22.8471847_-65.2269743.jpg', '1741691009_-22.8081965_-65.8481546.jpg', '1741691010_-22.1328156_-65.743084.jpg', '1741691010_-27.8027889_-68.02298.jpg']
Root: C:\Users\varko/Downloads/extracted_images_2\images\austria
Directories: []
Files: ['1741629848_47.013154_12.642665.jpg', '1741629893_46.9753931_13.1763415.jpg', '1741629943_47.1592688_12.9692941.jpg', '1741629986_47.0074369_10.2812601.jpg', '1741630033_47.0602395_12.8158785.jpg']
Root: C:\Users\varko/Downloads/extracted_images_2\images\canada
Directories: []
Files: ['1741638729_50.232387_-114.39071

In [6]:
# Step 2: Compress images while keeping folder structure
# Holds (path inside the ZIP, encoded bytes) pairs once the images are processed
compressed_images = []

# Walk the extracted tree and remember the full path of every supported image
image_files = []
for folder, _, names in os.walk(root_images_folder):
    for name in names:
        # Case-insensitive extension filter; anything else is skipped
        if not name.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
            continue
        image_files.append(os.path.join(folder, name))
        # Report progress after every 50th match
        if not len(image_files) % 50:
            print(f"Found {len(image_files)} images so far...")

Found 50 images so far...
Found 100 images so far...
Found 150 images so far...
Found 200 images so far...
Found 250 images so far...
Found 300 images so far...
Found 350 images so far...
Found 400 images so far...
Found 450 images so far...
Found 500 images so far...
Found 550 images so far...
Found 600 images so far...
Found 650 images so far...
Found 700 images so far...
Found 750 images so far...
Found 800 images so far...
Found 850 images so far...
Found 900 images so far...
Found 950 images so far...
Found 1000 images so far...
Found 1050 images so far...
Found 1100 images so far...
Found 1150 images so far...
Found 1200 images so far...
Found 1250 images so far...
Found 1300 images so far...
Found 1350 images so far...
Found 1400 images so far...
Found 1450 images so far...
Found 1500 images so far...
Found 1550 images so far...
Found 1600 images so far...
Found 1650 images so far...
Found 1700 images so far...
Found 1750 images so far...
Found 1800 images so far...
Found 1850 i

In [None]:
# Process images with tqdm progress bar

# Pillow encoder to use for each supported file extension. Choosing the encoder
# from the extension (rather than from the format Pillow detected) guarantees that
# the bytes stored in the archive always match the entry's file name.
FORMAT_BY_EXTENSION = {".jpg": "JPEG", ".jpeg": "JPEG", ".png": "PNG", ".webp": "WEBP"}
# Only these encoders honour `quality`; the PNG encoder ignores it
LOSSY_FORMATS = {"JPEG", "WEBP"}
LOSSY_QUALITY = 85  # Adjust quality if needed
# Pixel modes the JPEG encoder can write directly
JPEG_MODES = ("RGB", "L", "CMYK")

# Start from an empty list so that re-running this cell never appends duplicates
compressed_images = []

for img_path in tqdm(image_files, desc="Processing Images", unit="image"):
    extension = os.path.splitext(img_path)[1].lower()
    # PNG is lossless, so it is the safe fallback for an unexpected extension
    img_format = FORMAT_BY_EXTENSION.get(extension, "PNG")

    with Image.open(img_path) as img:
        # Half the size in both directions, but never below 1 px:
        # resize() rejects a zero width or height.
        new_width = max(1, img.width // 2)
        new_height = max(1, img.height // 2)
        img_resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

        # JPEG cannot store alpha or palette data, so flatten those modes first
        if img_format == "JPEG" and img_resized.mode not in JPEG_MODES:
            img_resized = img_resized.convert("RGB")

        # Save to memory buffer
        img_io = BytesIO()
        save_options = {"quality": LOSSY_QUALITY} if img_format in LOSSY_FORMATS else {}
        img_resized.save(img_io, format=img_format, **save_options)

    # Preserve folder structure inside ZIP, e.g. "images/USA/image1.jpg".
    # ZIP entry names always use forward slashes, whatever the host OS uses.
    relative_path = os.path.relpath(img_path, extract_folder).replace(os.sep, "/")
    compressed_images.append((relative_path, img_io.getvalue()))

# Step 3: Save compressed images to new ZIP while keeping the structure.
# The archive is only written after every image was processed successfully.
with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zip_out:
    for file_name, img_data in tqdm(compressed_images, desc="Saving to ZIP", unit="image"):
        zip_out.writestr(file_name, img_data)

print(f" Compression complete! Compressed images saved to: {output_zip_path}")

Processing Images:   2%|▏         | 63/2647 [01:03<32:30,  1.32image/s]  