In [1]:
import gzip
import numpy as np
from PIL import Image
import os

def ubyte_to_image(filename, save_dir, img_format="png"):
    """
    Converts images in an IDX3 .ubyte or .ubyte.gz file to PNG or JPG format.

    The image count and the height/width of each image are read from the
    16-byte IDX3 header (four big-endian 32-bit integers: magic number,
    number of images, rows, columns) instead of being assumed to be 28x28.
    Each image is written as ``{save_dir}/image_{i}.{img_format}`` and a
    "Saved ..." line is printed for every file.

    Args:
        filename (str): Path to the .ubyte or .ubyte.gz file.
        save_dir (str): Directory to save converted images (created if missing).
        img_format (str): Image format to save as, e.g. "png" or "jpg".
            Also used verbatim as the file extension.

    Raises:
        ValueError: If the header is shorter than 16 bytes, the magic number
            is not that of an IDX3 unsigned-byte image file, or the file holds
            fewer pixel bytes than the header declares.
    """
    # Ensure the save directory exists
    os.makedirs(save_dir, exist_ok=True)

    # Pillow registers its JPEG writer under the name "JPEG"; "JPG" is only a
    # file extension, so it has to be translated before being passed as format=.
    pil_format = img_format.upper()
    if pil_format == "JPG":
        pil_format = "JPEG"

    opener = gzip.open if filename.endswith('.gz') else open
    with opener(filename, 'rb') as f:
        header = f.read(16)
        if len(header) != 16:
            raise ValueError(
                f"{filename}: expected a 16-byte IDX3 header, got {len(header)} bytes"
            )
        magic, num_images, rows, cols = (
            int.from_bytes(header[i:i + 4], "big") for i in range(0, 16, 4)
        )
        # 0x00000803 = unsigned-byte data (0x08) with 3 dimensions (0x03)
        if magic != 0x00000803:
            raise ValueError(
                f"{filename}: not an IDX3 unsigned-byte image file (magic number {magic})"
            )

        # Read the rest of the data as image bytes
        buffer = f.read()

    expected = num_images * rows * cols
    if len(buffer) < expected:
        raise ValueError(
            f"{filename}: header declares {num_images} images of {rows}x{cols} "
            f"({expected} bytes) but only {len(buffer)} bytes are present"
        )
    if expected == 0:
        # Nothing to convert (no images, or zero-sized images)
        return

    # Reshape the data into (num_images, height, width); trailing bytes beyond
    # what the header declares are ignored.
    images = np.frombuffer(buffer, dtype=np.uint8, count=expected)
    images = images.reshape(num_images, rows, cols)

    # Convert each image and save it
    for i, img_array in enumerate(images):
        # A 2-D uint8 array is interpreted by Pillow as 8-bit grayscale ('L')
        img = Image.fromarray(img_array)
        out_path = f"{save_dir}/image_{i}.{img_format}"
        img.save(out_path, format=pil_format)
        print(f"Saved {out_path}")

# Export the MNIST training and test image archives as individual PNG files,
# one output directory per split.
for archive_path, output_dir in (
    ('./data/MNIST/raw/train-images-idx3-ubyte.gz', 'output_images/train'),
    ('./data/MNIST/raw/t10k-images-idx3-ubyte.gz', 'output_images/test'),
):
    ubyte_to_image(archive_path, output_dir, img_format="png")


Saved output_images/train/image_0.png
Saved output_images/train/image_1.png
Saved output_images/train/image_2.png
Saved output_images/train/image_3.png
Saved output_images/train/image_4.png
Saved output_images/train/image_5.png
Saved output_images/train/image_6.png
Saved output_images/train/image_7.png
Saved output_images/train/image_8.png
Saved output_images/train/image_9.png
Saved output_images/train/image_10.png
Saved output_images/train/image_11.png
Saved output_images/train/image_12.png
Saved output_images/train/image_13.png
Saved output_images/train/image_14.png
Saved output_images/train/image_15.png
Saved output_images/train/image_16.png
Saved output_images/train/image_17.png
Saved output_images/train/image_18.png
Saved output_images/train/image_19.png
Saved output_images/train/image_20.png
Saved output_images/train/image_21.png
Saved output_images/train/image_22.png
Saved output_images/train/image_23.png
Saved output_images/train/image_24.png
Saved output_images/train/image_25.