In [3]:
import os
import struct
import numpy as np
from PIL import Image

# Input IDX files (raw, uncompressed) and the directory the PNGs go to.
images_file = "data/FashionMNIST/raw/t10k-images-idx3-ubyte"
labels_file = "data/FashionMNIST/raw/t10k-labels-idx1-ubyte"
output_dir = "mnist_pngs"

# Human-readable FashionMNIST class names; the position in the list is the
# numeric label it is looked up by.
label_names = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

# Create the output directory up front; a pre-existing one is not an error.
os.makedirs(output_dir, exist_ok=True)

# Read labels.
# IDX1 layout: a big-endian header of two uint32 values (magic number, item
# count) followed by one unsigned byte per label.
with open(labels_file, "rb") as f:
    magic, num_labels = struct.unpack(">II", f.read(8))
    # 2049 (0x00000801) identifies a 1-dimensional unsigned-byte IDX file;
    # anything else means this is not a label file (or it is still gzipped).
    if magic != 2049:
        raise ValueError(
            f"{labels_file}: unexpected magic number {magic} "
            "(expected 2049 for an IDX1 label file)"
        )
    labels = np.frombuffer(f.read(), dtype=np.uint8)
    # The header's count must match the payload, otherwise the file is
    # truncated or carries trailing data and the labels cannot be trusted.
    if labels.size != num_labels:
        raise ValueError(
            f"{labels_file}: header declares {num_labels} labels "
            f"but {labels.size} were read"
        )
    print(f"Labels: {num_labels} loaded.")

# Read images and write each one to output_dir as "<index>_<label name>.png".
# IDX3 layout: a big-endian header of four uint32 values (magic number, image
# count, rows, cols) followed by rows*cols unsigned bytes per image.
with open(images_file, "rb") as f:
    magic, num_images, rows, cols = struct.unpack(">IIII", f.read(16))
    # 2051 (0x00000803) identifies a 3-dimensional unsigned-byte IDX file.
    if magic != 2051:
        raise ValueError(
            f"{images_file}: unexpected magic number {magic} "
            "(expected 2051 for an IDX3 image file)"
        )
    # Fail before writing anything if the two files do not describe the same
    # samples; otherwise images would be paired with the wrong labels or the
    # loop would die part-way through with an IndexError.
    if num_images != len(labels):
        raise ValueError(
            f"{images_file} holds {num_images} images but "
            f"{len(labels)} labels were loaded"
        )
    print(f"Images: {num_images}, Size: {rows}x{cols}")

    image_size = rows * cols  # bytes per image, constant for the whole file
    for i in range(num_images):
        # Read one image (rows*cols bytes)
        img_data = f.read(image_size)
        if len(img_data) != image_size:
            raise ValueError(
                f"{images_file}: truncated while reading image {i} "
                f"(got {len(img_data)} of {image_size} bytes)"
            )
        img_array = np.frombuffer(img_data, dtype=np.uint8).reshape(rows, cols)
        # A 2-D uint8 array is interpreted as 8-bit grayscale ("L"), so the
        # mode does not need to be passed explicitly.
        img = Image.fromarray(img_array)

        # Use the label name in the file name; the index prefix keeps files
        # with the same label from overwriting each other.
        label_idx = int(labels[i])
        label_name = label_names[label_idx]
        # "/" (as in "T-shirt/top") would be taken as a path separator.
        filename = f"{i}_{label_name.replace('/', '_')}.png"

        img.save(os.path.join(output_dir, filename))


Labels: 10000 loaded.
Images: 10000, Size: 28x28
