In [1]:
import os
import numpy as np
from tensorflow.keras.datasets import mnist
from sklearn.model_selection import train_test_split
from PIL import Image


In [2]:
# Fetch MNIST through Keras; it arrives already divided into a train part
# and a test part.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Pool both parts so the splits below are drawn from the whole dataset
# rather than from the division Keras ships with.
full_images = np.concatenate((train_images, test_images), axis=0)
full_labels = np.concatenate((train_labels, test_labels), axis=0)

# First cut: 70% for training, 30% held out.
# The fixed random_state makes the shuffle repeatable across runs.
train_images, val_test_images, train_labels, val_test_labels = train_test_split(
    full_images,
    full_labels,
    test_size=0.3,
    random_state=42,
)

# Second cut: halve the held-out 30% into validation and test (15% each).
val_images, test_images, val_labels, test_labels = train_test_split(
    val_test_images,
    val_test_labels,
    test_size=0.5,
    random_state=42,
)

# Make sure one output directory exists per split (no error if already there).
for split_dir in ('data/train', 'data/val', 'data/test'):
    os.makedirs(split_dir, exist_ok=True)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:

# Save images to folders
def save_images(images, labels, folder):
    """Write each image as a PNG under ``folder/<label>/<index>.png``.

    Args:
        images: Iterable of arrays accepted by ``PIL.Image.fromarray``.
        labels: Iterable of labels, paired positionally with ``images``.
            ``str(label)`` is used as the sub-folder name.
        folder: Root directory for this split; label sub-folders are
            created beneath it as needed.

    Raises:
        ValueError: If ``images`` and ``labels`` are both sized and their
            lengths differ.

    The file name is the position of the image within ``images``, so indices
    are unique per call but are spread across the label sub-folders.
    """
    # zip() stops at the shorter input, which would silently drop images or
    # labels; fail loudly instead when both lengths are known.
    if hasattr(images, "__len__") and hasattr(labels, "__len__"):
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels differ in length: "
                f"{len(images)} != {len(labels)}"
            )

    # Remember which label folders were already created so makedirs runs once
    # per distinct label instead of once per image.
    created_folders = set()

    for i, (image, label) in enumerate(zip(images, labels)):
        image_folder = os.path.join(folder, str(label))
        if image_folder not in created_folders:
            os.makedirs(image_folder, exist_ok=True)
            created_folders.add(image_folder)

        image_path = os.path.join(image_folder, f"{i}.png")
        Image.fromarray(image).save(image_path)

# Export every split into its own directory tree, one call per split.
for split_images, split_labels, split_folder in (
    (train_images, train_labels, 'data/train'),
    (val_images, val_labels, 'data/val'),
    (test_images, test_labels, 'data/test'),
):
    save_images(split_images, split_labels, split_folder)

print("Data generation and storage completed.")


Data generation and storage completed.


In [10]:
!zip -r /content/data/val.zip /content/data/val

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
updating: content/data/val/0/5805.png (stored 0%)
updating: content/data/val/0/7834.png (stored 0%)
updating: content/data/val/0/674.png (stored 0%)
updating: content/data/val/0/7902.png (stored 0%)
updating: content/data/val/0/2150.png (stored 0%)
updating: content/data/val/0/8042.png (stored 0%)
updating: content/data/val/0/6279.png (stored 0%)
updating: content/data/val/0/3566.png (stored 0%)
updating: content/data/val/0/3258.png (stored 0%)
updating: content/data/val/0/306.png (stored 0%)
updating: content/data/val/0/1800.png (stored 0%)
updating: content/data/val/0/5216.png (stored 0%)
updating: content/data/val/0/7507.png (stored 0%)
updating: content/data/val/0/1728.png (stored 0%)
updating: content/data/val/0/1942.png (stored 0%)
updating: content/data/val/0/6853.png (stored 0%)
updating: content/data/val/0/4552.png (stored 0%)
updating: content/data/val/0/5854.png (stored 0%)
updating: content/data/val/0/9658.png

In [12]:
# Colab-only module; this cell will not run outside Google Colab.
from google.colab import files

# Location of the archive to hand over to the browser as a download.
val_archive_path = "/content/data/val.zip"
files.download(val_archive_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>