In [2]:
# Download script
# Necessary to run the download script
import os
import shutil  # For deleting the folder

import numpy as np
import scipy
import scipy.ndimage
import tensorflow as tf
import torch
from sklearn.model_selection import train_test_split
# Use the GPU when torch reports one, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Root folder in which the generated dataset is stored
dataset_folder = "Dataset1"

# Start from scratch: remove whatever a previous run left behind
if os.path.exists(dataset_folder):
    print(f"Deleting existing folder '{dataset_folder}'...")
    shutil.rmtree(dataset_folder)

# One folder per split (training, validation, test)
train_folder, val_folder, test_folder = (
    os.path.join(dataset_folder, split_name)
    for split_name in ("Train", "Validation", "Test")
)

# Each split holds a folder of clean images and a folder of degraded images
train_clean_folder, train_noisy_folder = (
    os.path.join(train_folder, kind) for kind in ("Groundtruth", "Degraded")
)
val_clean_folder, val_noisy_folder = (
    os.path.join(val_folder, kind) for kind in ("Groundtruth", "Degraded")
)
test_clean_folder, test_noisy_folder = (
    os.path.join(test_folder, kind) for kind in ("Groundtruth", "Degraded")
)

# Create the split folders first, then their clean/degraded subfolders
for _path in (
    train_folder,
    val_folder,
    test_folder,
    train_clean_folder,
    train_noisy_folder,
    val_clean_folder,
    val_noisy_folder,
    test_clean_folder,
    test_noisy_folder,
):
    os.makedirs(_path, exist_ok=True)

# Load the MNIST dataset using TensorFlow
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Smaller sets: base size from which the per-split subset sizes are derived
N_train_set = 2000
# Side length of one image, read from the second axis of the image array
N = train_images.shape[1]

# Artificial blur: standard deviation passed to the Gaussian filter
sigma = 1

# Since N is very small, we can implement the blurring operator as a matrix.
# Column i of K_Mat is the blurred i-th unit image, flattened, so that
# K_Mat @ x.reshape(N ** 2) is the blurred version of an N x N image x.
# scipy.ndimage must be imported explicitly at the top of the file;
# `import scipy` alone does not guarantee the submodule is loaded.
n_pixels = N ** 2
K_Mat = np.empty((n_pixels, n_pixels))
for col, unit_vector in enumerate(np.eye(n_pixels)):
    unit_image = unit_vector.reshape(N, N)
    K_Mat[:, col] = scipy.ndimage.gaussian_filter(unit_image, sigma).reshape(n_pixels)

# Normal-equations matrix K^T K of the blur operator
KtK_Mat = np.matmul(np.transpose(K_Mat), K_Mat)

# Add noise: scale of the standard-normal noise added to each blurred image.
# NOTE(review): the images are used exactly as returned by mnist.load_data()
# (documented by Keras as uint8 in 0..255), so noise_lev is relative to that
# scale - confirm whether normalising to [0, 1] first was intended.
noise_lev = 0.05

# Split the data into training, validation, and test sets (80% / 10% / 10%)
train_images, val_test_images, train_labels, val_test_labels = train_test_split(
    train_images, train_labels, test_size=0.2, random_state=42
)
val_images, test_images, val_labels, test_labels = train_test_split(
    val_test_images, val_test_labels, test_size=0.5, random_state=42
)

# Keep only a small subset of each split
N_val_set = int(0.2 * N_train_set)
N_test_set = int(0.2 * N_train_set)
N_train_set = int(0.5 * N_train_set)

# Each subset is taken from its OWN split. The previous code sliced
# train_images for the test and validation subsets as well, so all three
# sets shared the same training samples and the split above was discarded.
# Labels are cut the same way so they stay aligned with the images.
train_images = train_images[:N_train_set]
train_labels = train_labels[:N_train_set]
test_images = test_images[:N_test_set]
test_labels = test_labels[:N_test_set]
val_images = val_images[:N_val_set]
val_labels = val_labels[:N_val_set]


def _blur_and_add_noise(images):
    """Return blurred, noisy copies of a stack of N x N images.

    Each image is flattened to a row vector x and multiplied by K_Mat^T,
    which equals (K_Mat @ x)^T, i.e. the blur applied to every image at
    once. Noise drawn from a standard normal and scaled by noise_lev is
    then added.

    Args:
        images: array that reshapes to (-1, N ** 2), one image per entry.

    Returns:
        Array of shape (-1, N, N) holding the degraded images.
    """
    flat_images = images.reshape(-1, N ** 2)
    blurred = np.matmul(flat_images, np.transpose(K_Mat)).reshape(-1, N, N)
    return blurred + noise_lev * np.random.randn(*images.shape)


# These are matrices of vectors (one degraded image per entry)
test_noisy_images = _blur_and_add_noise(test_images)
train_noisy_images = _blur_and_add_noise(train_images)
val_noisy_images = _blur_and_add_noise(val_images)

# Save each dataset in its respective folder
def save_images_as_arrays(data, label, clean_folder, noisy_folder, prefix, set_type, noisy_data=None):
    """Write every clean/degraded image pair to disk as individual .npy files.

    Clean image i is saved as "<prefix>_Gr_<set_type>_<i>.npy" in
    clean_folder and its degraded counterpart as
    "<prefix>_Dr_<set_type>_<i>.npy" in noisy_folder.

    Args:
        data: sequence of clean images.
        label: sequence of labels paired with data. Labels are not written
            to disk; they only bound the number of saved items (iteration
            stops at the shorter of data and label).
        clean_folder: existing folder receiving the clean images.
        noisy_folder: existing folder receiving the degraded images.
        prefix: file-name prefix.
        set_type: short tag identifying the split (e.g. "tr", "va", "te").
        noisy_data: sequence of degraded images aligned with data. When
            omitted, the clean images are written to noisy_folder as well,
            which is what this function did before this parameter existed.

    Raises:
        ValueError: if noisy_data is given but its length differs from data.
    """
    if noisy_data is None:
        noisy_data = data
    elif len(noisy_data) != len(data):
        raise ValueError(
            f"noisy_data has {len(noisy_data)} items but data has {len(data)}"
        )

    for i, (clean_image, _, noisy_image) in enumerate(zip(data, label, noisy_data)):
        clean_image_filename = os.path.join(clean_folder, f"{prefix}_Gr_{set_type}_{i}.npy")
        noisy_image_filename = os.path.join(noisy_folder, f"{prefix}_Dr_{set_type}_{i}.npy")
        np.save(clean_image_filename, clean_image)
        # Save the degraded image, not a second copy of the clean one
        np.save(noisy_image_filename, noisy_image)


# Save training images (clean and degraded)
save_images_as_arrays(train_images, train_labels, train_clean_folder, train_noisy_folder, "x", "tr",
                      noisy_data=train_noisy_images)

# Save validation images (clean and degraded)
save_images_as_arrays(val_images, val_labels, val_clean_folder, val_noisy_folder, "x", "va",
                      noisy_data=val_noisy_images)

# Save testing images (clean and degraded)
save_images_as_arrays(test_images, test_labels, test_clean_folder, test_noisy_folder, "x", "te",
                      noisy_data=test_noisy_images)

print("Datasets split and saved in 'Dataset1' folder successfully.")

Deleting existing folder 'Dataset1'...
Datasets split and saved in 'Dataset1' folder successfully.
