In [None]:
### Download the MNIST-M dataset from https://github.com/mashaan14/MNIST-M/blob/main/MNIST-M.zip and extract it before running the cells below.

In [None]:
import os
import random
from PIL import Image
import numpy as np

def load_and_resize_images(data_dir, image_size=(32, 32)):
    """
    Load every image found under ``data_dir/<digit>/`` as a normalized RGB array.

    Parameters:
    - data_dir: Directory that contains one sub-directory per digit, named
                '0' through '9'.
    - image_size: Target size handed to ``Image.resize`` (PIL expects
                  ``(width, height)``).

    Returns:
    - images: NumPy array holding the loaded images as float64 values
              scaled by 1/255.
    - labels: NumPy array with the digit of each image, in the same order
              as ``images``.
    """
    images = []
    labels = []

    # Traverse the ten digit directories in ascending order
    for digit in range(10):
        digit_dir = os.path.join(data_dir, str(digit))

        # os.listdir returns entries in arbitrary order; sorting makes the
        # resulting dataset order identical across runs and machines.
        for img_file in sorted(os.listdir(digit_dir)):
            img_path = os.path.join(digit_dir, img_file)

            # Skip anything that is not a regular file (e.g. nested folders),
            # which would otherwise make Image.open fail.
            if not os.path.isfile(img_path):
                continue

            # The context manager closes the underlying file handle as soon
            # as the pixel data has been converted and resized.
            with Image.open(img_path) as raw_image:
                resized = raw_image.convert('RGB').resize(image_size)

            # Convert to float64 and normalize to [0, 1]
            images.append(np.array(resized, dtype=np.float64) / 255.0)
            labels.append(digit)

    return np.array(images), np.array(labels)

def create_balanced_subset(images, labels, subset_size_per_class, seed=None):
    """
    Draw the same number of random samples from each digit class (0-9).

    Parameters:
    - images: NumPy array of images, indexed along its first axis.
    - labels: NumPy array of digit labels aligned with ``images``.
    - subset_size_per_class: Number of samples drawn (without replacement)
                             for every digit.
    - seed: Optional seed for a private ``random.Random`` generator, making
            the selection reproducible. When None (the default) the global
            ``random`` module state is used, as before.

    Returns:
    - subset_images, subset_labels: NumPy arrays with the selected samples,
      grouped by digit in ascending order.

    Raises:
    - ValueError: If ``subset_size_per_class`` is negative or larger than the
                  number of available samples of some digit.
    """
    # Fall back to the module-level generator so unseeded calls behave
    # exactly like a plain random.sample call.
    rng = random if seed is None else random.Random(seed)

    subset_images = []
    subset_labels = []

    for digit in range(10):  # Digits 0 to 9
        idx = np.where(labels == digit)[0]

        # Fail early with a message that names the offending class instead of
        # the generic error raised by random.sample.
        if not 0 <= subset_size_per_class <= len(idx):
            raise ValueError(
                f"Cannot sample {subset_size_per_class} images for digit {digit}: "
                f"only {len(idx)} available"
            )

        sampled_idx = rng.sample(list(idx), subset_size_per_class)
        subset_images.extend(images[sampled_idx])
        subset_labels.extend(labels[sampled_idx])

    return np.array(subset_images), np.array(subset_labels)

# Locations of the extracted MNIST-M splits (placeholder paths; adjust to your setup)
train_dir = 'path_to/MNIST-M/training'
test_dir = 'path_to/MNIST-M/testing'

# Read the complete training and testing splits into memory
train_images, train_labels = load_and_resize_images(train_dir)
test_images, test_labels = load_and_resize_images(test_dir)

# Class-balanced subsets: 10 x 1500 = 15,000 training and 10 x 500 = 5,000 testing samples
train_subset_images, train_subset_labels = create_balanced_subset(
    train_images, train_labels, 1500
)
test_subset_images, test_subset_labels = create_balanced_subset(
    test_images, test_labels, 500
)

# The subsets come back grouped by digit, so draw a random ordering for each split
train_indices = np.random.permutation(len(train_subset_images))
test_indices = np.random.permutation(len(test_subset_images))

# Apply one ordering to both images and labels so every pair stays aligned
train_subset_images, train_subset_labels = (
    train_subset_images[train_indices],
    train_subset_labels[train_indices],
)
test_subset_images, test_subset_labels = (
    test_subset_images[test_indices],
    test_subset_labels[test_indices],
)

# Report the final shapes and dtype of both splits
print(f"Training set:: {train_subset_images.shape}, {train_subset_labels.shape}, dtype: {train_subset_images.dtype}")
print(f"Testing set:: {test_subset_images.shape}, {test_subset_labels.shape}, dtype: {test_subset_images.dtype}")


In [22]:
import numpy as np

def add_poisson_noise(images, noise_level=1.0):
    """
    Adds Poisson (shot) noise to images whose values lie in [0, 1].

    Every pixel value p is replaced by
    ``Poisson(p * 255 * noise_level) / (255 * noise_level)`` and clipped to
    [0, 1]. ``noise_level`` therefore scales the Poisson rate (the simulated
    photon count): SMALLER values give STRONGER relative noise, larger values
    give a cleaner image.

    Parameters:
    - images: NumPy array of shape (N, H, W, C), where N is the number of images,
              H is the height, W is the width, and C is the number of channels.
              Values outside [0, 1] are clipped before the noise is applied.
    - noise_level: A positive float scaling the Poisson rate. Lower values
                  result in stronger noise.

    Returns:
    - noisy_images: NumPy array with Poisson noise added, values in [0, 1].

    Raises:
    - ValueError: If ``noise_level`` is not strictly positive (a value of 0
                  would otherwise produce NaN pixels through a 0/0 division).
    """
    # Written as "not >" so that NaN is rejected as well
    if not noise_level > 0:
        raise ValueError(f"noise_level must be positive, got {noise_level!r}")

    noisy_images = []

    # Loop over each image
    for img in images:
        # The Poisson rate must be non-negative, so force the input into [0, 1]
        img = np.clip(img, 0, 1)

        # Scale to the Poisson rate, sample, then scale back to [0, 1]
        scaled_img = img * 255 * noise_level
        noisy_img = np.random.poisson(scaled_img) / (255 * noise_level)

        # Samples above the rate can exceed 1 after rescaling; clip them
        noisy_img = np.clip(noisy_img, 0, 1)

        noisy_images.append(noisy_img)

    return np.array(noisy_images)

import numpy as np
from scipy.ndimage import gaussian_filter
from PIL import Image
import matplotlib.pyplot as plt

def psf_blur_add_noise(image: np.ndarray, sigma: float = 1.0, noise_level: float = 0.01) -> np.ndarray:
    """
    Blur an image with a Gaussian kernel (a simple PSF model) and then add
    zero-mean Gaussian noise.

    Parameters:
    - image: np.ndarray, the input image. A 3-D array is blurred one slice at
             a time along its last axis (the channels); an array of any other
             rank is passed to the Gaussian filter as a whole.
    - sigma: float, standard deviation of the Gaussian kernel (blur strength).
    - noise_level: float, standard deviation of the additive Gaussian noise.

    Returns:
    - np.ndarray, the blurred and noisy image, clipped to [0, 1].
    """
    has_channels = image.ndim == 3

    if not has_channels:
        # Single-plane (grayscale) input: one filter call is enough
        blurred = gaussian_filter(image, sigma=sigma)
    else:
        # Filter each channel on its own so the blur never mixes channels
        blurred = np.zeros_like(image)
        for channel in range(image.shape[2]):
            blurred[..., channel] = gaussian_filter(image[..., channel], sigma=sigma)

    # Zero-mean Gaussian perturbation with the same shape as the input
    perturbation = np.random.normal(0, noise_level, image.shape)

    # Keep the result inside the valid range of a normalized image
    return np.clip(blurred + perturbation, 0, 1)



In [466]:
# noise_level=0.05 scales pixel intensities to Poisson rates of at most 255 * 0.05 = 12.75
train_noisy = add_poisson_noise(train_subset_images, noise_level=0.05)
test_noisy = add_poisson_noise(test_subset_images, noise_level=0.05)

In [None]:
# Blur every image with a sigma=2.0 Gaussian; noise_level=0.00 adds no extra noise.
# Note: the results are Python lists of per-image arrays, not stacked NumPy arrays.
train_PSF = [
    psf_blur_add_noise(img, sigma=2.0, noise_level=0.00)
    for img in train_subset_images
]
test_PSF = [
    psf_blur_add_noise(img, sigma=2.0, noise_level=0.00)
    for img in test_subset_images
]