In [None]:
pip install --upgrade pip

In [None]:
pip install Pillow

In [None]:
pip install ipywidgets

In [None]:
pip install datasets

In [None]:
pip install datasets[vision]

In [None]:
# Accessing ImageNet from https://huggingface.co/datasets/ILSVRC/imagenet-1k

In [27]:
# pip uninstall huggingface_hub datasets

In [28]:
# pip install huggingface_hub datasets

In [9]:
# Log in with Hugging Face credentials. An access token must be configured
# beforehand; notebook_login() renders a login widget in the cell output.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [1]:
from datasets import load_dataset

# Load the ImageNet-1k dataset.

# Stream the dataset (streaming=True) instead of downloading the whole thing.
ds = load_dataset("imagenet-1k", streaming=True)

# The code below is the initial version: it extracts the images and prints their labels as-is, with no perturbations.

In [3]:
# train_ds = ds['train']
# train_ds_iter = iter(train_ds)

# # Function to display a few images and their labels
# def preview_images(ds_iter, num_samples=5):
#     for _ in range(num_samples):
#         # Get the next sample
#         sample = next(ds_iter)
        
#         # Extract the image and label
#         img = sample['image']  # This is already a PIL image
#         label = sample['label']
        
#         # Display the image
#         img.show()  # This will open the image in the default image viewer
        
#         # Print the label
#         print(f"Label: {label}")

# # Preview the first 5 images and their labels
# preview_images(train_ds_iter, num_samples=5)

Label: 726
Label: 917
Label: 13
Label: 939
Label: 6


# The code below defines three perturbation functions (noise injection, occlusion, resolution reduction), then shows the original and perturbed versions of 5 images.

In [4]:
import numpy as np
from PIL import Image, ImageOps
import random

# 1. Noise Injection
def noise_injection(image, distribution='gaussian', mean=0, stddev=25):
    """Return a copy of ``image`` with additive random noise.

    Parameters
    ----------
    image : PIL image or array-like
        Source image; it is converted with ``np.array`` and never modified.
    distribution : {'gaussian', 'uniform'}
        Noise distribution to sample from.
    mean : float
        Centre of the noise distribution (honoured by both distributions).
    stddev : float
        Standard deviation of the Gaussian noise. For ``'uniform'`` it is the
        half-width of the sampling interval, i.e. noise is drawn from
        ``[mean - stddev, mean + stddev)``.

    Returns
    -------
    PIL image built from the noisy ``uint8`` array.

    Raises
    ------
    ValueError
        If ``distribution`` is not one of the supported names.
    """
    img_array = np.array(image)

    if distribution == 'gaussian':
        noise = np.random.normal(mean, stddev, img_array.shape)
    elif distribution == 'uniform':
        # Centre the interval on `mean` so the parameter is not silently
        # ignored; with the default mean=0 this is the original [-stddev, stddev).
        noise = np.random.uniform(mean - stddev, mean + stddev, img_array.shape)
    else:
        raise ValueError("Supported distributions: 'gaussian', 'uniform'")

    # The sum is floating point; clip to the 8-bit range before casting so
    # values wrap neither below 0 nor above 255.
    noisy_image = np.clip(img_array + noise, 0, 255).astype(np.uint8)

    return Image.fromarray(noisy_image)

# 2. Occlusion
def occlusion(image, percentage=0.2):
    """Black out a randomly placed rectangle of the image.

    Parameters
    ----------
    image : PIL image or array-like
        Source image (grayscale ``(H, W)`` or multi-channel ``(H, W, C)``);
        it is converted with ``np.array`` and never modified.
    percentage : float
        Fraction, in ``[0, 1]``, of the image height AND of the image width
        covered by the rectangle. The occluded area is therefore
        ``percentage ** 2`` of the image (0.2 -> 4% of the pixels).

    Returns
    -------
    PIL image with the selected region set to zero.

    Raises
    ------
    ValueError
        If ``percentage`` is outside ``[0, 1]``.
    """
    if not 0 <= percentage <= 1:
        raise ValueError("percentage must be between 0 and 1")

    img_array = np.array(image)
    # Only the first two axes are spatial; slicing them keeps grayscale
    # (2-D) images working, which three-way unpacking did not.
    height, width = img_array.shape[:2]

    # Compute the occlusion dimensions
    occ_height = int(height * percentage)
    occ_width = int(width * percentage)

    # Randomly select the top-left corner so the rectangle fits entirely
    # inside the image (randint is inclusive on both ends).
    top_left_x = random.randint(0, width - occ_width)
    top_left_y = random.randint(0, height - occ_height)

    # Apply occlusion (set to zero in the specified region, all channels)
    img_array[top_left_y:top_left_y + occ_height, top_left_x:top_left_x + occ_width] = 0

    return Image.fromarray(img_array)

# 3. Resolution Reduction
def resolution_reduction(image, block_size=2):
    """Reduce resolution by averaging over ``block_size`` x ``block_size`` tiles.

    The image is partitioned into square tiles starting at the top-left
    corner (tiles on the right/bottom edge may be smaller). Every pixel in a
    tile is replaced by the tile's per-channel mean, truncated to an integer.
    The output has the same size as the input.

    Parameters
    ----------
    image : PIL image or array-like
        Source image (grayscale ``(H, W)`` or multi-channel ``(H, W, C)``);
        it is converted with ``np.array`` and never modified.
    block_size : int
        Side length of the averaging tiles, in pixels. Must be >= 1.

    Returns
    -------
    PIL image with the same shape and dtype as the input array.

    Raises
    ------
    ValueError
        If ``block_size`` is smaller than 1.
    """
    if block_size < 1:
        raise ValueError("block_size must be a positive integer")

    img_array = np.array(image)
    # Only the first two axes are spatial, so grayscale images work too.
    height, width = img_array.shape[:2]

    # First row / column index of every tile.
    row_starts = np.arange(0, height, block_size)
    col_starts = np.arange(0, width, block_size)

    # Sum each tile: reduce runs of rows first, then runs of columns.
    # float64 holds these integer sums exactly.
    block_sums = np.add.reduceat(img_array.astype(np.float64), row_starts, axis=0)
    block_sums = np.add.reduceat(block_sums, col_starts, axis=1)

    # Number of pixels in each tile (edge tiles may be truncated).
    rows_per_block = np.diff(np.append(row_starts, height))
    cols_per_block = np.diff(np.append(col_starts, width))
    pixels_per_block = np.outer(rows_per_block, cols_per_block)
    # Add trailing singleton axes so the counts broadcast over channels.
    extra_axes = block_sums.ndim - 2
    pixels_per_block = pixels_per_block.reshape(pixels_per_block.shape + (1,) * extra_axes)

    # Truncate the mean toward zero, then cast back to the image dtype.
    block_means = (block_sums / pixels_per_block).astype(int).astype(img_array.dtype)

    # Blow each tile mean back up to tile size and trim the overhang that
    # appears when the image size is not a multiple of block_size.
    expanded = np.repeat(np.repeat(block_means, block_size, axis=0), block_size, axis=1)
    reduced_img_array = np.ascontiguousarray(expanded[:height, :width])

    return Image.fromarray(reduced_img_array)

# Function to test the perturbations on 5 images
def test_perturbations(ds_iter, num_samples=5):
    """Show the original and the three perturbed variants of the next samples.

    Pulls ``num_samples`` records from ``ds_iter`` (each a mapping with an
    ``'image'`` PIL image and a ``'label'``), applies Gaussian noise, a 20%
    occlusion and a 2-pixel block averaging to each image, then prints a
    caption and calls ``.show()`` for the original and every variant.
    """
    for _sample_index in range(num_samples):
        record = next(ds_iter)
        original = record['image']  # PIL image
        class_id = record['label']

        # All perturbations are computed before anything is displayed.
        variants = (
            ("Noisy Image",
             noise_injection(original, distribution='gaussian', mean=0, stddev=25)),
            ("Occluded Image",
             occlusion(original, percentage=0.2)),
            ("Reduced Resolution Image",
             resolution_reduction(original, block_size=2)),
        )

        # Original first, captioned with its label.
        print(f"Original Label: {class_id}")
        original.show(title="Original Image")

        # Then each perturbed variant under its own caption.
        for caption, perturbed in variants:
            print(f"{caption}:")
            perturbed.show(title=caption)

# Seed both random number generators used by the perturbation functions
# (`random` for the occlusion placement, `np.random` for the noise) so that
# re-running this cell yields the same perturbed images.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Stream the dataset
ds = load_dataset("imagenet-1k", streaming=True)
train_ds = ds['train']

# Initialize iterator for the 'train' split
train_ds_iter = iter(train_ds)

# Test the perturbations on 5 images
test_perturbations(train_ds_iter, num_samples=5)


Original Label: 726
Noisy Image:
Occluded Image:
Reduced Resolution Image:
Original Label: 917
Noisy Image:
Occluded Image:
Reduced Resolution Image:
Original Label: 13
Noisy Image:
Occluded Image:
Reduced Resolution Image:
Original Label: 939
Noisy Image:
Occluded Image:
Reduced Resolution Image:
Original Label: 6
Noisy Image:
Occluded Image:
Reduced Resolution Image:
