In [17]:
import numpy as np
from PIL import Image, ImageDraw
import os
import random

### Generating datasets

For this problem, I created an image dataset consisting of the macro-objects **{'X', 'I', 'O', 'H', 'T', 'L'}**, each made up of smaller micro-objects (small circles).

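To make the dataset more varied, each of the following transformations is applied independently with 50% probability:
1) Background noise (extra light-gray circles at random positions)
2) Jitter (a random offset of a few pixels on each micro-object)
3) Rotation (of the whole image, by a random angle between -10° and 10°)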

Some examples: 

!['H with background noise'](proximity_data/train_dataset/H_114.png)
!['I with rotation'](proximity_data/valid_dataset/I_153.png)


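The density of the micro-objects is gradually **decreased** as we move from the training data to the validation data and finally the testing data: it is fixed at 1.0 for training, sampled from [0.8, 1.0] for validation, and sampled from [0.5, 1.0] for testing.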

In addition, **1000 images** were generated for each of the training, validation and testing sets.

In [13]:
# Function to create macro-objects using micro-objects (circles)
def create_image(macro_object_coords, img_size=(200, 200), density=1.0, save_path="image.png"):
    """Render one macro-object out of small circles and save it to disk.

    Every coordinate in ``macro_object_coords`` is used as the top-left corner
    of the bounding box of one micro-object (a gray circle with a black
    outline). Jitter, rotation and background noise are each switched on
    independently with 50% probability, using NumPy's global random state.

    Args:
        macro_object_coords: Iterable of ``(x, y)`` pixel positions.
        img_size: ``(width, height)`` of the white canvas. When rotation is
            applied the canvas is expanded to hold the rotated content, so the
            saved image can be larger than ``img_size``.
        density: Positive number. Each circle is drawn with a diameter of
            ``int(10 / density)`` pixels; the positions themselves are not
            scaled, so a lower density yields larger circles.
        save_path: Destination file path passed to ``Image.save``.

    Raises:
        ValueError: If ``density`` is not strictly positive.
    """
    if density <= 0:
        raise ValueError(f"density must be positive, got {density!r}")

    img = Image.new('RGB', img_size, 'white')
    draw = ImageDraw.Draw(img)

    # Diameter of every micro-circle. Only the circle size depends on density;
    # the anchor coordinates are used as given.
    circle_diameter = int(10 / density)

    # Randomly decide which transformations to apply (50% chance each).
    apply_jitter = np.random.rand() < 0.5
    apply_rotation = np.random.rand() < 0.5
    apply_noise = np.random.rand() < 0.5

    jitter_amount = 5  # Maximum jitter, in pixels, in either direction
    rotation_angle = np.random.uniform(-10, 10) if apply_rotation else 0

    # Draw one circle per macro-object coordinate.
    for coord in macro_object_coords:
        # randint's upper bound is exclusive, hence the +1 to make the jitter
        # symmetric over [-jitter_amount, jitter_amount].
        x_offset = np.random.randint(-jitter_amount, jitter_amount + 1) if apply_jitter else 0
        y_offset = np.random.randint(-jitter_amount, jitter_amount + 1) if apply_jitter else 0

        left = coord[0] + x_offset
        top = coord[1] + y_offset
        draw.ellipse([left, top, left + circle_diameter, top + circle_diameter],
                     outline='black', fill='gray')

    # Rotate the whole image for simplicity. rotate() returns a NEW image, and
    # the corners it uncovers are filled with white to match the background
    # instead of the default black.
    if apply_rotation:
        img = img.rotate(rotation_angle, expand=True, fillcolor=(255, 255, 255))

    # Add noise (extra circles) if needed.
    if apply_noise:
        # Bind a fresh drawing context to the current image: after a rotation
        # the original `draw` still points at the discarded, unrotated image,
        # so noise drawn through it would never reach the saved file.
        noise_draw = ImageDraw.Draw(img)
        width, height = img.size  # may exceed img_size after an expanding rotation
        noise_count = np.random.randint(10, 30)  # 10..29 noise circles
        for _ in range(noise_count):
            x_noise = np.random.randint(0, width)
            y_noise = np.random.randint(0, height)
            noise_draw.ellipse([x_noise, y_noise, x_noise + 5, y_noise + 5],
                               outline='black', fill='lightgray')

    # Save the image to the specified path.
    img.save(save_path)


In [10]:
# Define coordinates for different macro-objects
# Anchor points, in pixels, for the micro-circles that make up each
# macro-object. The order of the keys and of the points is significant:
# labels and per-point jitter are derived from it elsewhere in the notebook.
macro_object_coords_dict = {
    # Single vertical stroke down the x=50 column.
    'I': [(50, y) for y in (20, 60, 100, 140, 180)],
    # Vertical stroke plus one foot point to the right.
    # NOTE(review): there is no corner point at (50, 180) - confirm this is intended.
    'L': [(50, y) for y in (20, 60, 100, 140)] + [(90, 180)],
    # Horizontal bar on the y=20 row, then a stem below its middle point.
    'T': [(x, 20) for x in (30, 70, 110)] + [(70, 60), (70, 100)],
    # Four corners around a centre point.
    'X': [
        (50, 50), (90, 50),
        (70, 70),
        (50, 90), (90, 90),
    ],
    # Left stroke, right stroke, then the crossbar point between them.
    'H': (
        [(50, y) for y in (20, 60, 100)]
        + [(90, y) for y in (20, 60, 100)]
        + [(70, 60)]
    ),
    # Four corner points only: top row first, then bottom row.
    'O': [(x, y) for y in (20, 100) for x in (50, 90)],
}

In [39]:
# Function to generate datasets
def generate_dataset(dataset_type='train', density=1.0, img_size=(200, 200), num_images=100):
    """Generate ``num_images`` macro-object images for one dataset split.

    Images are written to ``./proximity_data/{dataset_type}_dataset/`` as
    ``{macro_object}_{i}.png``. The macro-object of each image is drawn
    uniformly at random from ``['I', 'L', 'T', 'X', 'H', 'O']`` using NumPy's
    global random state.

    Args:
        dataset_type: Name of the split. ``'valid'`` and ``'test'`` sample a
            per-image density (see ``density``); any other value uses the
            base density unchanged.
        density: Base (maximum) density, must be positive. For ``'valid'`` the
            per-image density is sampled uniformly from
            ``[0.8 * density, density]``, for ``'test'`` from
            ``[0.5 * density, density]``. With the default of 1.0 these are
            the ranges [0.8, 1.0] and [0.5, 1.0].
        img_size: ``(width, height)`` forwarded to ``create_image``.
        num_images: Number of images to generate.

    Returns:
        A list of ``num_images`` integer labels, where each label is the index
        of the image's macro-object in ``['I', 'L', 'T', 'X', 'H', 'O']``.

    Raises:
        ValueError: If ``density`` is not strictly positive.
    """
    if density <= 0:
        raise ValueError(f"density must be positive, got {density!r}")

    base_path = f"./proximity_data/{dataset_type}_dataset/"
    os.makedirs(base_path, exist_ok=True)

    macro_objects = ['I', 'L', 'T', 'X', 'H', 'O']

    # Lower sampling bound of the per-image density, as a fraction of the base
    # density. Splits that are not listed use the base density as-is.
    min_density_fraction = {'valid': 0.8, 'test': 0.5}.get(dataset_type)

    # NOTE: file names contain the randomly chosen macro-object, so re-running
    # into an existing directory with a different random state leaves images
    # from the earlier run next to the new ones.
    labels = []
    for i in range(num_images):
        # Draw the label directly; the macro-object is looked up from it.
        label = int(np.random.randint(len(macro_objects)))
        macro_object = macro_objects[label]
        coords = macro_object_coords_dict[macro_object]

        # Use a separate name so the caller's `density` argument is honoured
        # rather than overwritten on every iteration.
        if min_density_fraction is None:
            image_density = density
        else:
            image_density = np.random.uniform(min_density_fraction * density, density)

        save_path = os.path.join(base_path, f"{macro_object}_{i}.png")

        # Create and save the image.
        create_image(coords, img_size=img_size, density=image_density, save_path=save_path)

        labels.append(label)

    # Return the labels (you may want to save these labels to a file).
    return labels

In [40]:
# Generate 1000 images each for the training, validation and testing splits.
# Seed NumPy's global RNG first so that Restart & Run All reproduces the same
# labels and file names (all randomness in the generation code above comes
# from np.random).
np.random.seed(42)
train_labels = generate_dataset('train', num_images=1000)
valid_labels = generate_dataset('valid', num_images=1000)
test_labels = generate_dataset('test', num_images=1000)