# Linoy Ketashvili - 316220235
# Alon Helvits - 315531087

In [1]:
import os
from PIL import Image
import random

In [3]:
def crop_and_save_images(input_folder, output_folder, crop_size=(512, 512), crops_per_image=3, seed=None):
    """Write random fixed-size crops of every PNG found under ``input_folder``.

    The directory tree below ``input_folder`` is mirrored under
    ``output_folder`` and each crop is saved there as ``<stem>_<i>.png``,
    where ``i`` runs from 0 to ``crops_per_image - 1``.

    Args:
        input_folder: Root directory walked recursively for ``.png`` files
            (the extension is matched case-insensitively).
        output_folder: Root directory that receives the crops; created if
            it does not exist.
        crop_size: ``(width, height)`` of each crop, in pixels.
        crops_per_image: Number of random crops taken from each image.
        seed: Optional seed for a private random generator so that crop
            positions are reproducible. ``None`` (default) draws from the
            module-level ``random`` state, as before.

    Images that are smaller than ``crop_size`` in either dimension cannot be
    cropped; they are skipped and a notice is printed instead of aborting the
    whole run.
    """
    crop_width, crop_height = crop_size
    rng = random if seed is None else random.Random(seed)

    os.makedirs(output_folder, exist_ok=True)

    for root, _, files in os.walk(input_folder):
        # Mirror the sub-directory by its path *relative* to the input root.
        # A plain str.replace() would also rewrite later occurrences of the
        # input folder name inside nested directory names.
        relative_root = os.path.relpath(root, input_folder)
        output_subfolder = os.path.normpath(os.path.join(output_folder, relative_root))
        os.makedirs(output_subfolder, exist_ok=True)

        for file in files:
            if not file.lower().endswith('.png'):
                continue

            img_path = os.path.join(root, file)
            # The context manager closes the underlying file handle even if
            # cropping or saving fails.
            with Image.open(img_path) as img:
                img_width, img_height = img.size
                max_left = img_width - crop_width
                max_top = img_height - crop_height

                if max_left < 0 or max_top < 0:
                    print(
                        f"Skipping {img_path}: size {img_width}x{img_height} "
                        f"is smaller than crop size {crop_width}x{crop_height}"
                    )
                    continue

                stem = os.path.splitext(file)[0]
                for i in range(crops_per_image):
                    # Draw left before top to keep the same random sequence
                    # as the original implementation.
                    left = rng.randint(0, max_left)
                    top = rng.randint(0, max_top)

                    crop = img.crop((left, top, left + crop_width, top + crop_height))
                    crop.save(os.path.join(output_subfolder, f"{stem}_{i}.png"))


In [4]:
# Categories to crop and the splits each category contains.
datasets = ['man_made', 'nature']
subsets = ['train', 'val', 'test']

# Pair every source split with its "<category>_cropped" destination,
# category by category, then crop them in that order.
folder_pairs = [
    (f'dataset/{dataset}/{subset}', f'dataset/{dataset}_cropped/{subset}')
    for dataset in datasets
    for subset in subsets
]

for input_folder, output_folder in folder_pairs:
    crop_and_save_images(input_folder, output_folder)


# Create a smaller, balanced dataset (equal sample counts per category)

In [9]:
import os
import random
import shutil

# Root directories: samples are read from and written to the same base folder.
original_base_path = "dataset"
new_base_path = "dataset"

# Source category folders and, position by position, the folder each one is
# sampled into.
categories = [
    "man_made_cropped",
    "nature_cropped",
]
new_categories = [
    "man_made_800",
    "nature_800",
]

# Split sub-folders expected inside every category.
folders = ["train", "val", "test"]

# How many images to draw from each split of each category.
samples = {
    "train": 809,
    "val": 85,
    "test": 85,
}

def create_folder_structure(new_base_path, new_categories, folders):
    """Ensure ``<new_base_path>/<category>/<folder>`` exists for every pair.

    Args:
        new_base_path: Directory under which the category folders live.
        new_categories: Names of the category directories to create.
        folders: Names of the sub-directories created inside each category.

    Directories that already exist are left untouched.
    """
    category_folder_pairs = (
        (category_name, folder_name)
        for category_name in new_categories
        for folder_name in folders
    )
    for category_name, folder_name in category_folder_pairs:
        target_dir = os.path.join(new_base_path, category_name, folder_name)
        os.makedirs(target_dir, exist_ok=True)

def sample_and_copy_images(original_base_path, new_base_path, categories, new_categories, folders, samples, seed=None):
    """Copy a random sample of files from each source split into a new tree.

    For every ``(category, new_category)`` pair (matched by position) and
    every entry in ``folders``, ``samples[folder]`` files are drawn without
    replacement from ``<original_base_path>/<category>/<folder>`` and copied,
    keeping their file names, to ``<new_base_path>/<new_category>/<folder>``.

    Args:
        original_base_path: Root directory of the source categories.
        new_base_path: Root directory of the destination categories.
        categories: Source category directory names.
        new_categories: Destination category directory names, aligned with
            ``categories``.
        folders: Split sub-directory names present in every category.
        samples: Mapping from split name to the number of files to draw.
        seed: Optional seed for a private random generator so the selection
            is reproducible. ``None`` (default) uses the module-level
            ``random`` state.

    Raises:
        ValueError: If a source folder holds fewer regular files than the
            number requested for that split.
    """
    rng = random if seed is None else random.Random(seed)

    for category, new_category in zip(categories, new_categories):
        for folder in folders:
            original_folder_path = os.path.join(original_base_path, category, folder)
            new_folder_path = os.path.join(new_base_path, new_category, folder)

            # Keep regular files only: os.listdir() also returns
            # sub-directories, which shutil.copy2 cannot copy. Sorting makes
            # the draw independent of the filesystem's listing order.
            all_images = sorted(
                name
                for name in os.listdir(original_folder_path)
                if os.path.isfile(os.path.join(original_folder_path, name))
            )

            requested = samples[folder]
            if requested > len(all_images):
                raise ValueError(
                    f"Cannot sample {requested} images from {original_folder_path}: "
                    f"only {len(all_images)} files available"
                )

            # Do not rely on the destination having been created beforehand.
            os.makedirs(new_folder_path, exist_ok=True)

            for image in rng.sample(all_images, requested):
                shutil.copy2(
                    os.path.join(original_folder_path, image),
                    os.path.join(new_folder_path, image),
                )

if __name__ == "__main__":
    # Step 1: make sure every destination category/split directory exists.
    create_folder_structure(new_base_path, new_categories, folders)

    # Step 2: draw the configured number of images per split and copy them over.
    sample_and_copy_images(
        original_base_path,
        new_base_path,
        categories,
        new_categories,
        folders,
        samples,
    )

    print("Sampling and copying completed!")


Sampling and copying completed!


In [10]:
import os
import random
import shutil

# Root of the dataset and the folder that receives the merged samples.
base_path = "dataset"
new_combined_path = os.path.join(base_path, "combined_800")

# Category folders that are merged, and the splits inside each of them.
categories = [
    "man_made_800",
    "nature_800",
]
folders = ["train", "val", "test"]

# Images drawn per split from *each* category before merging.
samples = {
    "train": 390,
    "val": 42,
    "test": 42,
}

def create_combined_folder_structure(base_path, new_combined_path, folders):
    """Ensure ``new_combined_path`` contains one sub-directory per split.

    Args:
        base_path: Accepted for interface compatibility; not used here.
        new_combined_path: Directory that holds the combined splits.
        folders: Names of the split sub-directories to create.

    Directories that already exist are left untouched.
    """
    split_dirs = (os.path.join(new_combined_path, split_name) for split_name in folders)
    for split_dir in split_dirs:
        os.makedirs(split_dir, exist_ok=True)

def sample_and_copy_combined_images(base_path, new_combined_path, categories, folders, samples, seed=None):
    """Merge random samples from several categories into one folder per split.

    For every entry in ``folders``, ``samples[folder]`` files are drawn
    without replacement from ``<base_path>/<category>/<folder>`` for each
    category, and all drawn files are copied into
    ``<new_combined_path>/<folder>``.

    A file keeps its original name unless a file with that name was already
    copied into the same split during this call (for example the same file
    name existing in two categories). In that case the copy is stored as
    ``<category>_<name>`` so that no sampled image is silently overwritten.

    Args:
        base_path: Root directory that contains the category folders.
        new_combined_path: Root directory of the merged splits.
        categories: Category directory names to merge.
        folders: Split sub-directory names present in every category.
        samples: Mapping from split name to the number of files drawn from
            each category.
        seed: Optional seed for a private random generator so the selection
            is reproducible. ``None`` (default) uses the module-level
            ``random`` state.

    Raises:
        ValueError: If a source folder holds fewer regular files than the
            number requested for that split.
    """
    rng = random if seed is None else random.Random(seed)

    for folder in folders:
        # (category, source path) for every file selected for this split.
        combined_images = []

        for category in categories:
            folder_path = os.path.join(base_path, category, folder)

            # Keep regular files only: os.listdir() also returns
            # sub-directories, which shutil.copy2 cannot copy. Sorting makes
            # the draw independent of the filesystem's listing order.
            all_images = sorted(
                name
                for name in os.listdir(folder_path)
                if os.path.isfile(os.path.join(folder_path, name))
            )

            requested = samples[folder]
            if requested > len(all_images):
                raise ValueError(
                    f"Cannot sample {requested} images from {folder_path}: "
                    f"only {len(all_images)} files available"
                )

            for image in rng.sample(all_images, requested):
                combined_images.append((category, os.path.join(folder_path, image)))

        new_folder_path = os.path.join(new_combined_path, folder)
        # Do not rely on the destination having been created beforehand.
        os.makedirs(new_folder_path, exist_ok=True)

        used_names = set()
        for category, image_path in combined_images:
            image_name = os.path.basename(image_path)

            # Resolve name clashes between categories by prefixing the
            # category; the counter only matters if that name is taken too.
            target_name = image_name
            attempt = 0
            while target_name in used_names:
                attempt += 1
                prefix = category if attempt == 1 else f"{category}_{attempt}"
                target_name = f"{prefix}_{image_name}"
            used_names.add(target_name)

            shutil.copy2(image_path, os.path.join(new_folder_path, target_name))

if __name__ == "__main__":
    # Step 1: make sure the combined split directories exist.
    create_combined_folder_structure(base_path, new_combined_path, folders)

    # Step 2: sample from every category and merge the picks per split.
    sample_and_copy_combined_images(
        base_path,
        new_combined_path,
        categories,
        folders,
        samples,
    )

    print("Combined sampling and copying completed!")


Combined sampling and copying completed!
