In [15]:
from sklearn.model_selection import train_test_split
import os
from PIL import Image

In [16]:
# Absolute locations of the source images and of their semantic label masks.
image_path, mask_path = (
    os.path.abspath(relative_dir)
    for relative_dir in (
        'data/dataset/semantic_drone_dataset/original_images',
        'data/dataset/semantic_drone_dataset/label_images_semantic',
    )
)

In [17]:
# List each directory, keeping only the '.jpg' images and the '.png' masks.
image_files = list(filter(lambda name: name.endswith('.jpg'), os.listdir(image_path)))
mask_files = list(filter(lambda name: name.endswith('.png'), os.listdir(mask_path)))

In [18]:
# Sort Files for Alignment.
image_files.sort()
mask_files.sort()

# Images and masks are paired purely by position from here on, so a single
# missing or extra file would silently shift every later pair. Fail loudly
# instead of training on mismatched data.
# NOTE(review): assumes a mask carries the same base name as its image
# (e.g. '000.jpg' <-> '000.png') -- confirm against the dataset layout.
image_stems = [os.path.splitext(name)[0] for name in image_files]
mask_stems = [os.path.splitext(name)[0] for name in mask_files]
if image_stems != mask_stems:
    unmatched = sorted(set(image_stems) ^ set(mask_stems))
    raise ValueError(
        f"Images and masks do not pair up by file name "
        f"({len(image_files)} images, {len(mask_files)} masks); "
        f"unmatched base names (first 10): {unmatched[:10]}"
    )

In [20]:
# Prefix every file name with its directory to obtain one full path per image and per mask.
image_paths = list(map(lambda name: os.path.join(image_path, name), image_files))
mask_paths = list(map(lambda name: os.path.join(mask_path, name), mask_files))

In [21]:
# Hold out 20% of the image/mask path pairs for testing; the remaining 80% is
# the training set. The seed is pinned via random_state.
x_train, x_test, y_train, y_test = train_test_split(
    image_paths,
    mask_paths,
    test_size=0.2,
    random_state=42,
)

In [22]:
# Function to Load Images & Masks.
def load_image_mask(image_path, mask_path):
    """Open an image and its mask with PIL and return them as a pair.

    Args:
        image_path: Path of the image file to open.
        mask_path: Path of the mask file to open.

    Returns:
        A tuple ``(image, mask)`` holding whatever ``Image.open`` returned for
        each path.
    """
    # NOTE(review): Pillow documents Image.open as lazy (the file stays open
    # until the pixel data is read) -- worth confirming that holding many of
    # these objects at once is acceptable for the dataset size.
    # The image is opened first, then the mask.
    image, mask = (Image.open(location) for location in (image_path, mask_path))
    return image, mask

In [23]:
# Open every (image, mask) pair of the training split, then of the testing split.
train_data = [load_image_mask(*pair) for pair in zip(x_train, y_train)]
test_data = [load_image_mask(*pair) for pair in zip(x_test, y_test)]

In [24]:
# Destination directory for each split's images and masks, keyed by role.
output_directory = dict([
    ("train_images", "advanced_data/x_train"),
    ("train_masks", "advanced_data/y_train"),
    ("test_images", "advanced_data/x_test"),
    ("test_masks", "advanced_data/y_test"),
])

# Make sure every destination exists; directories already present are left as they are.
for path in output_directory.values():
    os.makedirs(path, exist_ok=True)

In [25]:
# Function to Save Images into Directories.
def save_images_masks(dataset, image_dir, mask_dir, checkpoint_file='checkpoint.txt'):
    """Save (image, mask) pairs as ``<i>.jpg`` / ``<i>.png``, resuming from a checkpoint.

    Args:
        dataset: Sliceable sequence of ``(image, mask)`` pairs; both elements
            must provide a ``save(path)`` method.
        image_dir: Directory that receives ``<index>.jpg`` for every image.
        mask_dir: Directory that receives ``<index>.png`` for every mask.
        checkpoint_file: Text file holding the index of the next pair to save.
            It is read once at the start and rewritten after every saved pair.

    Returns:
        None. On the first failure an error line is printed and saving stops;
        the checkpoint then still points at the pair that failed, so a later
        call retries from there.
    """
    start_index = 0
    if os.path.exists(checkpoint_file):
        with open(checkpoint_file, 'r') as f:
            stored = f.read().strip()
        try:
            # A negative index would slice from the end of the dataset and
            # produce file names such as '-1.jpg', so clamp it to 0.
            start_index = max(int(stored), 0)
        except ValueError:
            # An interrupted run can leave the file empty or garbled. Starting
            # over only re-saves files, which is safer than refusing to run.
            print(f"Ignoring unreadable checkpoint {checkpoint_file!r}; starting from 0")
            start_index = 0

    # Written next to the checkpoint and swapped in with os.replace, so the
    # checkpoint is never observed half-written.
    pending_checkpoint = f'{checkpoint_file}.tmp'

    for i, (image, mask) in enumerate(dataset[start_index:], start=start_index):
        try:
            image_path = os.path.join(image_dir, f'{i}.jpg')
            mask_path = os.path.join(mask_dir, f'{i}.png')

            image.save(image_path)
            mask.save(mask_path)

            # Only advance the checkpoint once both files of the pair are saved.
            with open(pending_checkpoint, 'w') as f:
                f.write(str(i + 1))
            os.replace(pending_checkpoint, checkpoint_file)

        except Exception as e:
            print(f"Error saving image {i}: {e}")
            break

In [26]:
# Write the training pairs into the training image and mask directories.
save_images_masks(
    train_data,
    image_dir=output_directory["train_images"],
    mask_dir=output_directory["train_masks"],
)

In [28]:
# Rewind the checkpoint file to index 0 so that saving the testing split
# starts from its first pair.
with open("checkpoint.txt", mode='w') as f:
    f.write('0')

In [29]:
# Write the testing pairs into the testing image and mask directories.
save_images_masks(
    test_data,
    image_dir=output_directory["test_images"],
    mask_dir=output_directory["test_masks"],
)

In [30]:
# Delete the checkpoint file once saving is finished; nothing happens if it is absent.
checkpoint_exists = os.path.exists("checkpoint.txt")
if checkpoint_exists:
    os.remove("checkpoint.txt")