In [None]:
import glob
import json
import math
import os
import random
import re
from collections import Counter

import cv2
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm_notebook as tqdm

In [None]:
def parse_label_filename(filename):
    """
    Extract chip metadata from the name of a label file.

    The directory part and the extension are dropped, and the remaining stem
    is split on underscores. Exactly 10 tokens are required: tokens 0, 1 and 2
    become the dataset name, image type and tile id (kept as strings), and
    tokens 5, 7 and 9 are parsed as integers for min_x, min_y and window_size.
    The remaining tokens are ignored.

    NOTE(review): this looks like stems of the form
    ``<dataset>_<type>_<tile>__x0_<minX>_y0_<minY>_dxdy_<size>`` -- confirm
    against the actual file names.

    Returns:
        dict with keys 'dataset', 'image_type', 'tile_id', 'min_x', 'min_y'
        and 'window_size'.

    Raises:
        ValueError: if the stem does not split into 10 tokens, or if one of
            the numeric tokens is not an integer.
    """
    stem = os.path.splitext(os.path.basename(filename))[0]
    tokens = stem.split('_')

    # Guard clause: anything that is not a 10-token stem is rejected up front
    if len(tokens) != 10:
        raise ValueError(f"Filename {stem} does not match the expected format")

    meta = dict(zip(('dataset', 'image_type', 'tile_id'), tokens[:3]))
    for key, position in (('min_x', 5), ('min_y', 7), ('window_size', 9)):
        meta[key] = int(tokens[position])
    return meta

In [None]:
def read_label_file(label_path):
    """
    Parse a whitespace-separated label file.

    Each line with exactly five fields becomes one entry
    ``[category, x_center, y_center, width, height]`` where the category is
    converted with ``int`` and the other four values with ``float``. Blank
    lines and lines with any other number of fields are skipped silently.

    Returns:
        list of 5-element lists, in file order.

    Raises:
        ValueError: if a five-field line holds a value that cannot be
            converted.
    """
    parsed = []

    with open(label_path, 'r') as handle:
        for raw_line in handle:
            fields = raw_line.strip().split()
            if len(fields) != 5:
                continue
            class_id = int(fields[0])
            parsed.append([class_id] + [float(value) for value in fields[1:]])

    return parsed

In [None]:
def get_image_path_from_label(label_path, image_path):
    """
    Locate the image file that belongs to a label file.

    Args:
        label_path: Path of the label file; its name must be accepted by
            parse_label_filename.
        image_path: Directory that is searched for the image.

    Returns:
        Path of the first candidate found. The label's stem is tried with the
        extensions '.tif', '.jpg' and '.png' (in that order); if none exists,
        files in image_path starting with
        ``<dataset>_<image_type>_<tile_id>_<min_x>_<min_y>_<window_size>``
        are considered and the alphabetically first one is returned.

    Raises:
        ValueError: propagated from parse_label_filename for unexpected names.
        FileNotFoundError: if neither lookup finds a file.
    """
    label_meta = parse_label_filename(label_path)
    image_type = label_meta['image_type']

    # Same stem as the label, only the extension differs
    basename = os.path.splitext(os.path.basename(label_path))[0]

    for ext in ('.tif', '.jpg', '.png'):
        candidate = os.path.join(image_path, f"{basename}{ext}")
        if os.path.exists(candidate):
            return candidate

    # Fallback: prefix search built from the parsed metadata.
    # NOTE(review): this prefix joins the six parsed values directly, which is
    # a different layout from the 10-token stems parse_label_filename accepts
    # -- confirm it matches how the image files are actually named.
    pattern = f"{basename.split('_')[0]}_{image_type}_{label_meta['tile_id']}_{label_meta['min_x']}_{label_meta['min_y']}_{label_meta['window_size']}"
    matches = glob.glob(os.path.join(image_path, f"{pattern}*"))

    if not matches:
        raise FileNotFoundError(f"Could not find image file matching label: {basename}")

    # glob returns hits in arbitrary order; sort so the same file wins on every run
    return sorted(matches)[0]

In [None]:
def calculate_geocoordinates(label, label_meta, geotransform):
    """
    Convert one normalized label into geo-referenced position and size.

    Args:
        label: Sequence whose items 1..4 are x_center, y_center, width and
            height, normalized by the chip size.
        label_meta: Mapping providing 'window_size' (chip size in pixels) and
            'min_x' / 'min_y' (offset of the chip inside its tile).
        geotransform: Indexable of which items 0 and 3 are used as the x / y
            origin and items 1 and 5 as the per-pixel step in x / y.
            NOTE(review): looks like a GDAL-style geotransform -- confirm.

    Returns:
        dict with 'geo_x', 'geo_y' (transformed box centre) and 'width_m',
        'height_m' (box size multiplied by the absolute pixel step).
    """
    scale = label_meta['window_size']

    # Normalized chip coordinates -> pixels inside the chip
    cx_px, cy_px, w_px, h_px = (label[i] * scale for i in range(1, 5))

    # Pixels inside the chip -> pixels inside the full tile
    tile_col = label_meta['min_x'] + cx_px
    tile_row = label_meta['min_y'] + cy_px

    result = {}
    result['geo_x'] = geotransform[0] + (tile_col * geotransform[1])
    result['geo_y'] = geotransform[3] + (tile_row * geotransform[5])
    result['width_m'] = w_px * abs(geotransform[1])
    result['height_m'] = h_px * abs(geotransform[5])
    return result

In [None]:
def transform_labels(original_labels, augmentation_type, image_width, image_height):
    """
    Re-express normalized box labels for a rotated and/or flipped image.

    Args:
        original_labels: List of labels in format
            [category, x_center_norm, y_center_norm, width_norm, height_norm]
        augmentation_type: One of 'rotate_90', 'rotate_180', 'rotate_270',
            'flip_h', 'flip_h_rotate_90', 'flip_h_rotate_180',
            'flip_h_rotate_270'. Any other value leaves the labels unchanged
            (each label is copied).
        image_width: Accepted for interface compatibility; not used, because
            all arithmetic is done in normalized coordinates.
        image_height: Accepted for interface compatibility; not used.

    Returns:
        List of transformed labels, one per input label, in the same order.
    """
    # augmentation -> (mapping of the box centre, do width and height swap?)
    # The flip_h_rotate_* entries are the horizontal flip (x, y) -> (1-x, y)
    # followed by the corresponding plain rotation.
    rules = {
        'rotate_90': (lambda x, y: (y, 1.0 - x), True),
        'rotate_180': (lambda x, y: (1.0 - x, 1.0 - y), False),
        'rotate_270': (lambda x, y: (1.0 - y, x), True),
        'flip_h': (lambda x, y: (1.0 - x, y), False),
        'flip_h_rotate_90': (lambda x, y: (y, x), True),
        'flip_h_rotate_180': (lambda x, y: (x, 1.0 - y), False),
        'flip_h_rotate_270': (lambda x, y: (1.0 - y, 1.0 - x), True),
    }
    rule = rules.get(augmentation_type)

    results = []
    for label in original_labels:
        category, x_center, y_center, width, height = label

        if rule is None:
            # Unknown augmentation: pass the label through as a copy
            results.append(label.copy())
            continue

        move_center, swap_sides = rule
        new_x, new_y = move_center(x_center, y_center)
        new_w, new_h = (height, width) if swap_sides else (width, height)
        results.append([category, new_x, new_y, new_w, new_h])

    return results

In [None]:
def augment_images_and_labels(image_array, original_labels):
    """
    Perform image augmentation (rotation and flipping) and transform the labels accordingly.

    Rotations use cv2.rotate, which only rearranges pixels. The previous
    warpAffine-based rotation about (width/2, height/2) shifted 90/270 degree
    results by one pixel (leaving an empty border row/column and boxes that
    were slightly off) and cropped non-square images. With cv2.rotate a 90 or
    270 degree result of an H x W image is W x H, which is what the normalized
    labels from transform_labels describe.

    Args:
        image_array: The original image as a numpy array
        original_labels: List of labels in format [category, x_center_norm, y_center_norm, width_norm, height_norm]

    Returns:
        List of 8 tuples (augmented_image, transformed_labels): the untouched
        copy first, then 'rotate_90', 'rotate_180', 'rotate_270', 'flip_h',
        'flip_h_rotate_90', 'flip_h_rotate_180', 'flip_h_rotate_270'.
        Rotation angles are counter-clockwise.
    """
    # Copy so callers holding the result never alias the input array
    original = image_array.copy()

    # Passed through to transform_labels for interface compatibility
    height, width = image_array.shape[:2]

    # Counter-clockwise angle -> cv2.rotate flag (270 ccw == 90 cw)
    rotate_flags = {
        90: cv2.ROTATE_90_COUNTERCLOCKWISE,
        180: cv2.ROTATE_180,
        270: cv2.ROTATE_90_CLOCKWISE,
    }

    # (augmentation name, flip horizontally first?, ccw rotation angle or None)
    recipes = [
        ('rotate_90', False, 90),
        ('rotate_180', False, 180),
        ('rotate_270', False, 270),
        ('flip_h', True, None),
        ('flip_h_rotate_90', True, 90),
        ('flip_h_rotate_180', True, 180),
        ('flip_h_rotate_270', True, 270),
    ]

    augmented_data = [(original, original_labels)]

    # The horizontal flip is shared by all flip_h* variants, so build it once
    flipped = cv2.flip(image_array, 1)

    for aug_type, flip_first, angle in recipes:
        base = flipped if flip_first else image_array
        augmented = base if angle is None else cv2.rotate(base, rotate_flags[angle])
        transformed_labels = transform_labels(original_labels, aug_type, width, height)
        augmented_data.append((augmented, transformed_labels))

    return augmented_data

In [None]:
def load_dataset(image_path, label_path):
    """
    Load the solar panel dataset including images and labels.

    Every ``*.txt`` file in ``<label_path>/labels_native`` is parsed, its
    image is looked up in ``image_path`` and read with cv2.imread, and the
    pair is expanded with augment_images_and_labels. Label files are processed
    in sorted order so that repeated runs build the same list (glob order is
    filesystem dependent, which would make a seeded split irreproducible).

    Args:
        image_path: Directory containing the image chips.
        label_path: Directory containing the 'labels_native' sub-directory.

    Returns:
        List of dicts, one per (augmented) image, with keys 'image_path',
        'label_path', 'metadata', 'objects', 'geo_objects' and 'image'.
        'geo_objects' is computed from the un-augmented labels and is shared
        by all augmented variants of a chip, so it only lines up with
        'objects' for the first (original) entry of each chip.

    Files that cannot be processed are reported with print and skipped.
    """
    # Per-tile geotransforms keyed by tile id (values hard-coded for this dataset)
    geotransforms = {
        '1': (307670.04, 0.31, 0.0, 5434427.100000001, 0.0, -0.31),
        '2': (312749.07999999996, 0.31, 0.0, 5403952.860000001, 0.0, -0.31),
        '3': (312749.07999999996, 0.31, 0.0, 5363320.540000001, 0.0, -0.31)
    }

    # Find all label files (sorted for a reproducible dataset order)
    label_files_native = sorted(glob.glob(os.path.join(label_path, 'labels_native', '*.txt')))

    print(f"Found {len(label_files_native)} native resolution label files")

    dataset = []

    print("Processing native resolution data...")
    for label_file in tqdm(label_files_native):

        try:
            label_meta = parse_label_filename(label_file)
            objects = read_label_file(label_file)
            img_path = get_image_path_from_label(label_file, image_path)

            # cv2.imread signals failure by returning None instead of raising
            image = cv2.imread(img_path)
            if image is None:
                print(f"Warning: Could not load image {img_path}")
                continue

            geotransform = geotransforms.get(label_meta['tile_id'])
            if not geotransform:
                print(f"Warning: No geotransform found for tile {label_meta['tile_id']}")
                continue

            # Geo-referenced description of every object in the original chip
            geo_objects = []
            for obj in objects:
                geo_obj = calculate_geocoordinates(obj, label_meta, geotransform)
                geo_objects.append({
                    'category': obj[0],
                    'x_center_norm': obj[1],
                    'y_center_norm': obj[2],
                    'width_norm': obj[3],
                    'height_norm': obj[4],
                    'geo_x': geo_obj['geo_x'],
                    'geo_y': geo_obj['geo_y'],
                    'width_m': geo_obj['width_m'],
                    'height_m': geo_obj['height_m']
                })

            # Distinct loop names so the chip's own image/objects are not rebound
            for aug_image, aug_objects in augment_images_and_labels(image, objects):
                dataset.append({
                    'image_path': img_path,
                    'label_path': label_file,
                    'metadata': label_meta,
                    'objects': aug_objects,
                    'geo_objects': geo_objects,
                    'image': aug_image
                })

        except Exception as e:
            # Best effort: report the offending file and keep going
            print(f"Error processing {label_file}: {e}")

    print(f"Loaded {len(dataset)} native images with {sum(len(item['objects']) for item in dataset)} objects")

    return dataset

In [None]:
def visualize_sample(dataset, sample_count=3):
    """
    Visualize a few samples from the dataset using matplotlib.

    The first ``min(sample_count, len(dataset))`` entries are shown, each in
    its own figure with its boxes drawn in green.

    Args:
        dataset: Sequence of dicts with keys 'image' (numpy array), 'objects'
            (labels [category, x_center, y_center, width, height] in
            normalized coordinates) and 'image_path'.
        sample_count: Maximum number of samples to display.

    Images are assumed to be in OpenCV channel order (BGR / BGRA), which is
    what load_dataset produces via cv2.imread; they are reordered to RGB(A)
    before being handed to matplotlib so the colours are not swapped.
    """
    for i in range(min(sample_count, len(dataset))):
        sample = dataset[i]
        image = sample['image']

        # matplotlib expects RGB(A); indexing creates a reordered array and
        # leaves the stored image untouched
        if image.ndim == 3 and image.shape[2] == 3:
            display_image = image[:, :, ::-1]
        elif image.ndim == 3 and image.shape[2] == 4:
            display_image = image[:, :, [2, 1, 0, 3]]
        else:
            display_image = image

        # Get image dimensions
        h, w = image.shape[:2]

        fig, ax = plt.subplots(1, figsize=(12, 12))
        ax.imshow(display_image)

        # Draw bounding boxes
        for obj in sample['objects']:
            category, x_center, y_center, width, height = obj

            # Convert normalized coordinates to pixel coordinates
            x_center_px = int(x_center * w)
            y_center_px = int(y_center * h)
            width_px = int(width * w)
            height_px = int(height * h)

            # Rectangle is anchored at its top-left corner
            x1 = int(x_center_px - width_px / 2)
            y1 = int(y_center_px - height_px / 2)

            rect = patches.Rectangle((x1, y1), width_px, height_px, linewidth=2, edgecolor='green', facecolor='none')
            ax.add_patch(rect)

        # Title with the image name
        ax.set_title(f"{os.path.basename(sample['image_path'])}")
        ax.axis('off')  # Turn off axes

        plt.show()
        # Release the figure so repeated calls do not accumulate open figures
        plt.close(fig)

In [None]:
def _write_yolo_split(items, split, output_dir, desc):
    """Write images and label files of one split as <split>_<idx>.jpg / .txt."""
    for idx, item in enumerate(tqdm(items, desc=desc)):
        stem = f"{split}_{idx}"
        cv2.imwrite(os.path.join(output_dir, 'images', split, f"{stem}.jpg"), item['image'])

        with open(os.path.join(output_dir, 'labels', split, f"{stem}.txt"), 'w') as f:
            for obj in item['objects']:
                f.write(f"{obj[0]} {obj[1]} {obj[2]} {obj[3]} {obj[4]}\n")


def prepare_for_yolo(dataset, output_dir, class_names=None):
    """
    Write the dataset to disk for YOLO training and create its dataset.yaml.

    Layout produced below ``output_dir``::

        images/{train,val,test}/<split>_<idx>.jpg
        labels/{train,val,test}/<split>_<idx>.txt
        dataset.yaml

    The data is split 80 / 10 / 10 with a fixed seed. The split is made per
    source image ('image_path' of each item), so all augmented variants of
    one chip end up in the same split and near-duplicates of training images
    cannot leak into validation or test. Items without 'image_path' are
    treated as their own group.

    Args:
        dataset: List of dicts with at least 'image' (numpy array) and
            'objects' (labels [category, x, y, w, h]).
        output_dir: Root directory of the prepared dataset.
        class_names: Optional list of class names, indexed by class id.
            Defaults to ['solar panel']. If the labels use higher class ids
            than there are names, placeholder names are appended (and a
            warning is printed) so that 'nc' in the YAML covers every id
            written to the label files.

    Returns:
        Path of the written dataset.yaml.
    """
    splits = ('train', 'val', 'test')
    for kind in ('images', 'labels'):
        for split in splits:
            os.makedirs(os.path.join(output_dir, kind, split), exist_ok=True)

    # Group samples by source image so augmentations stay together
    groups = {}
    for position, item in enumerate(dataset):
        key = str(item.get('image_path', f"__sample_{position}"))
        groups.setdefault(key, []).append(item)
    source_keys = sorted(groups)

    train_keys, holdout_keys = train_test_split(source_keys, train_size=0.8, random_state=42)
    val_keys, test_keys = train_test_split(holdout_keys, train_size=0.5, random_state=42)

    train_data = [item for key in train_keys for item in groups[key]]
    val_data = [item for key in val_keys for item in groups[key]]
    test_data = [item for key in test_keys for item in groups[key]]

    print(f"Preparing {len(train_data)} training samples, {len(val_data)} validation samples and {len(test_data)} test samples")

    _write_yolo_split(train_data, 'train', output_dir, "Processing train data")
    _write_yolo_split(val_data, 'val', output_dir, "Processing validation data")
    _write_yolo_split(test_data, 'test', output_dir, "Processing test data")

    # Make sure the declared class count covers every class id in the labels
    names = list(class_names) if class_names is not None else ['solar panel']
    max_class_id = max((int(obj[0]) for item in dataset for obj in item['objects']), default=-1)
    if max_class_id >= len(names):
        print(f"Warning: labels use class ids up to {max_class_id} but only {len(names)} class name(s) are defined; adding placeholder names")
        names.extend(f"class_{class_id}" for class_id in range(len(names), max_class_id + 1))

    # Split entries are relative to 'path' and mirror the folders created above
    config = {
        'path': os.path.abspath(output_dir),
        'train': 'images/train',
        'val': 'images/val',
        'test': 'images/test',
        'nc': len(names),
        'names': names
    }

    # JSON scalars and lists are valid YAML, so the file can be written with
    # the standard library only
    yaml_path = os.path.join(output_dir, 'dataset.yaml')
    with open(yaml_path, 'w', encoding='utf-8') as f:
        for key, value in config.items():
            f.write(f"{key}: {json.dumps(value)}\n")

    return yaml_path

In [None]:
# Input / output locations -- adjust to your local layout
image_path = "image_chips_native"  # directory holding the image chips
label_path = "labels"  # parent directory of the 'labels_native' folder
output_dir = "datasets/dataset"  # root of the prepared dataset

# Read every labelled chip (and its augmented variants) into memory
dataset = load_dataset(image_path=image_path, label_path=label_path)

# Write the train / val / test folders and the dataset YAML
data_yaml_path = prepare_for_yolo(dataset=dataset, output_dir=output_dir)

print(f"Dataset prepared. Use {data_yaml_path} for YOLO training.")

In [None]:
# Show the first three entries of the loaded dataset with their boxes
visualize_sample(dataset=dataset, sample_count=3)

In [3]:
import os

# NOTE: this rebinds label_path, which an earlier cell sets to the parent
# "labels" directory; re-run that cell before calling load_dataset again.
label_path = "labels/labels_native"  # Update this path

# Distinct class ids (first field of every label line) across all label files
classes = set()

for file in sorted(os.listdir(label_path)):
    if not file.endswith(".txt"):
        continue
    with open(os.path.join(label_path, file), "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines carry no label; indexing them would raise IndexError
                continue
            class_id = fields[0]  # Get first value (class ID)
            classes.add(int(class_id))

print("Classes found in dataset:", sorted(classes))

Classes found in dataset: [0, 1, 2]
