In [1]:
from pathlib import Path
from typing import List, Tuple, Dict, Optional, Union, Any
import numpy as np
from ultralytics import YOLO
import torch
from torch.utils.data import Dataset
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2
from dataclasses import dataclass
import json
import yaml
import matplotlib.pyplot as plt
from tqdm import tqdm
import logging
from datetime import datetime

  check_for_updates()


In [2]:
# Configure the root logger once for the whole notebook: every record at
# INFO level or above is written both to a log file in the working
# directory and to the console stream.
logging.basicConfig(
    handlers=[
        logging.FileHandler("aerial_detection.log"),  # persistent record of the run
        logging.StreamHandler(),                      # echo into the notebook output
    ],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

In [None]:
@dataclass
class AerialModelConfig:
    """Settings for the aerial haul-road detection pipeline.

    Groups image/tiling geometry, training hyperparameters, model selection
    and run naming in one place so the rest of the notebook reads them from a
    single object.

    Note:
        The defaults of ``device`` and ``experiment_name`` are ordinary
        default values, so they are computed once, when this class statement
        is executed. Every instance created afterwards shares the same
        timestamped ``experiment_name`` until the class cell is re-run.

    Raises:
        ValueError: If ``tile_size`` is not positive, or ``tile_overlap`` is
            not in the range ``[0, tile_size)``. Such values make the tiling
            stride zero or negative.
    """

    img_size: int = 1024          # Larger size for aerial details
    tile_size: int = 1024         # Size for tiling large images
    tile_overlap: int = 128       # Overlap between tiles
    batch_size: int = 8           # Reduced due to larger images
    num_epochs: int = 100
    learning_rate: float = 0.01
    num_classes: int = 1
    # Evaluated once at class-definition time (see Note above).
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    model_type: str = "yolov8l.pt"  # Larger model for complex features
    min_visibility: float = 0.15   # Minimum object visibility threshold
    cache_images: bool = True      # Cache images in memory for faster training
    save_period: int = 10          # Save checkpoint every N epochs
    project_name: str = "aerial_haul_road_detection"
    # Evaluated once at class-definition time (see Note above).
    experiment_name: str = f"exp_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    def __post_init__(self) -> None:
        """Reject tiling parameters that cannot produce a positive stride."""
        if self.tile_size <= 0:
            raise ValueError(
                f"tile_size must be positive, got {self.tile_size}")
        if not 0 <= self.tile_overlap < self.tile_size:
            raise ValueError(
                "tile_overlap must satisfy 0 <= tile_overlap < tile_size, "
                f"got tile_overlap={self.tile_overlap}, "
                f"tile_size={self.tile_size}")

In [None]:
class ImageProcessor:
    """Handle preprocessing of aerial/satellite images"""

    @staticmethod
    def enhance_contrast(image: np.ndarray) -> np.ndarray:
        """Apply CLAHE contrast enhancement.

        The image is converted from RGB to LAB, CLAHE is applied to the first
        (lightness) channel only, and the result is converted back to RGB.

        Args:
            image: RGB image array (assumed 8-bit, 3 channels; other layouts
                are not handled here).

        Returns:
            Contrast-enhanced RGB image.
        """
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
        lab[:, :, 0] = clahe.apply(lab[:, :, 0])
        return cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)

    @staticmethod
    def tile_image(
        image: np.ndarray,
        tile_size: int,
        overlap: int
    ) -> List[Tuple[np.ndarray, Tuple[int, int]]]:
        """Split large images into overlapping tiles.

        Tiles are generated row by row with a stride of
        ``tile_size - overlap``. Tiles that run past the right or bottom edge
        are zero-padded to ``tile_size x tile_size``; padding keeps the
        image's dtype and trailing (channel) dimensions.

        Args:
            image: Array whose first two dimensions are height and width.
            tile_size: Side length of each square tile, in pixels.
            overlap: Number of pixels shared by neighbouring tiles.

        Returns:
            List of ``(tile, (x, y))`` where ``(x, y)`` is the tile's top-left
            corner in the original image. Full-size tiles are views into
            ``image``; padded tiles are new arrays. An image with zero height
            or width yields an empty list.

        Raises:
            ValueError: If ``tile_size`` is not positive or ``overlap`` is not
                in ``[0, tile_size)``.
        """
        if tile_size <= 0:
            raise ValueError(f"tile_size must be positive, got {tile_size}")
        if not 0 <= overlap < tile_size:
            raise ValueError(
                "overlap must satisfy 0 <= overlap < tile_size, "
                f"got overlap={overlap}, tile_size={tile_size}")

        h, w = image.shape[:2]
        if h == 0 or w == 0:
            return []

        stride = tile_size - overlap
        trailing_dims = tuple(image.shape[2:])
        tiles = []

        # max(..., 1) guarantees at least one tile start per axis, so images
        # no larger than `overlap` still produce a (padded) tile.
        for y in range(0, max(h - overlap, 1), stride):
            for x in range(0, max(w - overlap, 1), stride):
                tile = image[y:y + tile_size, x:x + tile_size]

                # Pad if tile is smaller than tile_size
                if tile.shape[0] != tile_size or tile.shape[1] != tile_size:
                    padded_tile = np.zeros(
                        (tile_size, tile_size) + trailing_dims,
                        dtype=image.dtype)
                    padded_tile[:tile.shape[0], :tile.shape[1]] = tile
                    tile = padded_tile

                tiles.append((tile, (x, y)))

        return tiles

    @staticmethod
    def _nms(
        boxes: np.ndarray,
        scores: np.ndarray,
        classes: np.ndarray,
        iou_threshold: float
    ) -> np.ndarray:
        """Greedy per-class non-maximum suppression.

        Args:
            boxes: ``(N, 4)`` array of ``x1, y1, x2, y2`` boxes.
            scores: ``(N,)`` confidence scores.
            classes: ``(N,)`` class ids; boxes only suppress boxes of the
                same class.
            iou_threshold: A box is dropped when its IoU with an already kept,
                higher-scoring box of the same class exceeds this value.

        Returns:
            Indices of the kept boxes, ordered by descending score.
        """
        widths = np.clip(boxes[:, 2] - boxes[:, 0], 0, None)
        heights = np.clip(boxes[:, 3] - boxes[:, 1], 0, None)
        areas = widths * heights

        order = np.argsort(-scores, kind="stable")
        keep = []
        while order.size > 0:
            best = order[0]
            rest = order[1:]
            keep.append(best)

            inter_w = np.clip(
                np.minimum(boxes[best, 2], boxes[rest, 2])
                - np.maximum(boxes[best, 0], boxes[rest, 0]), 0, None)
            inter_h = np.clip(
                np.minimum(boxes[best, 3], boxes[rest, 3])
                - np.maximum(boxes[best, 1], boxes[rest, 1]), 0, None)
            inter = inter_w * inter_h
            union = areas[best] + areas[rest] - inter
            # Degenerate (zero-area) pairs get IoU 0 instead of a division warning.
            iou = np.divide(
                inter, union, out=np.zeros_like(inter), where=union > 0)

            suppressed = (iou > iou_threshold) & (classes[rest] == classes[best])
            order = rest[~suppressed]

        return np.array(keep, dtype=np.int64)

    @staticmethod
    def merge_predictions(
        tiles_predictions: List[Tuple[Any, Tuple[int, int]]],
        original_size: Tuple[int, int],
        tile_size: int,
        overlap: int,
        iou_threshold: float = 0.45
    ) -> Dict[str, Any]:
        """Merge predictions from tiles back to original image coordinates.

        Each tile's boxes are shifted by the tile's offset, all detections are
        pooled, and per-class NMS removes the duplicates that arise where
        tiles overlap.

        Args:
            tiles_predictions: List of ``(pred, (x_offset, y_offset))``. Each
                ``pred`` must expose ``boxes.xyxy``, ``boxes.conf`` and
                ``boxes.cls`` tensors.
            original_size: Size of the full image. Currently unused; kept for
                interface compatibility.
            tile_size: Tile side length. Currently unused; kept for interface
                compatibility.
            overlap: Tile overlap. Currently unused; kept for interface
                compatibility.
            iou_threshold: IoU above which a lower-scoring box of the same
                class is suppressed.

        Returns:
            Dict with tensors ``'boxes'`` ``(N, 4)``, ``'scores'`` ``(N,)`` and
            ``'classes'`` ``(N,)``, ordered by descending score. All three are
            empty when no tile produced a detection.
        """
        box_parts = []
        score_parts = []
        class_parts = []

        for pred, (x_offset, y_offset) in tiles_predictions:
            if pred.boxes.xyxy.shape[0] == 0:
                continue

            # Copy first: for CPU tensors .numpy() shares memory with the
            # prediction, and the in-place shift below must not alter it.
            boxes = pred.boxes.xyxy.cpu().numpy().copy()
            # Adjust coordinates based on tile position
            boxes[:, [0, 2]] += x_offset
            boxes[:, [1, 3]] += y_offset

            box_parts.append(boxes)
            score_parts.append(pred.boxes.conf.cpu().numpy())
            class_parts.append(pred.boxes.cls.cpu().numpy())

        if not box_parts:
            return {
                'boxes': torch.zeros((0, 4)),
                'scores': torch.zeros(0),
                'classes': torch.zeros(0)
            }

        merged_boxes = np.concatenate(box_parts, axis=0)
        merged_scores = np.concatenate(score_parts)
        merged_classes = np.concatenate(class_parts)

        # Perform NMS on merged predictions
        keep = ImageProcessor._nms(
            merged_boxes, merged_scores, merged_classes, iou_threshold)

        return {
            'boxes': torch.from_numpy(merged_boxes[keep]),
            'scores': torch.from_numpy(merged_scores[keep]),
            'classes': torch.from_numpy(merged_classes[keep])
        }

In [None]:
class Predictor:
    """Prediction class for aerial imagery"""

    def __init__(self, model_path: str, config: AerialModelConfig) -> None:
        """Load the YOLO weights and keep the pipeline configuration.

        Args:
            model_path: Path to the weights file handed to ``YOLO``.
            config: Pipeline settings; ``tile_size`` and ``tile_overlap`` are
                read during prediction.
        """
        self.model = YOLO(model_path)
        self.config = config

    @staticmethod
    def _load_rgb(image_path: str) -> np.ndarray:
        """Read an image from disk and return it in RGB channel order.

        Raises:
            FileNotFoundError: If OpenCV cannot read the file (``cv2.imread``
                returns ``None`` for missing or undecodable files).
        """
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(
                f"Could not read image (missing or undecodable): {image_path}")
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    def predict_large_image(
        self,
        image_path: str,
        conf_threshold: float = 0.25,
        iou_threshold: float = 0.45
    ) -> Dict[str, Any]:
        """Handle prediction for large aerial images using tiling.

        The image is contrast-enhanced, cut into overlapping tiles, each tile
        is run through the model, and the per-tile results are merged back
        into full-image coordinates.

        Args:
            image_path: Path of the image to analyse.
            conf_threshold: Confidence threshold passed to ``model.predict``.
            iou_threshold: IoU threshold passed to ``model.predict``.

        Returns:
            The dict produced by ``ImageProcessor.merge_predictions`` with
            keys ``'boxes'``, ``'scores'`` and ``'classes'``.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        # Load and preprocess image
        image = self._load_rgb(image_path)
        original_size = image.shape[:2]

        # Enhanced contrast
        image = ImageProcessor.enhance_contrast(image)

        # Generate tiles
        tiles = ImageProcessor.tile_image(
            image,
            self.config.tile_size,
            self.config.tile_overlap
        )

        # Predict on each tile
        tiles_predictions = []
        for tile, (x, y) in tiles:
            # Ultralytics documents numpy sources as HWC arrays in BGR order,
            # so convert the RGB tile back before handing it to the model.
            tile_bgr = cv2.cvtColor(tile, cv2.COLOR_RGB2BGR)
            prediction = self.model.predict(
                tile_bgr,
                conf=conf_threshold,
                iou=iou_threshold,
                verbose=False
            )[0]

            tiles_predictions.append((prediction, (x, y)))

        # Merge predictions
        merged_predictions = ImageProcessor.merge_predictions(
            tiles_predictions,
            original_size,
            self.config.tile_size,
            self.config.tile_overlap
        )

        return merged_predictions

    def visualize_predictions(
        self,
        image_path: str,
        predictions: Dict[str, torch.Tensor],
        output_path: str,
        class_names: List[str]
    ) -> None:
        """Visualize predictions on the image and save the figure.

        Args:
            image_path: Path of the image the predictions belong to.
            predictions: Dict with ``'boxes'`` (xyxy), ``'scores'`` and
                ``'classes'`` tensors, as returned by ``predict_large_image``.
            output_path: File the rendered figure is written to; missing
                parent directories are created.
            class_names: Class labels indexed by class id.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        image = self._load_rgb(image_path)

        # Draw boxes
        for box, score, class_id in zip(
            predictions['boxes'],
            predictions['scores'],
            predictions['classes']
        ):
            # Plain Python ints (truncated like astype(int)) for the OpenCV
            # drawing calls.
            x1, y1, x2, y2 = (int(v) for v in box.cpu().numpy())
            class_id = int(class_id)

            # Draw box
            cv2.rectangle(
                image,
                (x1, y1),
                (x2, y2),
                (0, 255, 0),
                2
            )

            # Add label
            label = f"{class_names[class_id]} {float(score):.2f}"
            cv2.putText(
                image,
                label,
                (x1, y1 - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                (0, 255, 0),
                2
            )

        # savefig does not create directories, so make sure the target exists.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)

        # Save result
        plt.figure(figsize=(20, 20))
        plt.imshow(image)
        plt.axis('off')
        plt.savefig(output_path, bbox_inches='tight', pad_inches=0)
        plt.close()

In [None]:
# Build the pipeline configuration from the dataclass defaults.
config = AerialModelConfig()

logging.info(
    f"Starting aerial haul road detection pipeline with config: {config}")

class_names = ["haul_road"]

# Everything belonging to this run lives under <project_name>/<experiment_name>.
# The path is built on one line: a line break inside the replacement field of
# a single-quoted f-string is a SyntaxError before Python 3.12.
run_dir = f"{config.project_name}/{config.experiment_name}"

# Example of prediction on a large image
predictor = Predictor(
    model_path=f"{run_dir}/weights/best.pt",
    config=config
)

# Predict on a test image
test_image_path = "path/to/test/image.jpg"  # placeholder: point this at a real image
predictions = predictor.predict_large_image(
    test_image_path,
    conf_threshold=0.25,
    iou_threshold=0.45
)

# Visualize results
predictor.visualize_predictions(
    test_image_path,
    predictions,
    output_path=f"{run_dir}/predictions/test_prediction.jpg",
    class_names=class_names
)