# Sprint 1: Homography and Image Warping Module

**Project:** Road Defect Detection System (PROSIT 1)  
**Team Members:**
- Naa Lamle Boye
- Thomas Kojo Quarshie
- Chelsea Owusu
- Elijah Boateng

**Date:** 2026

## Purpose

This notebook applies a perspective transformation (homography) to convert the camera's perspective view of the road into an orthographic top-down view.

**What this notebook achieves:**
- Loads raw road images containing potholes
- Allows interactive selection of 4 pothole corner points
- Calculates homography matrix to transform perspective view to orthographic view
- Applies transformation to create metric-accurate top-down view
- Draws a 10 cm reference bar on the output so the scale (10 pixels = 1 cm) can be checked visually

**Prerequisites:** Run `sprint1_camera_calibration.ipynb` first to generate `camera_calib.npz` (if needed for undistortion)

**Output:** Top-down rectified images ready for pothole detection and measurement

## Step 1: Imports and Setup

Load necessary libraries and configure parameters.

In [1]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import glob
import os

# Configuration
# Output scale in pixels per centimetre: 10 pixels = 1 cm in the output image.
# NOTE(review): the homography cell defines its own SCALE = 10.0; keep the two
# values in sync if either one is changed.
SCALE_FACTOR = 10.0

## Step 2: Ground Plane Definition

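Load the raw road image and interactively select the 4 corners of the pothole, in the order Top-Left -> Top-Right -> Bottom-Right -> Bottom-Left. These points establish the correspondence between the perspective view and the top-down view. When no interactive environment is detected, default points (a centered rectangle) are used instead.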

In [2]:
# Enable interactive matplotlib for point selection
# Note: In non-interactive mode, default points will be used
import matplotlib
import matplotlib.pyplot as plt
import cv2
import numpy as np
import os

# Load the raw screenshot directly
screenshot_path = 'pothole_1.PNG'
raw_frame = cv2.imread(screenshot_path)

# cv2.imread reports failure by returning None instead of raising
if raw_frame is None:
    raise ValueError(f"Could not open {screenshot_path}")

print("INSTRUCTIONS: Click the 4 corners of the pothole on the RAW image.")
print("Order: Top-Left -> Top-Right -> Bottom-Right -> Bottom-Left")

# Interactive picking needs an IPython shell (for the %matplotlib magic).
# get_ipython() only exists inside IPython/Jupyter, so look it up defensively
# instead of letting a plain-Python run die with a NameError.
try:
    ipython_shell = get_ipython()
except NameError:
    ipython_shell = None

# Check if running in interactive environment
has_display = 'DISPLAY' in os.environ or os.getenv('JUPYTER_KERNEL') is not None
is_interactive = ipython_shell is not None and has_display

if is_interactive:
    # Interactive mode: use matplotlib widget
    ipython_shell.run_line_magic('matplotlib', 'widget')
    fig, ax = plt.subplots(figsize=(9, 6))
    # OpenCV loads images as BGR; matplotlib expects RGB
    ax.imshow(cv2.cvtColor(raw_frame, cv2.COLOR_BGR2RGB))
    ax.set_title("Click Pothole Corners (Raw Image)")

    # Filled in by the click callback, one [x, y] pair per click
    clicked_points = []

    def onclick(event):
        """Record a click inside the axes; stop listening after 4 points."""
        # xdata/ydata are None when the click lands outside the axes
        if event.xdata is not None and event.ydata is not None:
            clicked_points.append([event.xdata, event.ydata])
            ax.plot(event.xdata, event.ydata, 'ro', markersize=5)
            fig.canvas.draw()
            print(f"Point {len(clicked_points)}: {int(event.xdata)}, {int(event.ydata)}")

            if len(clicked_points) == 4:
                print("SUCCESS: 4 points captured.")
                # Unhook the callback so extra clicks cannot add a 5th point
                fig.canvas.mpl_disconnect(cid)

    cid = fig.canvas.mpl_connect('button_press_event', onclick)
    plt.show()
else:
    # Non-interactive mode: use default points
    matplotlib.use('Agg')  # Use non-interactive backend
    h, w = raw_frame.shape[:2]
    # Default points (approximate pothole location - adjust as needed):
    # a rectangle covering the central 40% of the image in each direction
    clicked_points = [
        [w * 0.3, h * 0.3],   # Top-Left
        [w * 0.7, h * 0.3],   # Top-Right
        [w * 0.7, h * 0.7],   # Bottom-Right
        [w * 0.3, h * 0.7],   # Bottom-Left
    ]
    print("Using default points for non-interactive execution:")
    for i, pt in enumerate(clicked_points):
        print(f"Point {i+1}: ({int(pt[0])}, {int(pt[1])})")

INSTRUCTIONS: Click the 4 corners of the pothole on the RAW image.
Order: Top-Left -> Top-Right -> Bottom-Right -> Bottom-Left
Using default points for non-interactive execution:
Point 1: (386, 689)
Point 2: (900, 689)
Point 3: (900, 1608)
Point 4: (386, 1608)


## Step 3: Calculate Homography and Apply Transformation

Using the 4 selected points, calculate the homography matrix and apply perspective transformation to create a top-down view. The destination points are based on known pothole dimensions and the scale factor.

In [3]:
# Known pothole dimensions (measured with tape measure)
POTHOLE_WIDTH_CM = 109.0
POTHOLE_LENGTH_CM = 112.0

# Scale: 10 pixels = 1 cm
SCALE = 10.0

if len(clicked_points) != 4:
    print("ERROR: Please click 4 points in the cell above.")
else:
    # Define Source and Destination points
    # Source points are the 4 corners we clicked on the raw image
    src_pts = np.float32(clicked_points)

    # Destination points form a rectangle with dimensions matching the real pothole size
    dst_w = int(POTHOLE_WIDTH_CM * SCALE)
    dst_h = int(POTHOLE_LENGTH_CM * SCALE)

    dst_pts = np.float32([
        [0, 0],           # Top-Left
        [dst_w, 0],       # Top-Right
        [dst_w, dst_h],   # Bottom-Right
        [0, dst_h],       # Bottom-Left
    ])

    # Calculate Perspective Transformation Matrix (Homography)
    M = cv2.getPerspectiveTransform(src_pts, dst_pts)

    # Shift the view so we see the road, not just the pothole
    # Shift x by 500px (50cm) and y by 1000px (100cm)
    # The shift has to be composed as T @ M. Adding the offsets to M[0, 2] and
    # M[1, 2] alone is only equivalent when M's bottom row is [0, 0, 1]; for a
    # true perspective homography it would distort the result instead of
    # translating it.
    view_shift = np.array([
        [1.0, 0.0, 500.0],
        [0.0, 1.0, 1000.0],
        [0.0, 0.0, 1.0],
    ])
    M_shifted = view_shift @ M

    # Apply perspective transformation (image warping)
    # Use a large canvas (2000x2500) to capture the road ahead
    # NOTE(review): h, w are not used by the fixed-size canvas below; kept in
    # case later cells read them.
    h, w = raw_frame.shape[:2]
    top_down_view = cv2.warpPerspective(raw_frame, M_shifted, (2000, 2500))

    # Add a 10 cm reference bar; its pixel length follows SCALE so the label
    # stays truthful if the scale is changed (100 px at 10 px/cm)
    bar_length_px = int(10 * SCALE)
    cv2.line(top_down_view, (100, 100), (100 + bar_length_px, 100), (0, 255, 0), 5)
    cv2.putText(top_down_view, "10 cm Verified", (100, 90),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

    # Display the result (OpenCV images are BGR; matplotlib expects RGB)
    plt.figure(figsize=(10, 12))
    plt.imshow(cv2.cvtColor(top_down_view, cv2.COLOR_BGR2RGB))
    plt.title("Rectified Top-Down View (Scale Verified)")
    plt.axis('on')
    plt.show()

  plt.show()


In [4]:
def warp_perspective_no_crop(image, M):
    """
    Warp an image with a homography without cropping any pixels.

    The four corners of ``image`` are projected through ``M`` to find the
    bounding box of the warped result. A translation that moves the top-left
    of that box to the canvas origin is composed with ``M``, and the image is
    warped onto a canvas the size of the bounding box.

    Args:
        image: Input image to warp (its first two dimensions are height, width)
        M: Homography matrix (3x3)

    Returns:
        tuple: (warped_image, adjusted_matrix), where adjusted_matrix is the
        translation composed with M (translation @ M)

    Raises:
        ValueError: If M is not 3x3, or if the image corners fall on both
            sides of (or on) the homography's horizon line. In that case part
            of the image maps to infinity and the corner bounding box does
            not describe the warped image.
    """
    M = np.asarray(M, dtype=np.float64)
    if M.shape != (3, 3):
        raise ValueError(f"M must be a 3x3 homography matrix, got shape {M.shape}")

    h, w = image.shape[:2]

    # Get the 4 corners of the ORIGINAL image
    corner_xy = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype=np.float64)

    # Homogeneous scale of each projected corner: M[2,0]*x + M[2,1]*y + M[2,2].
    # It is linear over the image rectangle, so if all four corners share a
    # sign every pixel does too and the warped image is bounded. Mixed signs
    # (or a zero) mean the horizon line crosses the image.
    homogeneous_scale = corner_xy @ M[2, :2] + M[2, 2]
    if not (np.all(homogeneous_scale > 0) or np.all(homogeneous_scale < 0)):
        raise ValueError(
            "Homography maps part of the image to infinity (horizon line "
            "crosses the image); crop the image to the ground region first."
        )

    # Project these corners into the new "Top-Down" world
    # This tells us where the image corners will be after transformation
    corners_src = np.float32(corner_xy).reshape(-1, 1, 2)
    corners_dst = cv2.perspectiveTransform(corners_src, M)

    # Find the Bounding Box of these new coordinates, padded by half a pixel
    # (min_x, min_y) is usually negative. We need to shift by this amount.
    xmin, ymin = corners_dst.min(axis=0).ravel() - 0.5
    xmax, ymax = corners_dst.max(axis=0).ravel() + 0.5

    # Calculate the necessary Translation (Shift)
    # If the image starts at x=-500, we must shift everything by +500
    H_translation = np.array([
        [1, 0, -xmin],
        [0, 1, -ymin],
        [0, 0, 1],
    ])

    # Combine the original matrix with the translation (translation applied last)
    M_final = H_translation @ M

    # Calculate the final Canvas Size
    final_w = int(round(xmax - xmin))
    final_h = int(round(ymax - ymin))

    # Warp using the shifted matrix and the full bounding box size
    warped = cv2.warpPerspective(image, M_final, (final_w, final_h))
    return warped, M_final

# Execute: Use the 'M' calculated in the previous cell (based on the pothole points)
if len(clicked_points) == 4:
    full_view_image, M_smart = warp_perspective_no_crop(raw_frame, M)

    # Verify Scale (Draw line at the NEW Pothole location)
    # The pothole's top-left corner is the first clicked point. Project it
    # through M_smart to find where it lands on the uncropped canvas.
    # (M_smart[0, 2], M_smart[1, 2] is where the raw image's pixel (0, 0)
    # lands, not the pothole corner, so it cannot be used as the shift.)
    pothole_top_left = np.float32([[clicked_points[0]]])  # shape (1, 1, 2)
    shift_x, shift_y = cv2.perspectiveTransform(pothole_top_left, M_smart)[0, 0]
    bar_x = int(round(float(shift_x)))
    bar_y = int(round(float(shift_y)))

    # 10 cm expressed in output pixels, using the SCALE from the homography cell
    bar_length_px = int(10 * SCALE)

    cv2.line(full_view_image, (bar_x, bar_y), (bar_x + bar_length_px, bar_y), (0, 255, 0), 10)
    cv2.putText(full_view_image, "10 cm Verified", (bar_x, bar_y - 20),
                cv2.FONT_HERSHEY_SIMPLEX, 2.0, (0, 255, 0), 5)

    # Show Result (OpenCV images are BGR; matplotlib expects RGB)
    plt.figure(figsize=(12, 18))
    plt.imshow(cv2.cvtColor(full_view_image, cv2.COLOR_BGR2RGB))
    plt.title(f"Full Uncropped View ({full_view_image.shape[1]}x{full_view_image.shape[0]})")
    plt.axis('on')
    plt.show()
else:
    print("Please complete the point selection in Step 2 first.")

  plt.show()
