****Distance Measurement From Camera And Seabed****

**1) Pinhole Camera Method**

In [4]:
import cv2
import numpy as np

                                                                                        # Camera parameters for this method
KNOWN_OBJECT_WIDTH = 0.5                                                                # a known seabed feature size, measured in meters
FOCAL_LENGTH = 800                                                                      # calibrate your camera based on pixel size, measured in pixels

def calculate_distance(pixel_width):
    if pixel_width > 0:
        distance = (KNOWN_OBJECT_WIDTH * FOCAL_LENGTH) / pixel_width
        return distance
    return None

def detect_seabed_feature(frame):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)                                      # Convertion into grayscale
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)                                         # Applying blur to reduce noise
    edges = cv2.Canny(blurred, 50, 150)                                                 # Edge detection using Canny Filter
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)   # Finding contours

    if contours:
        largest_contour = max(contours, key=cv2.contourArea)                            # Getting the largest contour among other
        rect = cv2.minAreaRect(largest_contour)                                         # Get the width of the bounding rectangle
        pixel_width = rect[1][0]                                           # Width in pixels which gets converted into pixel width and returns as result
        return pixel_width
    return 0

cap = cv2.VideoCapture(0)                                                               #  Open the camera and 0 is the default camera index

if not cap.isOpened():
    print("Error: Could not open camera.")
else:
    ret, frame = cap.read()                                                             # Capture a single frame
    if ret:
        pixel_width = detect_seabed_feature(frame)                                      # Detect seabed feature
        
        distance = calculate_distance(pixel_width)                                      # pixel width goes in Calculate distance function
        
        if distance:                                                                    #  Display results
            print(f"Pixel Width: {pixel_width:.2f} pixels")
            print(f"Distance to Seabed: {distance:.2f} meters")
            
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)                              # Draw the contour on the frame for visualization
            edges = cv2.Canny(gray, 50, 150)
            contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if contours:
                largest_contour = max(contours, key=cv2.contourArea)
                cv2.drawContours(frame, [largest_contour], -1, (0, 255, 0), 2)
                cv2.putText(frame, f"Distance: {distance:.2f} m", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                
                cv2.imshow("Camera Feed", frame)
                cv2.waitKey(0) 
        else:
            print("No valid object detected for distance calculation.")
    else:
        print("Error: Could not read frame.")
    
    cap.release()
    cv2.destroyAllWindows()

Pixel Width: 25.18 pixels
Distance to Seabed: 15.89 meters


**2) MiDAS - AI Based Monocular Depth Estimation**

**It shows frame per second and Relative Depth Value. So Larger Values = Farther Objects , Smaller Values = Closer Objects. It is just a scaled number without units**

In [8]:
!pip install torch torchvision opencv-python timm





In [1]:
import cv2
import torch
import numpy as np
import time

midas = torch.hub.load("intel-isl/MiDaS", "MiDaS_small", trust_repo=True)                     # Loading MiDaS model
midas.eval()

midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms", trust_repo=True)           # Loading transforms
transform = midas_transforms.small_transform

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")                         # Move model to device
midas.to(device)

cap = cv2.VideoCapture(0) # Start webcam
if not cap.isOpened():
    print("Cannot Open Webcam")
    exit()

print("Webcam and MiDaS initialized. Press 'q' to quit.")

prev_time = time.time()

while True:
    ret, frame = cap.read()
    if not ret:
        print("Failed to grab frame")
        break

    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)                                               # Convert BGR to RGB

    input_image = transform(img).to(device)                                                    # Apply MiDaS transform
    input_batch = input_image                                                                  # already batched

    with torch.no_grad():                                                                      # Depth inference
        prediction = midas(input_batch)
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=img.shape[:2],
            mode="bicubic",
            align_corners=False,
        ).squeeze()

        depth_map = prediction.cpu().numpy()

    depth_min = depth_map.min()                                                                # Normalize depth map for visualization
    depth_max = depth_map.max()
    depth_vis = (255 * (depth_map - depth_min) / (depth_max - depth_min)).astype(np.uint8)
    depth_colormap = cv2.applyColorMap(depth_vis, cv2.COLORMAP_MAGMA)

    current_time = time.time()                                                                  # Calculate FPS
    fps = 1.0 / (current_time - prev_time)
    prev_time = current_time

    cv2.putText(frame, f"FPS: {fps:.2f}", (10, 30),                                             # Draw FPS on original frame
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    h, w = depth_map.shape                                                                      # Draw depth value at center of depth map
    center_x, center_y = w // 2, h // 2
    center_depth = depth_map[center_y, center_x]
    cv2.putText(depth_colormap, f"Depth: {center_depth:.2f}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    combined = np.hstack((frame, depth_colormap))                                             
    cv2.imshow("Webcam + MiDaS Depth", combined)

    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()

Using cache found in C:\Users\Wilfred Auxilian/.cache\torch\hub\intel-isl_MiDaS_master
Using cache found in C:\Users\Wilfred Auxilian/.cache\torch\hub\rwightman_gen-efficientnet-pytorch_master


Loading weights:  None


Using cache found in C:\Users\Wilfred Auxilian/.cache\torch\hub\intel-isl_MiDaS_master


Webcam and MiDaS initialized. Press 'q' to quit.


**3) Depth from motion via Optical Flow**

**It captures two continuos frames and compute optical flow between them. It uses Flow Magnitude (Larger Flow = Closer Object).** 

In [2]:
import cv2
import numpy as np
import time

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Cannot open webcam")
    exit()

ret, prev_frame = cap.read()
prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

print("Starting video. Press 'q' to quit.")

while True:
    start = time.time()
    ret, frame = cap.read()
    if not ret:
        break

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None,
                                        pyr_scale=0.5, levels=3, winsize=15,
                                        iterations=3, poly_n=5, poly_sigma=1.2, flags=0)

    mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
    mag_norm = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    depth_heatmap = cv2.applyColorMap(255 - mag_norm, cv2.COLORMAP_INFERNO)

    fps = 1.0 / (time.time() - start)
    cv2.putText(frame, f"FPS: {fps:.2f}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    combo = np.hstack((frame, depth_heatmap))
    cv2.imshow("Motion-Based Depth Proxy", combo)

    prev_gray = gray.copy()

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


Starting video. Press 'q' to quit.


In [1]:
import cv2
import numpy as np
import time

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Cannot open webcam")
    exit()

ret, prev_frame = cap.read()                                                        # Read the first frame
prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

print("Depth from Motion running... Press 'q' to quit.")

while True:
    start_time = time.time()

    ret, frame = cap.read()                                                         # Read the next frame
    if not ret:
        break

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None,                              # Compute optical flow
                                        pyr_scale=0.5, levels=3, winsize=15,
                                        iterations=3, poly_n=5, poly_sigma=1.2, flags=0)

    mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])                        # Flow magnitude as proxy for inverse depth (closer objects move more)

    inv_depth = 1 / (mag + 1e-5)                                                  # Normalize inverse magnitude to simulate depth
    inv_depth_normalized = cv2.normalize(inv_depth, None, 0, 255, cv2.NORM_MINMAX)
    depth_display = inv_depth_normalized.astype(np.uint8)

    depth_colored = cv2.applyColorMap(depth_display, cv2.COLORMAP_INFERNO)        # Apply colormap for visualization

    fps = 1.0 / (time.time() - start_time)                                        # FPS Counter
    cv2.putText(frame, f"FPS: {fps:.2f}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    h, w = mag.shape                                                              # Display example depth value at the center
    depth_val = inv_depth[int(h/2), int(w/2)]
    cv2.putText(depth_colored, f"Depth(center): {depth_val:.2f} (relative units)", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

    combined = np.hstack((frame, depth_colored))                                  # Combine original frame and depth map side-by-side

    cv2.imshow("Depth from Motion (Monocular)", combined)

    prev_gray = gray.copy()

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

Depth from Motion running... Press 'q' to quit.


**4)Structure from Motion (SFM)**


**It process the live frames and detects and matches features among them, triangulates 3D points and estimate avg depth of those 3D Points**


In [3]:
import cv2
import numpy as np

                                                                       # Camera intrinsic parameters (calibrate your camera or use approximate values)
K = np.array([[800, 0, 320], 
              [0, 800, 240],
              [0, 0, 1]], dtype=np.float32)

def extract_features(img):                                             # Detect ORB features and compute descriptors.
    orb = cv2.ORB_create()
    keypoints, descriptors = orb.detectAndCompute(img, None)
    return keypoints, descriptors

def match_features(des1, des2):
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)              # Match features between two frames using BFMatcher.
    matches = bf.match(des1, des2)
    matches = sorted(matches, key=lambda x: x.distance)
    return matches

def estimate_motion(pts1, pts2, K):                                    # Estimate essential matrix and recover camera pose.
    E, mask = cv2.findEssentialMat(pts1, pts2, K, method=cv2.RANSAC, prob=0.999, threshold=1.0)
    _, R, t, mask = cv2.recoverPose(E, pts1, pts2, K, mask=mask)
    return R, t, mask

def triangulate_points(pts1, pts2, K, R, t):                           # Triangulate 3D points from matched 2D points.
    pts1 = np.array(pts1, dtype=np.float32).reshape(-1, 2)
    pts2 = np.array(pts2, dtype=np.float32).reshape(-1, 2)

    P1 = np.hstack((K, np.zeros((3, 1))))                               # Projection matrices
    P2 = np.hstack((K @ R, K @ t.reshape(3, 1)))

    pts1 = pts1.T                                                       # Transpose points to 2xN format for cv2.triangulatePoints
    pts2 = pts2.T  

    points_4d = cv2.triangulatePoints(P1, P2, pts1, pts2)               # Triangulate
    points_3d = points_4d[:3] / points_4d[3]                            # Convert to homogeneous coordinates
    return points_3d.T  

def main():
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open camera.")
        return

    prev_img = None                                                     # Variables to store previous frame data
    prev_kp = None
    prev_des = None

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        
        kp, des = extract_features(gray)

        if prev_img is not None and prev_des is not None:
            matches = match_features(prev_des, des)                                                         # Match features with previous frame
            if len(matches) > 10: 
                pts1 = np.float32([prev_kp[m.queryIdx].pt for m in matches]).reshape(-1, 2)
                pts2 = np.float32([kp[m.trainIdx].pt for m in matches]).reshape(-1, 2)

                R, t, mask = estimate_motion(pts1, pts2, K)                                                 # Estimate camera motion

                mask = mask.ravel() > 0                                                                     # Filter points using mask
                if np.sum(mask) > 0: 
                    points_3d = triangulate_points(pts1[mask], pts2[mask], K, R, t)                         # Triangulate 3D points

                    depths = points_3d[:, 2]                                                                # Compute depth (z-coordinate)
                    valid_depths = depths[depths > 0] 
                    if len(valid_depths) > 0:
                        avg_depth = np.mean(valid_depths)
                        print(f"Average depth: {avg_depth:.2f} meters")

                    img_matches = cv2.drawMatches(prev_img, prev_kp, gray, kp, matches[:50], None,          # Draw matches
                                                flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
                    cv2.imshow('Feature Matches', img_matches)

        prev_img = gray.copy()                                                                              # Update previous frame data
        prev_kp = kp
        prev_des = des

        cv2.imshow('Live Feed', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()

Average depth: 24.11 meters
Average depth: 30.67 meters
Average depth: 24.89 meters
Average depth: 36.63 meters
Average depth: 37.74 meters
Average depth: 39.31 meters
Average depth: 21.62 meters
Average depth: 19.31 meters
Average depth: 38.62 meters
Average depth: 15.43 meters
Average depth: 15.74 meters
Average depth: 36.31 meters
Average depth: 27.50 meters
Average depth: 38.10 meters
Average depth: 38.10 meters
Average depth: 25.66 meters
Average depth: 34.57 meters
Average depth: 26.10 meters
Average depth: 27.54 meters
Average depth: 33.18 meters
Average depth: 39.04 meters
Average depth: 21.46 meters
Average depth: 24.28 meters
Average depth: 49.29 meters
Average depth: 33.07 meters
Average depth: 29.09 meters
Average depth: 34.40 meters
Average depth: 28.06 meters
Average depth: 37.88 meters
Average depth: 30.69 meters
Average depth: 32.79 meters
Average depth: 48.93 meters
Average depth: 33.06 meters
Average depth: 42.56 meters
Average depth: 29.25 meters
Average depth: 31.90

**5)Intensity Based Depth Estimation**


**Light intensity especially in the red channel decreases with depth due to absorption and scattering. By analyzing color channel attenuation (red diminishes faster than blue/green), we can estimate relative depth. This method uses a single frame and is robust to low-texture scenes.**

In [4]:
import cv2
import numpy as np

def preprocess_underwater_image(image):                                  # Enhance contrast and reduce haze for underwater images.
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    l = clahe.apply(l)
    lab = cv2.merge((l, a, b))
    return cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)

def estimate_depth_from_intensity(image):                                 # Estimate relative depth based on red-to-blue channel ratio.
    b, g, r = cv2.split(image)
    blue = b.astype(np.float32) + 1e-6
    red = r.astype(np.float32) + 1e-6
    ratio = blue / red  
    depth_map = cv2.normalize(ratio, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    depth_indicator = np.mean(ratio)
    return depth_map, depth_indicator

def main():
    cap = cv2.VideoCapture(0) 
    if not cap.isOpened():
        print("Error: Could not open camera.")
        return

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = preprocess_underwater_image(frame)

        depth_map, depth_indicator = estimate_depth_from_intensity(frame)                       # Estimate depth from intensity
        print(f"Relative depth indicator: {depth_indicator:.2f} (higher = farther)")

        cv2.imshow('Live Feed', frame)
        cv2.imshow('Depth Map', depth_map)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()

Relative depth indicator: 392866.28 (higher = farther)
Relative depth indicator: 19018.16 (higher = farther)
Relative depth indicator: 11013.56 (higher = farther)
Relative depth indicator: 15629.43 (higher = farther)
Relative depth indicator: 11472.49 (higher = farther)
Relative depth indicator: 5082.50 (higher = farther)
Relative depth indicator: 3119.58 (higher = farther)
Relative depth indicator: 3760.85 (higher = farther)
Relative depth indicator: 4418.38 (higher = farther)
Relative depth indicator: 1657.95 (higher = farther)
Relative depth indicator: 5248.44 (higher = farther)
Relative depth indicator: 1605.85 (higher = farther)
Relative depth indicator: 1094.77 (higher = farther)
Relative depth indicator: 749.72 (higher = farther)
Relative depth indicator: 749.72 (higher = farther)
Relative depth indicator: 639.04 (higher = farther)
Relative depth indicator: 769.25 (higher = farther)
Relative depth indicator: 707.41 (higher = farther)
Relative depth indicator: 2217.82 (higher = f