In [10]:
import numpy as np
import cv2
import torch
import os
import sys
import matplotlib

from PIL import Image
#from dotenv import load_dotenv
from scipy.ndimage import center_of_mass
from src.third_party.depth_anything_v2.dpt import DepthAnythingV2
from ultralytics import YOLO
from torchvision.transforms import Compose, Resize, ToTensor, Normalize

# Path of the frame to analyse; the cells below read it with cv2.imread.
image_path = 'outputs/K6xsEng2PhU/tracked_vehicles/vehicle_11_frame_475.jpg'

### Pixel Height Estimation of Ruler

In [13]:
import cv2
import numpy as np

# Click positions gathered by the mouse callback, kept in click order.
coordinates = []


def get_coordinates(event, x, y, flags, param):
    """Mouse callback that records and echoes every left-button press.

    Args:
        event: OpenCV mouse event code delivered by the window.
        x: Horizontal pixel position of the event.
        y: Vertical pixel position of the event.
        flags: Event flags passed by OpenCV (unused).
        param: User data passed by OpenCV (unused).

    Side effects:
        Appends ``(x, y)`` to the module-level ``coordinates`` list and prints
        the position for each left-button-down event; other events are ignored.
    """
    if event != cv2.EVENT_LBUTTONDOWN:
        return

    clicked_point = (x, y)
    coordinates.append(clicked_point)
    print(f"Mouse clicked at: ({x}, {y})")

# Specify the rotation angle in degrees (counterclockwise)
rotation_angle = 0  # Replace with desired angle

# Load the image
image = cv2.imread(image_path)

# cv2.imread reports failure by returning None rather than raising.
# Raise an exception instead of calling exit(): inside a notebook exit() asks
# the kernel to shut down, which discards all state instead of just stopping
# this cell.
if image is None:
    raise FileNotFoundError(f"Error: Could not load image: {image_path}")

# Get image dimensions
image_height, image_width = image.shape[:2]

# Calculate the center of the image for rotation
center = (image_width // 2, image_height // 2)

# Compute the rotation matrix (scale 1.0 = no resizing)
rotation_matrix = cv2.getRotationMatrix2D(center, rotation_angle, 1.0)

# Determine the new bounding dimensions of the image after rotation, so the
# rotated content is not cropped by the original canvas size.
cos = abs(rotation_matrix[0, 0])
sin = abs(rotation_matrix[0, 1])
new_width = int((image_height * sin) + (image_width * cos))
new_height = int((image_height * cos) + (image_width * sin))

# Shift the rotation so the image stays centred on the enlarged canvas
rotation_matrix[0, 2] += (new_width / 2) - center[0]
rotation_matrix[1, 2] += (new_height / 2) - center[1]

# Rotate the image
rotated_image = cv2.warpAffine(image, rotation_matrix, (new_width, new_height))

# Create a window and set the mouse callback function
window_name = 'Rotated Image'
cv2.namedWindow(window_name)
cv2.setMouseCallback(window_name, get_coordinates)

# Display the rotated image until 'q' is pressed. The try/finally guarantees
# the window is closed even if the cell is interrupted from the notebook.
try:
    while True:
        cv2.imshow(window_name, rotated_image)

        # Exit if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources and close windows
    cv2.destroyAllWindows()

# Print the clicked coordinates
print("Clicked coordinates:", coordinates)

# The ruler height needs two clicks (its two ends). Fail with a clear message
# instead of an IndexError when the window was closed too early.
if len(coordinates) < 2:
    raise ValueError(
        f"Expected at least 2 clicks (both ends of the ruler), got {len(coordinates)}."
    )

# Only the first two clicks are used; abs() makes the result independent of
# whether the top or the bottom of the ruler was clicked first.
first_click_y = coordinates[0][1]
second_click_y = coordinates[1][1]
print(f"Ruler Height in Pixels: {abs(second_click_y - first_click_y)}")


Mouse clicked at: (978, 503)
Mouse clicked at: (979, 562)
Clicked coordinates: [(978, 503), (979, 562)]
Ruler Height in Pixels: 59


### Depth Estimation of Ruler

In [18]:
# Prefer CUDA, then Apple MPS, and fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'

# Architecture hyper-parameters for each available encoder size.
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
}

encoder = 'vitl'
model = DepthAnythingV2(**model_configs[encoder])
# Load the checkpoint onto the CPU first: a hard-coded 'cuda' map_location
# fails on machines where DEVICE resolves to 'mps' or 'cpu'. The weights are
# moved to the selected device by model.to(DEVICE) right after.
model.load_state_dict(torch.load(f'models_dav2/depth_anything_v2_{encoder}.pth', map_location='cpu'))
model.to(DEVICE)
model.eval()

raw_img = cv2.imread(image_path)
# cv2.imread returns None on failure instead of raising.
if raw_img is None:
    raise FileNotFoundError(f"Could not load image: {image_path}")

# Rotate for better measurement of height
rotation_angle = 0 # Counterclockwise rotation in degrees
raw_height, raw_width = raw_img.shape[:2]
center = (raw_width // 2, raw_height // 2)  # Image center
scale = 1.0  # No scaling

# Compute the rotation matrix
rotation_matrix = cv2.getRotationMatrix2D(center, rotation_angle, scale)

# Use the same enlarged canvas as the cell where the ruler was clicked, so the
# clicked pixel coordinates index the same pixels in the depth map.
# NOTE(review): rotation_angle here must match the angle used when clicking.
abs_cos = abs(rotation_matrix[0, 0])
abs_sin = abs(rotation_matrix[0, 1])
canvas_width = int((raw_height * abs_sin) + (raw_width * abs_cos))
canvas_height = int((raw_height * abs_cos) + (raw_width * abs_sin))
rotation_matrix[0, 2] += (canvas_width / 2) - center[0]
rotation_matrix[1, 2] += (canvas_height / 2) - center[1]

# Perform the rotation and estimate depth on the rotated image (previously the
# rotated image was computed but the unrotated one was passed to the model).
rotated_img = cv2.warpAffine(raw_img, rotation_matrix, (canvas_width, canvas_height))
depth = model.infer_image(rotated_img)  # presumably an HxW array matching the input — TODO confirm

# Normalize depth map to the 0-255 range; a constant map would divide by zero.
depth_span = depth.max() - depth.min()
if depth_span == 0:
    depth_normalized = np.zeros_like(depth)
else:
    depth_normalized = (depth - depth.min()) / depth_span * 255

# The ruler region is defined by the first two clicks recorded earlier.
if len(coordinates) < 2:
    raise ValueError(
        f"Expected at least 2 clicked points for the ruler, got {len(coordinates)}."
    )
(first_x, first_y), (second_x, second_y) = coordinates[0], coordinates[1]

# Define the top-left and bottom-right coordinates of the rectangular area.
# Sorting makes the region independent of the click order.
ruler_top_left_x, ruler_bottom_right_x = sorted((first_x, second_x))
ruler_top_left_y, ruler_bottom_right_y = sorted((first_y, second_y))

# Slices are half-open: when both clicks share a column (a perfectly vertical
# ruler) or a row, widen by one pixel so the region is never empty (an empty
# region would make the mean NaN).
ruler_bottom_right_x = max(ruler_bottom_right_x, ruler_top_left_x + 1)
ruler_bottom_right_y = max(ruler_bottom_right_y, ruler_top_left_y + 1)

# Extract the depth values within the ruler
ruler_depth_values = depth_normalized[
    ruler_top_left_y:ruler_bottom_right_y,
    ruler_top_left_x:ruler_bottom_right_x
]

# Calculate the average depth within the ruler area
ruler_depth_average = ruler_depth_values.mean()

# Print the average depth
print(f"Ruler Depth: {ruler_depth_average}")

Ruler Depth: 46.10896682739258


In [19]:
def estimate_vessel_height(vessel_height_px, ruler_height_actual, rulerheight_px, depth_ship, depth_ruler):
    """Scale a vessel's pixel height to a real-world height using a reference ruler.

    Implements:
        vessel_height_actual = vessel_height_px
                               * (ruler_height_actual / rulerheight_px)
                               * (depth_ship / depth_ruler)

    Args:
        vessel_height_px: Height of the vessel in pixels.
        ruler_height_actual: Known real-world height of the ruler; the result
            is expressed in this same unit.
        rulerheight_px: Height of the ruler in pixels.
        depth_ship: Depth value measured at the vessel.
        depth_ruler: Depth value measured at the ruler.

    Returns:
        The estimated real-world vessel height.

    Raises:
        ValueError: If ``rulerheight_px`` or ``depth_ruler`` is zero.
    """
    if rulerheight_px == 0 or depth_ruler == 0:
        raise ValueError("rulerheight_px and depth_ruler must be non-zero")
    return vessel_height_px * (ruler_height_actual / rulerheight_px) * (depth_ship / depth_ruler)


# Formula -> vessel_height_actual = vessel_height_px * (ruler_height_actual/rulerheight_px) * (depth_ship/depth_ruler)
# NOTE(review): these inputs are hand-entered and differ from the values
# printed by the cells above (59 px ruler height, 46.1 ruler depth) — confirm
# which measurement they belong to.
vessel_height_px = 120
ruler_height_actual = 1
rulerheight_px = 202
depth_ship = 28.26
depth_ruler = 33

vessel_height_actual = estimate_vessel_height(
    vessel_height_px, ruler_height_actual, rulerheight_px, depth_ship, depth_ruler
)
vessel_height_actual

0.5087308730873088