In [1]:
import torch
import cv2
import time
import numpy as np

In [2]:
# Choose which MiDaS depth-estimation variant to fetch from PyTorch Hub.
# Alternatives (swap the assignment below to use one of them):
#   "DPT_Large"   - MiDaS v3 Large  (highest accuracy, slowest inference speed)
#   "DPT_Hybrid"  - MiDaS v3 Hybrid (medium accuracy, medium inference speed)
#   "MiDaS_small" - small model     (lowest accuracy, highest inference speed);
#                   the upstream hub README lists this one as MiDaS v2.1, not v3.
model_type = "MiDaS_small"

# Fetches the repository (or reuses the local hub cache) and builds the requested model.
midas = torch.hub.load(repo_or_dir="intel-isl/MiDaS", model=model_type)

Using cache found in C:\Users\admin2/.cache\torch\hub\intel-isl_MiDaS_master


Loading weights:  None


Using cache found in C:\Users\admin2/.cache\torch\hub\rwightman_gen-efficientnet-pytorch_master


In [3]:
# Move the model to the GPU if one is available, otherwise keep it on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Switch to inference mode. Module.to() and Module.eval() both return the module
# itself, so the chained call still leaves the model repr as the cell's output.
midas.to(device).eval()

MidasNet_small(
  (pretrained): Module(
    (layer1): Sequential(
      (0): Conv2dSameExport(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
      (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
      (3): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (bn1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
          (act1): ReLU6(inplace=True)
          (se): Identity()
          (conv_pw): Conv2d(32, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
          (act2): Identity()
        )
      )
      (4): Sequential(
        (0): InvertedResidual(
          (conv_pw): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(144,

In [4]:
# Load the transforms that resize and normalize an image for the MiDaS models
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

# The DPT models and the small model use different preprocessing, so pick the
# transform that matches the model selected via `model_type` instead of always
# using the small one. For "MiDaS_small" this is the same choice as before.
if model_type in ("DPT_Large", "DPT_Hybrid"):
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform

Using cache found in C:\Users\admin2/.cache\torch\hub\intel-isl_MiDaS_master


In [6]:
# Read the input image (cv2.imread returns the pixels in BGR channel order)
input_image_path = r'C:\Users\admin2\horsesole_detection\datatset\images\image_0018.jpg'  # Provide the path to your image
img_bgr = cv2.imread(input_image_path)
if img_bgr is None:
    # cv2.imread reports failure by returning None. Raise instead of calling
    # exit(): inside a notebook exit() shuts the kernel down rather than just
    # stopping this cell, which throws away the loaded model.
    raise OSError(f"Error: Unable to read the input image: {input_image_path}")

# Convert to RGB for the model; the BGR original is kept for display further down
img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

# Apply input transforms
input_batch = transform(img).to(device)

# CUDA work is queued asynchronously, so synchronize before reading the clock;
# otherwise the measurement covers only the kernel launches, not the execution.
use_cuda = device.type == "cuda"
if use_cuda:
    torch.cuda.synchronize()

# Start measuring time before processing the image
start = time.perf_counter()

# Prediction and resize to original resolution
with torch.no_grad():
    prediction = midas(input_batch)
    prediction = torch.nn.functional.interpolate(
        prediction.unsqueeze(1),
        size=img.shape[:2],
        mode="bicubic",
        align_corners=False,
    ).squeeze()

if use_cuda:
    torch.cuda.synchronize()

# End measuring time after processing the image
end = time.perf_counter()
totalTime = end - start

# Guard against a zero elapsed time to avoid division by zero
fps = 1 / totalTime if totalTime > 0 else 0

# Bring the prediction to the host and rescale it to the [0, 1] range
depth_map = prediction.cpu().numpy()
depth_map = cv2.normalize(depth_map, None, 0, 1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_64F)

# Apply the color map to the depth map (applyColorMap needs an 8-bit image)
depth_map = (depth_map * 255).astype(np.uint8)
depth_map_colored = cv2.applyColorMap(depth_map, cv2.COLORMAP_MAGMA)

# Display the image and depth map. cv2.imshow interprets data as BGR, so show the
# BGR original rather than the RGB array (which would swap red and blue). Draw the
# overlay on a copy so the arrays used for inference stay unmodified on re-runs.
display_img = img_bgr.copy()
cv2.putText(display_img, f'FPS: {int(fps)}', (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)
cv2.imshow('Image', display_img)
cv2.imshow('Depth Map', depth_map_colored)

# Wait for any key to be pressed and close the windows
cv2.waitKey(0)
cv2.destroyAllWindows()
