In [1]:
import matplotlib.pyplot as plt
# external imports
import transformers
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
import torch
import torchvision
import time 
import numpy as np
from PIL import Image
from PIL import ImageFile
import requests
import datasets
from datasets import load_dataset
from torchvision import datasets, transforms
from tqdm import tqdm
import os
import cv2

# GoPro with DepthAnything


## Large model


In [2]:
# Depth estimation on the GoPro still images with the *large* Depth Anything checkpoint.
# For every file in `image_folder` this cell writes
#   * an 8-bit depth map to `results_folder`, and
#   * a side-by-side figure (photo | colour-mapped depth) to `pretty_folder`.
# A file whose name already exists anywhere under `pretty_folder` is skipped, so the
# cell can simply be re-run to resume an interrupted job.

# Let PIL load truncated image files instead of raising an error
ImageFile.LOAD_TRUNCATED_IMAGES = True

image_folder = "C:\\Users\\susan\\Documents\\University\\MIR\\DataDriven\\GOPRO\\images\\"
image_files = os.listdir(image_folder)

results_folder = "C:\\Users\\susan\\Documents\\University\\MIR\\DataDriven\\GOPRO\\processed_images\\depth_maps_large\\"
pretty_folder = "C:\\Users\\susan\\Documents\\University\\MIR\\DataDriven\\GOPRO\\processed_images\\pretty_large\\"

# Create the output folders up front so the save calls below cannot fail on a missing directory
os.makedirs(results_folder, exist_ok=True)
os.makedirs(pretty_folder, exist_ok=True)

image_processor = AutoImageProcessor.from_pretrained("LiheYoung/depth-anything-large-hf")
model = AutoModelForDepthEstimation.from_pretrained("LiheYoung/depth-anything-large-hf")

# Move the model to the GPU if available; the processor output is moved per image below
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Names of the figures that already exist (searched recursively). Collected once instead
# of re-walking the folder for every image.
already_processed = {
    name for _, _, names in os.walk(pretty_folder) for name in names
}

for filename in tqdm(image_files, desc="Processing images"):
    # Skip before touching the file so resumed runs are cheap
    if filename in already_processed:
        continue

    image_path = os.path.join(image_folder, filename)
    # os.listdir also returns sub-directories, which cannot be opened as images
    if not os.path.isfile(image_path):
        continue

    # convert() forces the pixel data to be read, so the file handle can be closed
    # right away, and guarantees a 3-channel input for the processor and the plot
    with Image.open(image_path) as opened:
        image = opened.convert("RGB")

    # Prepare image for the model and move the tensors to the model's device
    inputs = image_processor(images=image, return_tensors="pt")
    inputs = {key: value.to(device) for key, value in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth

    # Interpolate to the original size; PIL's size is (width, height), torch wants (height, width)
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )

    # Scale so that the largest value maps to 255. Bicubic interpolation can overshoot
    # the input range, so clip before the uint8 cast (casting out-of-range floats is not
    # well defined), and guard against an all-zero map to avoid dividing by zero.
    output = prediction.squeeze().cpu().numpy()
    peak = float(np.max(output))
    if peak > 0:
        scaled = np.clip(output * 255 / peak, 0, 255)
    else:
        scaled = np.zeros_like(output)
    formatted = scaled.astype("uint8")
    depth = Image.fromarray(formatted)
    depth.save(os.path.join(results_folder, filename))

    # Save the prediction in a pretty way: photo on the left, colour-mapped depth on the right
    fig, ax = plt.subplots(1, 2, dpi=400)

    ax[0].imshow(image)
    ax[0].axis('off')
    ax[0].set_title('GoPro')
    ax[1].imshow(depth, cmap='plasma')
    ax[1].axis('off')
    ax[1].set_title('DepthAnything')

    fig.savefig(os.path.join(pretty_folder, filename))
    # Close explicitly: figures created in a loop are otherwise kept alive by pyplot
    plt.close(fig)

Processing images: 100%|█████████████████████████████████████████████████████████████| 176/176 [14:05<00:00,  4.80s/it]


## Small model

In [2]:
# Depth estimation on the GoPro still images with the *small* Depth Anything checkpoint.
# For every file in `image_folder` this cell writes
#   * an 8-bit depth map to `results_folder`, and
#   * a side-by-side figure (photo | colour-mapped depth) to `pretty_folder`.
# A file whose name already exists anywhere under `pretty_folder` is skipped, so the
# cell can simply be re-run to resume an interrupted job.

# Let PIL load truncated image files instead of raising an error
ImageFile.LOAD_TRUNCATED_IMAGES = True

image_folder = "C:\\Users\\susan\\Documents\\University\\MIR\\DataDriven\\GOPRO\\images\\"
image_files = os.listdir(image_folder)

results_folder = "C:\\Users\\susan\\Documents\\University\\MIR\\DataDriven\\GOPRO\\processed_images\\depth_maps_small\\"
pretty_folder = "C:\\Users\\susan\\Documents\\University\\MIR\\DataDriven\\GOPRO\\processed_images\\pretty_small\\"

# Create the output folders up front so the save calls below cannot fail on a missing directory
os.makedirs(results_folder, exist_ok=True)
os.makedirs(pretty_folder, exist_ok=True)

image_processor = AutoImageProcessor.from_pretrained("LiheYoung/depth-anything-small-hf")
model = AutoModelForDepthEstimation.from_pretrained("LiheYoung/depth-anything-small-hf")

# Move the model to the GPU if available; the processor output is moved per image below
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Names of the figures that already exist (searched recursively). Collected once instead
# of re-walking the folder for every image.
already_processed = {
    name for _, _, names in os.walk(pretty_folder) for name in names
}

for filename in tqdm(image_files, desc="Processing images"):
    # Skip before touching the file so resumed runs are cheap
    if filename in already_processed:
        continue

    image_path = os.path.join(image_folder, filename)
    # os.listdir also returns sub-directories, which cannot be opened as images
    if not os.path.isfile(image_path):
        continue

    # convert() forces the pixel data to be read, so the file handle can be closed
    # right away, and guarantees a 3-channel input for the processor and the plot
    with Image.open(image_path) as opened:
        image = opened.convert("RGB")

    # Prepare image for the model and move the tensors to the model's device
    inputs = image_processor(images=image, return_tensors="pt")
    inputs = {key: value.to(device) for key, value in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth

    # Interpolate to the original size; PIL's size is (width, height), torch wants (height, width)
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )

    # Scale so that the largest value maps to 255. Bicubic interpolation can overshoot
    # the input range, so clip before the uint8 cast (casting out-of-range floats is not
    # well defined), and guard against an all-zero map to avoid dividing by zero.
    output = prediction.squeeze().cpu().numpy()
    peak = float(np.max(output))
    if peak > 0:
        scaled = np.clip(output * 255 / peak, 0, 255)
    else:
        scaled = np.zeros_like(output)
    formatted = scaled.astype("uint8")
    depth = Image.fromarray(formatted)
    depth.save(os.path.join(results_folder, filename))

    # Save the prediction in a pretty way: photo on the left, colour-mapped depth on the right
    fig, ax = plt.subplots(1, 2, dpi=400)

    ax[0].imshow(image)
    ax[0].axis('off')
    ax[0].set_title('GoPro')
    ax[1].imshow(depth, cmap='plasma')
    ax[1].axis('off')
    ax[1].set_title('DepthAnything')

    fig.savefig(os.path.join(pretty_folder, filename))
    # Close explicitly: figures created in a loop are otherwise kept alive by pyplot
    plt.close(fig)

Processing images: 100%|█████████████████████████████████████████████████████████████| 176/176 [07:15<00:00,  2.47s/it]


## Video

In [3]:
# Depth estimation on a GoPro video with the large Depth Anything checkpoint.
# Reads the input video frame by frame, predicts a depth map for each frame and writes
# the plasma-coloured depth maps to "<input name>_depth.avi" in the same folder.

# Load image processor and model
image_processor = AutoImageProcessor.from_pretrained("LiheYoung/depth-anything-large-hf")
model = AutoModelForDepthEstimation.from_pretrained("LiheYoung/depth-anything-large-hf")

# Move the model to the GPU if available; the processor output is moved per frame below
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Input video
filename = "GH010032.mp4"
folder = "C:\\Users\\susan\\Documents\\University\\MIR\\DataDriven\\GOPRO\\"
video = os.path.join(folder, filename)

# Output video path; splitext works for any extension length, unlike slicing off 4 characters
filename_with_depth = os.path.splitext(filename)[0] + "_depth.avi"
output_video_path = os.path.join(folder, filename_with_depth)

cap = cv2.VideoCapture(video)  # creates a video capture object cap
if not cap.isOpened():
    # Fail loudly instead of silently producing an empty output file
    cap.release()
    raise IOError(f"Could not open input video: {video}")

out = None
try:
    # The output keeps the frame rate and frame size of the input
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # VideoWriter that receives one colour-mapped depth frame per input frame
    out = cv2.VideoWriter(
        output_video_path,
        cv2.VideoWriter_fourcc(*'XVID'),
        fps,
        (frame_width, frame_height),
    )
    if not out.isOpened():
        raise IOError(f"Could not open output video for writing: {output_video_path}")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame returned: end of the video (or a read error)
            break

        # OpenCV delivers BGR frames; convert to RGB before building the PIL image
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(frame)

        # Prepare image for the model and move the tensors to the model's device
        inputs = image_processor(images=pil_image, return_tensors="pt")
        inputs = {key: value.to(device) for key, value in inputs.items()}

        # Run inference
        with torch.no_grad():
            outputs = model(**inputs)
            predicted_depth = outputs.predicted_depth

        # Interpolate to the frame size the writer was created with
        prediction = torch.nn.functional.interpolate(
            predicted_depth.unsqueeze(1),
            size=(frame_height, frame_width),
            mode="bicubic",
            align_corners=False,
        )

        # Convert depth prediction to numpy array
        depth_map = prediction.squeeze().cpu().numpy()

        # Min-max normalise to 0..255 for visualisation. The scaling is recomputed for
        # every frame, so colours are relative within a frame, not comparable across frames.
        # A constant map would divide by zero, so it is written as all zeros instead.
        depth_min = depth_map.min()
        depth_range = depth_map.max() - depth_min
        if depth_range > 0:
            depth_map = (depth_map - depth_min) / depth_range * 255
        else:
            depth_map = np.zeros_like(depth_map)

        # Convert depth map to uint8
        depth_map = depth_map.astype(np.uint8)

        # Apply the "plasma" colormap to the depth map
        depth_map_colored = cv2.applyColorMap(depth_map, cv2.COLORMAP_PLASMA)

        # Write frame with depth map to output video
        out.write(depth_map_colored)
finally:
    # Always release both handles so the output file is finalised even if a frame fails
    cap.release()
    if out is not None:
        out.release()