In [None]:
# Install dependencies
#
# torch, opencv-python (cv2), numpy and tqdm are imported directly by this notebook.
# timm is not imported here - presumably it is required by the MiDaS hub models; confirm.
# NOTE(review): jax is not imported anywhere in the visible notebook - confirm it is needed.
# NOTE(review): matplotlib is imported by the imports cell but is not installed here.

!pip install torch
!pip install opencv-python
!pip install numpy
!pip install timm
!pip install tqdm
!pip install jax

In [2]:
# Set up imports

import os
import cv2
import tqdm
import torch
import matplotlib.pyplot as plt

import numpy as np

In [3]:
# Four-character codec tag; depth_video unpacks it into cv2.VideoWriter_fourcc
# to choose the codec of the written video.
# NOTE(review): some OpenCV/FFmpeg builds reportedly warn about the uppercase tag
# for .mp4 containers and fall back to "mp4v" - confirm on the target runtime.
VW_FOURCC_CODEC = "MP4V"

from google.colab.patches import cv2_imshow

def depth_video(filename: str, output_folder: str):
    """Convert a video to its depth-shifted "3D" version, frame by frame.

    Every frame read from ``filename`` is passed through
    ``convert_image_2d_to_3d`` (which returns RGB), converted back to BGR and
    appended to ``output3d.mp4`` inside ``output_folder``.

    Args:
        filename: Path of the source video.
        output_folder: Directory that receives ``output3d.mp4``. It is created
            when missing; an empty string means the current working directory.

    Returns:
        The path of the written video file.

    Raises:
        IOError: If OpenCV cannot open ``filename``.
    """
    capture = cv2.VideoCapture(filename)
    if not capture.isOpened():
        capture.release()
        raise IOError(f"Could not open video file: {filename}")

    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes the
    # playback speed/duration of the output.
    frame_rate = capture.get(cv2.CAP_PROP_FPS)

    width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*VW_FOURCC_CODEC)

    # Honour the requested output folder instead of always writing to the
    # current working directory.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)
    output_path = os.path.join(output_folder, "output3d.mp4")

    out = cv2.VideoWriter(output_path, fourcc, frame_rate, (width, height))
    # Some containers report no (or a bogus) frame count; let tqdm run without
    # a total in that case rather than showing a misleading bar.
    progress_bar = tqdm.tqdm(
        total=frame_count if frame_count > 0 else None, position=0, leave=True
    )

    try:
        while True:
            ret, frame = capture.read()
            if not ret or frame is None:
                break

            new_frame = cv2.cvtColor(convert_image_2d_to_3d(frame), cv2.COLOR_RGB2BGR)
            out.write(new_frame)
            progress_bar.update(1)
    finally:
        # Release everything even if a frame conversion raises, so the partial
        # output file is finalised and the capture handle is not leaked.
        capture.release()
        out.release()
        progress_bar.close()

    return output_path

In [4]:
def depth_image(img: np.ndarray):
    """Run the loaded MiDaS model on one image and return its prediction.

    The notebook-level globals ``transform``, ``midas`` and ``device`` (set in
    the model-loading cell) must exist before this is called.

    Args:
        img: Image array that is treated as BGR (it is converted to RGB first).

    Returns:
        The model prediction as a NumPy array, resized with bicubic
        interpolation to the image's first two dimensions and squeezed.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    target_size = rgb.shape[:2]

    batch = transform(rgb).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        raw = midas(batch)
        # interpolate wants a channel axis, hence the unsqueeze before resizing.
        resized = torch.nn.functional.interpolate(
            raw.unsqueeze(1),
            size=target_size,
            mode="bicubic",
            align_corners=False,
        )
        depth = resized.squeeze()

    return depth.cpu().numpy()

In [5]:
def convert_image_2d_to_3d(image, shift_divisor: int = 13, base_shift: int = 7):
  """Shift the red channel of an image horizontally by a depth-dependent offset.

  The MiDaS prediction for the image is normalised to 0..255. Each pixel's red
  value is then copied ``(255 - depth) // shift_divisor + base_shift`` columns
  to the left, so pixels with a lower normalised value move further. The green
  and blue channels are left untouched.

  Args:
    image: Image array that is treated as BGR (e.g. from ``cv2.imread``).
    shift_divisor: Divisor applied to ``255 - depth``; larger values reduce
      how much the shift varies with depth.
    base_shift: Constant number of columns added to every shift.

  Returns:
    A new RGB image of the same shape as ``image``.
  """
  # depth_image does its own BGR -> RGB conversion, so give it the original
  # frame. Reusing it keeps inference inside ``torch.no_grad()`` instead of
  # toggling the global grad mode (which was never restored on error and was
  # force-enabled afterwards regardless of its previous state).
  output = depth_image(image)
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

  new_image = image.copy()
  depth_frame = cv2.normalize(src=output, dst=None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)

  mesh_x, mesh_y = np.meshgrid(np.arange(depth_frame.shape[1]), np.arange(depth_frame.shape[0]))
  # Widen from uint8 first so the arithmetic cannot overflow for custom parameters.
  shift = (255 - depth_frame.astype(np.intp)) // shift_divisor + base_shift
  new_mesh_x = mesh_x - shift

  # Drop destinations that fall outside the image. Negative column indices
  # would otherwise wrap around and paint left-edge pixels onto the right edge.
  in_bounds = (new_mesh_x >= 0) & (new_mesh_x < depth_frame.shape[1])
  rows = mesh_y[in_bounds]
  src_cols = mesh_x[in_bounds]
  dst_cols = new_mesh_x[in_bounds]

  new_image[rows, dst_cols, 0] = image[rows, src_cols, 0]

  return new_image

In [13]:
# Borrowed from https://colab.research.google.com/github/pytorch/pytorch.github.io/blob/master/assets/hub/intelisl_midas_v2.ipynb

# @markdown ---
#@markdown ### Enter the model to use for depth estimation
# @markdown ##### **DPT_Large**: MiDaS v3 - Large *(highest accuracy, slowest inference speed)*
# @markdown ##### **DPT_Hybrid**: MiDaS v3 - Hybrid *(medium accuracy, medium inference speed)*
# @markdown ##### **MiDaS_small**: MiDaS v2.1 - Small *(lowest accuracy, highest inference speed)*
model_type = "DPT_Large" # @param ["DPT_Large", "DPT_Hybrid", "MiDaS_small"]
# @markdown ---
#@markdown ### Enter an image path to convert to 3D:
image_filename = "IMG_3892.PNG" #@param {type:"string"}
# @markdown ##### Or, to convert a video, clear the image path above and enter a video path (the image path takes priority when both are set):
video_filename = "" #@param {type:"string"}
# @markdown ---

In [11]:
# Fetch the selected MiDaS variant from torch.hub.
midas = torch.hub.load("intel-isl/MiDaS", model_type)

# Run on the GPU when torch can see one, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
midas.to(device)
midas.eval()

# The two DPT variants share one input transform; any other model type uses the small one.
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
transform = (
    midas_transforms.dpt_transform
    if model_type in ("DPT_Large", "DPT_Hybrid")
    else midas_transforms.small_transform
)

Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master
Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


In [None]:
# Convert the configured image, or the configured video when no image path is set.
if image_filename:
  image = cv2.imread(image_filename)
  # cv2.imread signals an unreadable/missing file by returning None.
  if image is None:
    raise FileNotFoundError(f"Could not read image: {image_filename}")
  # convert_image_2d_to_3d returns RGB; cv2_imshow is given BGR like other cv2 images.
  image_3d = cv2.cvtColor(convert_image_2d_to_3d(image), cv2.COLOR_RGB2BGR)
  cv2_imshow(image_3d)
elif video_filename:
  # os.path has no makedirs; os.makedirs is the real function. exist_ok keeps
  # the cell re-runnable once the folder exists.
  os.makedirs("/content/out", exist_ok=True)
  print("Video output to /content/out")
  depth_video(video_filename, "/content/out")

In [None]:
# @markdown # Extra plt display
# Side-by-side view of the input, the MiDaS prediction and the converted image.
# Skipped when only a video path is configured.
if image_filename:
    img = cv2.imread(image_filename)
    # cv2.imread signals an unreadable/missing file by returning None.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_filename}")

    # depth_image converts BGR -> RGB before running the model, matching what
    # convert_image_2d_to_3d does; feeding the raw BGR array to the transform
    # would run the model on channel-swapped input.
    output = depth_image(img)

    fig, axarr = plt.subplots(1, 3, figsize=(30, 30))

    axarr[0].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    axarr[0].set_title("Input image")
    axarr[1].imshow(output, cmap="viridis", interpolation="nearest")
    axarr[1].set_title("MiDaS prediction")
    axarr[2].imshow(convert_image_2d_to_3d(img))
    axarr[2].set_title("Converted image")
    plt.show()