In [None]:
import os
import torch
import cv2
import numpy as np
from torchvision.transforms import Compose, Normalize, ToTensor

In [None]:
class DepthEstimator:
    """Monocular depth estimator backed by a MiDaS model from PyTorch Hub.

    The model and its matching preprocessing transform are both loaded from
    the ``intel-isl/MiDaS`` hub repository. Inference runs on CUDA when it is
    available and on the CPU otherwise.
    """

    def __init__(self, model_type="DPT_Large"):
        """Load the MiDaS model and the preprocessing transform that goes with it.

        Args:
            model_type: Name of the MiDaS hub entry point to load, e.g.
                ``"DPT_Large"``, ``"DPT_Hybrid"`` or ``"MiDaS_small"``.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = torch.hub.load("intel-isl/MiDaS", model_type)
        self.model.to(self.device)
        self.model.eval()

        # Use the transforms published alongside the models instead of a
        # hand-rolled ToTensor/Normalize pipeline: per the MiDaS hub
        # documentation they also resize the frame to the resolution the
        # network expects, which a plain normalisation does not do (arbitrary
        # video resolutions then break inside the network).
        midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
        if model_type in ("DPT_Large", "DPT_Hybrid"):
            self.transform = midas_transforms.dpt_transform
        else:
            self.transform = midas_transforms.small_transform

    def estimate_depth(self, rgb_frame):
        """Predict a depth map for a single frame.

        Args:
            rgb_frame: H x W x 3 image array. Despite the parameter name, the
                frame is converted with ``cv2.COLOR_BGR2RGB`` before
                inference, so it is expected in OpenCV's BGR channel order
                (e.g. a frame returned by ``cv2.VideoCapture.read``).

        Returns:
            numpy.ndarray: The model prediction resampled (bicubic) to the
            input frame's height and width, with singleton dimensions
            squeezed out.
        """
        img = cv2.cvtColor(rgb_frame, cv2.COLOR_BGR2RGB)
        # The hub transform already returns a batched (1, C, H, W) tensor,
        # so no extra unsqueeze is needed here.
        input_batch = self.transform(img).to(self.device)

        with torch.no_grad():
            prediction = self.model(input_batch)
            # Bring the prediction back to the original frame resolution.
            prediction = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze()

        return prediction.cpu().numpy()

    def normalize_depth(self, depth_map):
        """Min-max rescale a depth map to 0..255 and return it as uint8.

        Intended for visualisation only; the scaling is per-map, so values are
        not comparable across frames.
        """
        return cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)


def process_video_frames(input_video_path, output_dir, skip=7, depth_estimator=None):
    """Sample frames from a video and save RGB, raw depth and depth previews.

    Every ``skip``-th frame (starting with frame 0) is passed through the
    depth estimator. For the k-th sampled frame three files are written:

    * ``<output_dir>/rgb/frame_<k>.png``       - the original frame
    * ``<output_dir>/depth_raw/depth_<k>.npy`` - the unmodified depth array
    * ``<output_dir>/depth_vis/depth_<k>.png`` - 8-bit normalised preview

    Args:
        input_video_path: Path of the video file to read.
        output_dir: Root directory for the three output sub-folders; they are
            created if missing.
        skip: Sampling stride in frames; must be >= 1. Defaults to 7.
        depth_estimator: Object providing ``estimate_depth(frame)`` and
            ``normalize_depth(depth_map)``. When ``None`` a default
            ``DepthEstimator()`` is created.

    Returns:
        int: Number of frames that were sampled and saved.

    Raises:
        ValueError: If ``skip`` is smaller than 1.
        IOError: If the video cannot be opened.
    """
    if skip < 1:
        raise ValueError("skip must be a positive integer")

    for subdir in ("rgb", "depth_raw", "depth_vis"):
        os.makedirs(os.path.join(output_dir, subdir), exist_ok=True)

    cap = cv2.VideoCapture(input_video_path)
    saved = 0
    try:
        # Fail loudly instead of silently producing no output for a bad path.
        if not cap.isOpened():
            raise IOError(f"Could not open video: {input_video_path}")

        # Build the (expensive) default model only once the video is known
        # to be readable.
        if depth_estimator is None:
            depth_estimator = DepthEstimator()

        frame_idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_idx % skip == 0:
                depth_map = depth_estimator.estimate_depth(frame)
                depth_vis = depth_estimator.normalize_depth(depth_map)

                # Consecutive index of the sampled frame (0, 1, 2, ...).
                # NOTE: names are zero-padded to two digits only, so they stop
                # sorting lexicographically after 100 sampled frames.
                frame_name = frame_idx // skip
                print(frame_name)

                # ---- save files ----
                rgb_path = os.path.join(output_dir, "rgb", f"frame_{frame_name:02d}.png")
                depth_raw = os.path.join(output_dir, "depth_raw", f"depth_{frame_name:02d}.npy")
                depth_img = os.path.join(output_dir, "depth_vis", f"depth_{frame_name:02d}.png")

                ok = cv2.imwrite(rgb_path, frame)      # save RGB frame
                print("The picture status is:", ok)
                np.save(depth_raw, depth_map)          # full-precision depth
                cv2.imwrite(depth_img, depth_vis)      # 8-bit visualization
                saved += 1

            frame_idx += 1
    finally:
        # Always release the capture, even if inference or saving fails.
        # No GUI windows are opened here, so cv2.destroyAllWindows() is not
        # called (it is unavailable on headless OpenCV builds).
        cap.release()

    return saved

In [None]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/3DGS/
input_video_path = "/content/drive/MyDrive/3DGS/nagoya.mp4"      # folder with your images
output_dir = "/content/drive/MyDrive/3DGS/results"   # where results will be saved

import os
os.makedirs(output_dir, exist_ok=True)

process_video_frames(input_video_path, output_dir)
