In [2]:
import cv2
import numpy as np
import torch

# === Load MiDaS Model for Depth Estimation ===
# "DPT_Large" is used here; "MiDaS_small" is the faster alternative.
model_type = "DPT_Large"

# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fetch the network through torch.hub and switch it to inference mode.
midas = torch.hub.load("intel-isl/MiDaS", model_type)
midas.to(device)
midas.eval()

# The hub repo also provides the input transforms; the DPT models use
# `dpt_transform`, every other model type falls back to `small_transform`.
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
if model_type in ("DPT_Large", "DPT_Hybrid"):
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform

# === Parameters ===
# Largest horizontal shift, in pixels, used when synthesising the right-eye
# view; the per-pixel shift computed in the frame loop ranges from 0 to this.
max_disp = 20

# Open the input video and fail early if it cannot be read.
input_video = "input.mp4"
cap = cv2.VideoCapture(input_video)
if not cap.isOpened():
    raise ValueError("Error opening video file")

# The output reuses the frame rate and frame size reported by the input.
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

output_video = "output_anaglyph.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video, fourcc, fps, (width, height))
if not out.isOpened():
    # Without this check a missing codec or unwritable path would go
    # unnoticed: the frame loop would run to completion and write nothing.
    cap.release()
    raise ValueError("Error opening output video file for writing")

def create_anaglyph(left_view, right_view):
    """
    Combine two views into a single red-cyan anaglyph image.

    The red channel is copied from `left_view`; the green and blue channels
    are copied from `right_view`. Both inputs are indexed as
    (row, column, channel) arrays in OpenCV's BGR channel order, and the
    result has the shape and dtype of `left_view`.
    """
    red = 2  # BGR layout: channel 0 is blue, 1 is green, 2 is red.

    anaglyph = np.zeros_like(left_view)
    # Blue and green (channels 0 and 1) come from the right view.
    anaglyph[:, :, :red] = right_view[:, :, :red]
    # Red comes from the left view.
    anaglyph[:, :, red] = left_view[:, :, red]
    return anaglyph

frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print(f"Processing {frame_count} frames for anaglyph conversion...")

# The pixel coordinate grid depends only on the frame size, so build it once
# here instead of rebuilding it for every frame.
xx, yy = np.meshgrid(np.arange(width), np.arange(height))
map_y = yy.astype(np.float32)  # rows are never shifted

frame_index = 0
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No more frames (or a read failure): stop processing.
            break

        # Convert frame to RGB for MiDaS model processing
        img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_batch = transform(img_rgb).to(device)

        # Inference only; no gradients are needed.
        with torch.no_grad():
            prediction = midas(input_batch)

        # Resize the prediction to the video frame size and move it to NumPy.
        # unsqueeze(1) adds the channel axis that interpolate expects.
        prediction_resized = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=(height, width),
            mode="bicubic",
            align_corners=False
        ).squeeze().cpu().numpy()

        # Rescale this frame's prediction to [0, 1]; the epsilon avoids a
        # division by zero when the prediction is constant.
        depth_min = prediction_resized.min()
        depth_max = prediction_resized.max()
        normalized_depth = (prediction_resized - depth_min) / (depth_max - depth_min + 1e-8)

        # Per-pixel horizontal shift: max_disp where the normalised prediction
        # is 0, down to 0 where it is 1.
        # NOTE(review): MiDaS documents its output as relative inverse depth
        # (larger = nearer); if so, the farthest pixels get the largest
        # shift and the nearest none - confirm this is the intended parallax.
        disparity = max_disp * (1 - normalized_depth)
        disparity = disparity.astype(np.float32)

        # Synthesise the right view by sampling each output pixel from the
        # source column `x - disparity`; edges replicate the border pixel.
        map_x = (xx - disparity).astype(np.float32)
        right_view = cv2.remap(frame, map_x, map_y, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE)

        # Create the anaglyph image from the left and right views
        anaglyph_frame = create_anaglyph(frame, right_view)
        out.write(anaglyph_frame)

        frame_index += 1
        if frame_index % 10 == 0:
            # '\r' keeps the progress report on a single console line.
            print(f"Processed {frame_index}/{frame_count} frames", end='\r')
finally:
    # Release the capture and the writer even if a frame raised, so the
    # handles are not left open.
    cap.release()
    out.release()
print("\nAnaglyph stereoscopic conversion complete!")


Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master
Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


Processing 1238 frames for anaglyph conversion...

Anaglyph stereoscopic conversion complete!
