Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
Extracting frames (first 2 minutes)...
Extracted 361 frames (~120 seconds).


In [1]:
import os
from pathlib import Path
import cv2
import torch
from PIL import Image
import numpy as np
import open3d as o3d


# -----------------------------
# Load MiDaS
# -----------------------------
# Pulls the "MiDaS" model and its matching preprocessing transforms from the
# intel-isl/MiDaS torch.hub repository and prepares the model for inference.
print("Loading MiDaS model...")

MIDAS_REPO = "intel-isl/MiDaS"

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the network, move it to the chosen device and switch it to eval mode
# (inference only; no training happens in this notebook).
midas = torch.hub.load(MIDAS_REPO, "MiDaS").to(device).eval()

# Preprocessing pipeline shipped with the hub repo; `default_transform` is the
# one applied to every frame before it is fed to `midas`.
midas_transforms = torch.hub.load(MIDAS_REPO, "transforms")
transform = midas_transforms.default_transform


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
Loading MiDaS model...


Using cache found in C:\Users\James/.cache\torch\hub\intel-isl_MiDaS_master


Loading weights:  None


Using cache found in C:\Users\James/.cache\torch\hub\facebookresearch_WSL-Images_main
Using cache found in C:\Users\James/.cache\torch\hub\intel-isl_MiDaS_master


In [2]:
from pathlib import Path

# Folder holding the extracted video frames that the later cells process.
FRAME_DIR = Path(r"C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames")

# Keep only .jpg / .png entries found directly inside FRAME_DIR (the extension
# comparison is case-insensitive) and order them by path so frames are
# visited in file-name order.
image_suffixes = (".jpg", ".png")
frame_paths = sorted(
    entry
    for entry in FRAME_DIR.iterdir()
    if entry.suffix.lower() in image_suffixes
)

print(f"Found {len(frame_paths)} frames.")

Found 288 frames.


In [3]:
# -----------------------------
# Build Point Cloud
# -----------------------------
# For every frame in `frame_paths`:
#   1. run MiDaS and resize the prediction back to the frame resolution,
#   2. rescale the prediction to the range [0, 5],
#   3. back-project every pixel through a simple pinhole model,
#   4. shift the frame along Z by a smoothed running offset,
#   5. save the down-sampled per-frame cloud as .ply and merge it into `pcd`.
# Side products: `frame_depths` (mean rescaled depth per frame) and
# `depth_deltas` (frame-to-frame change of that mean).
print("Processing frames and building point cloud...")
pcd = o3d.geometry.PointCloud()

frame_depths = []          # store mean depth per frame
depth_deltas = []          # store frame-to-frame differences
cumulative_translation = 0 # running Z translation

prev_depth = None

# The output folder does not depend on the frame, so resolve and create it
# once up front instead of on every loop iteration.
output_dir = r"C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\depth_frames"
os.makedirs(output_dir, exist_ok=True)

for i, fpath in enumerate(frame_paths):
    # Open inside a context manager so the file handle is released promptly;
    # convert() returns an independent in-memory RGB image.
    with Image.open(fpath) as frame_file:
        img = frame_file.convert("RGB")
    img_np = np.array(img)
    input_tensor = transform(img_np).to(device)

    # ---- Depth prediction ----
    with torch.no_grad():
        prediction = midas(input_tensor)
        # PIL's size is (width, height); reverse it to the (height, width)
        # order that interpolate() expects.
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=img.size[::-1],
            mode="bicubic",
            align_corners=False
        ).squeeze().cpu().numpy()

    # Normalize depth: min-max rescale this frame's prediction to [0, 5].
    # NOTE(review): MiDaS is documented as predicting relative *inverse*
    # depth, so larger values here likely mean closer surfaces -- confirm
    # whether the map should be inverted before being used as Z.
    # NOTE(review): the rescale is per frame, so values are not on a common
    # scale across frames; the statistics below inherit that limitation.
    Z = prediction
    Z = Z - Z.min()
    Z = Z / (Z.max() + 1e-8) * 5.0

    # ---- Store frame depth (mean is stable) ----
    frame_depth = np.mean(Z)
    frame_depths.append(frame_depth)

    # ---- Compute depth difference over time ----
    # The first frame has no predecessor, so its delta is recorded as 0.0.
    if prev_depth is not None:
        delta = frame_depth - prev_depth
        depth_deltas.append(delta)
    else:
        depth_deltas.append(0.0)

    prev_depth = frame_depth

    # ---- Running average of depth deltas ----
    # Mean over every delta seen so far (including the leading 0.0).
    avg_delta = np.mean(depth_deltas)

    # ---- Use smoothed motion instead of raw ----
    cumulative_translation += avg_delta

    # ---- Backproject to 3D ----
    h, w = Z.shape
    xs = np.linspace(0, 1, w)
    ys = np.linspace(0, 1, h)
    u, v = np.meshgrid(xs, ys)

    # Pinhole intrinsics: principal point at the image centre and an ad-hoc
    # focal length of 0.8 * width (no calibration is used in this notebook).
    cx, cy = w / 2, h / 2
    fx = fy = 0.8 * w

    # u, v are normalised to [0, 1]; scale back to pixel units before
    # applying the pinhole equations.
    X = (u * w - cx) * Z / fx
    Y = (v * h - cy) * Z / fy
    points = np.stack((X, Y, Z), axis=-1).reshape(-1, 3)

    # Apply smoothed cumulative translation
    points[:, 2] += cumulative_translation

    # Per-point RGB in [0, 1], in the same row-major order as `points`.
    colors = img_np.reshape(-1, 3) / 255.0

    frame_pcd = o3d.geometry.PointCloud()
    frame_pcd.points = o3d.utility.Vector3dVector(points)
    frame_pcd.colors = o3d.utility.Vector3dVector(colors)
    frame_pcd = frame_pcd.voxel_down_sample(voxel_size=0.05)

    # Persist this frame's cloud, then fold it into the merged cloud.
    frame_filename = os.path.join(output_dir, f"frame_{i:05d}.ply")
    o3d.io.write_point_cloud(frame_filename, frame_pcd)

    pcd += frame_pcd

    # Progress line for every frame.
    print(f"Frame {i}/{len(frame_paths)} | Depth: {frame_depth:.4f} | Avg Δ: {avg_delta:.4f} | {fpath}")

Processing frames and building point cloud...
Frame 0/288 | Depth: 2.8248 | Avg Δ: 0.0000 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_000000.png
Frame 1/288 | Depth: 3.0550 | Avg Δ: 0.1151 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_000010.png
Frame 2/288 | Depth: 2.9371 | Avg Δ: 0.0374 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_000020.png
Frame 3/288 | Depth: 2.8754 | Avg Δ: 0.0126 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_000030.png
Frame 4/288 | Depth: 2.8595 | Avg Δ: 0.0069 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_000040.png
Frame 5/288 | Depth: 2.9227 | Avg Δ: 0.0163 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_000050.png
Frame 6/288 | Depth: 3.0422 | Avg Δ: 0.0310 | C:\Users\James\Desktop\hackathon\Ironsite-Hackatho

Frame 57/288 | Depth: 2.7626 | Avg Δ: -0.0011 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_002130.png
Frame 58/288 | Depth: 2.4479 | Avg Δ: -0.0064 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_002160.png
Frame 59/288 | Depth: 2.8234 | Avg Δ: -0.0000 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_002220.png
Frame 60/288 | Depth: 2.8964 | Avg Δ: 0.0012 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_002230.png
Frame 61/288 | Depth: 2.3600 | Avg Δ: -0.0075 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_002290.png
Frame 62/288 | Depth: 3.0895 | Avg Δ: 0.0042 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_002310.png
Frame 63/288 | Depth: 2.7268 | Avg Δ: -0.0015 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\fram

Frame 114/288 | Depth: 3.5050 | Avg Δ: 0.0059 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_004570.png
Frame 115/288 | Depth: 3.3777 | Avg Δ: 0.0048 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_004580.png
Frame 116/288 | Depth: 3.6079 | Avg Δ: 0.0067 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_004590.png
Frame 117/288 | Depth: 3.3297 | Avg Δ: 0.0043 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_004600.png
Frame 118/288 | Depth: 3.0382 | Avg Δ: 0.0018 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_004610.png
Frame 119/288 | Depth: 3.4228 | Avg Δ: 0.0050 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_004620.png
Frame 120/288 | Depth: 3.6447 | Avg Δ: 0.0068 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\fr

Frame 171/288 | Depth: 1.1001 | Avg Δ: -0.0100 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_005230.png
Frame 172/288 | Depth: 0.9624 | Avg Δ: -0.0108 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_005240.png
Frame 173/288 | Depth: 0.9209 | Avg Δ: -0.0109 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_005260.png
Frame 174/288 | Depth: 1.2146 | Avg Δ: -0.0092 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_005280.png
Frame 175/288 | Depth: 1.4284 | Avg Δ: -0.0079 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_005290.png
Frame 176/288 | Depth: 1.0137 | Avg Δ: -0.0102 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_005300.png
Frame 177/288 | Depth: 1.4154 | Avg Δ: -0.0079 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_fr

Frame 228/288 | Depth: 2.4984 | Avg Δ: -0.0014 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_012590.png
Frame 229/288 | Depth: 3.1830 | Avg Δ: 0.0016 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_012720.png
Frame 230/288 | Depth: 2.8115 | Avg Δ: -0.0001 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_012730.png
Frame 231/288 | Depth: 2.4494 | Avg Δ: -0.0016 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_012740.png
Frame 232/288 | Depth: 2.5630 | Avg Δ: -0.0011 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_012750.png
Frame 233/288 | Depth: 2.7995 | Avg Δ: -0.0001 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_012800.png
Frame 234/288 | Depth: 2.8388 | Avg Δ: 0.0001 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_fram

Frame 285/288 | Depth: 0.6590 | Avg Δ: -0.0076 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_018720.png
Frame 286/288 | Depth: 2.3651 | Avg Δ: -0.0016 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_018890.png
Frame 287/288 | Depth: 2.0937 | Avg Δ: -0.0025 | C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\clip_wall_frames\frame_019130.png


In [4]:
import open3d as o3d

# Inspect a single saved per-frame cloud (frame index 55) in the Open3D viewer.
# NOTE(review): this rebinds `pcd`, the same name the point-cloud-building
# cell uses for the merged cloud; after this cell runs, `pcd` refers to the
# single frame loaded here. Re-run the build cell if the merged cloud is needed.
preview_ply_path = r"C:\Users\James\Desktop\hackathon\Ironsite-Hackathon\WallFilter\depth_frames\frame_00055.ply"
pcd = o3d.io.read_point_cloud(preview_ply_path)

# Open the viewer on just this cloud.
geometries_to_show = [pcd]
o3d.visualization.draw_geometries(geometries_to_show)

In [10]:
import numpy as np

# Compare the mean per-frame depth at the start of the clip with the mean at
# the end. `frame_depths` holds one mean depth value per processed frame.

# Number of frames averaged at each end of the sequence. The guard, the error
# message and the printed labels are all derived from this single value so
# they cannot drift apart.
WINDOW = 25

depths = np.array(frame_depths)

# Safety check: need enough frames for two non-overlapping windows.
if len(depths) < 2 * WINDOW:
    raise ValueError(
        f"Need at least {2 * WINDOW} frames to compare first {WINDOW} "
        f"and last {WINDOW}; got {len(depths)}."
    )

first_window = depths[:WINDOW]
last_window = depths[-WINDOW:]

# Legacy names from the earlier version of this cell, kept so that any other
# cell still referring to them keeps working.
first_20 = first_window
last_20 = last_window

# Compute statistics
first_mean = np.mean(first_window)
last_mean = np.mean(last_window)

# Spread within each window (computed for inspection; not printed below).
first_std = np.std(first_window)
last_std = np.std(last_window)

# Signed change from the start of the clip to the end, absolute and relative
# to the starting mean.
difference = last_mean - first_mean
percent_change = (difference / first_mean) * 100

print("---- Depth Comparison ----")
print(f"First {WINDOW} Mean Depth: {first_mean:.4f}")
print(f"Last {WINDOW} Mean Depth:  {last_mean:.4f}")
print(f"Difference:          {difference:.4f}")
print(f"Percent Change:      {percent_change:.2f}%")

---- Depth Comparison ----
First 20 Mean Depth: 2.6856
Last 20 Mean Depth:  2.0841
Difference:          -0.6015
Percent Change:      -22.40%
