In [4]:
import tkinter as tk
from PIL import Image, ImageDraw
import numpy as np
import cv2
import torch
import open3d as o3d

# --- Step 1: Drawing Canvas (Tkinter) ---
WIDTH, HEIGHT = 512, 512

class PaintApp:
    def __init__(self, root):
        self.image = Image.new("RGB", (WIDTH, HEIGHT), "white")
        self.draw = ImageDraw.Draw(self.image)
        self.last = None
        self.canvas = tk.Canvas(root, width=WIDTH, height=HEIGHT, bg='white')
        self.canvas.pack()
        self.canvas.bind("<B1-Motion>", self.paint)
        self.canvas.bind("<ButtonRelease-1>", self.reset)
        self.save_btn = tk.Button(root, text="Generate PointCloud", command=self.save_and_exit)
        self.save_btn.pack()
        self.quit = False

    def paint(self, event):
        if self.last is not None:
            self.canvas.create_line(self.last[0], self.last[1], event.x, event.y, fill='black', width=4)
            self.draw.line([self.last, (event.x, event.y)], fill='black', width=12)
        self.last = (event.x, event.y)

    def reset(self, event):
        self.last = None

    def save_and_exit(self):
        self.image.save('canvas.png')
        self.quit = True
        root.quit()

root = tk.Tk()
root.title("Draw your shape and click 'Generate PointCloud'")
app = PaintApp(root)
root.mainloop()

# --- Step 2: Run MiDaS on GPU for Depth Estimation ---
device = 'cuda' if torch.cuda.is_available() else 'cpu'
midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large", pretrained=True).to(device)
midas.eval()
transform = torch.hub.load("intel-isl/MiDaS", "transforms").dpt_transform

img = cv2.imread("canvas.png")
if img is None:
    raise ValueError("Failed to load image. Check if 'canvas.png' exists.")
if len(img.shape) == 2:  # grayscale
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

input_tensor = transform(img_rgb).to(device)
print("Input tensor shape:", input_tensor.shape)  # Should be (1, 3, H, W)

with torch.no_grad():
    depth = midas(input_tensor)[0].cpu().numpy()

# --- Step 3: Convert Depth to 3D Point Cloud ---
h, w = depth.shape
x, y = np.meshgrid(np.arange(w), np.arange(h))
points = np.stack([x.flatten(), y.flatten(), depth.flatten()], axis=1)

# Optional: mask out background (white)
mask = np.mean(img_rgb, axis=2) < 250
points = points[mask.flatten()]

# --- Step 4: Visualize Point Cloud ---
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(points)
colors = img_rgb[mask]
colors = colors / 255.0
if colors.shape[0] == points.shape[0]:
    pcd.colors = o3d.utility.Vector3dVector(colors)
o3d.visualization.draw_geometries([pcd])

Using cache found in C:\Users\vedhr/.cache\torch\hub\intel-isl_MiDaS_master


Input tensor shape: torch.Size([1, 3, 384, 384])


Using cache found in C:\Users\vedhr/.cache\torch\hub\intel-isl_MiDaS_master


IndexError: boolean index did not match indexed array along dimension 0; dimension is 147456 but corresponding boolean dimension is 262144

In [1]:
import tkinter as tk
from PIL import Image, ImageDraw
import numpy as np
import cv2
import torch
import open3d as o3d

# --- Step 1: Drawing Canvas (Tkinter) ---
WIDTH, HEIGHT = 512, 512

class PaintApp:
    def __init__(self, root):
        self.image = Image.new("RGB", (WIDTH, HEIGHT), "white")
        self.draw = ImageDraw.Draw(self.image)
        self.last = None
        self.canvas = tk.Canvas(root, width=WIDTH, height=HEIGHT, bg='white')
        self.canvas.pack()
        self.canvas.bind("<B1-Motion>", self.paint)
        self.canvas.bind("<ButtonRelease-1>", self.reset)
        self.save_btn = tk.Button(root, text="Generate PointCloud", command=self.save_and_exit)
        self.save_btn.pack()
        self.quit = False

    def paint(self, event):
        if self.last is not None:
            self.canvas.create_line(self.last[0], self.last[1], event.x, event.y, fill='black', width=4)
            self.draw.line([self.last, (event.x, event.y)], fill='black', width=12)
        self.last = (event.x, event.y)

    def reset(self, event):
        self.last = None

    def save_and_exit(self):
        self.image.save('canvas.png')
        self.quit = True
        root.quit()

root = tk.Tk()
root.title("Draw your shape and click 'Generate PointCloud'")
app = PaintApp(root)
root.mainloop()

# --- Step 2: Run MiDaS on GPU for Depth Estimation ---
device = 'cuda' if torch.cuda.is_available() else 'cpu'
midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large", pretrained=True).to(device)
midas.eval()
transform = torch.hub.load("intel-isl/MiDaS", "transforms").dpt_transform

img = cv2.imread("canvas.png")
if img is None:
    raise ValueError("Failed to load image. Check if 'canvas.png' exists.")
if len(img.shape) == 2:  # grayscale
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Resize to MiDaS input size (384x384 for DPT_Large)
TARGET_SIZE = 384
img_resized = cv2.resize(img_rgb, (TARGET_SIZE, TARGET_SIZE), interpolation=cv2.INTER_CUBIC)

input_tensor = transform(img_resized).to(device)
print("Input tensor shape:", input_tensor.shape)  # Should be (1, 3, 384, 384)

with torch.no_grad():
    depth = midas(input_tensor)[0].cpu().numpy()

# --- Step 3: Convert Depth to 3D Point Cloud ---
h, w = depth.shape
x, y = np.meshgrid(np.arange(w), np.arange(h))
points = np.stack([x.flatten(), y.flatten(), depth.flatten()], axis=1)

# Optional: mask out background (white)
mask = np.mean(img_resized, axis=2) < 250
points = points[mask.flatten()]
# --- Step 4: Visualize Point Cloud
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(points)
colors = img_resized[mask]
colors = colors / 255.0
if colors.shape[0] == points.shape[0]:
    pcd.colors = o3d.utility.Vector3dVector(colors)
o3d.visualization.draw_geometries([pcd])

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


Using cache found in C:\Users\vedhr/.cache\torch\hub\intel-isl_MiDaS_master
Using cache found in C:\Users\vedhr/.cache\torch\hub\intel-isl_MiDaS_master


Input tensor shape: torch.Size([1, 3, 384, 384])
