In [1]:
import numpy as np
from pathlib import Path
import cv2

def normalize_points_to_unit_cube(pts):
    """Center a point cloud on its centroid and scale it to fit in [-0.5, 0.5].

    The cloud is translated so its centroid is at the origin and divided by
    twice the largest point-to-centroid distance, so every point ends up
    within a sphere of radius 0.5 (and therefore inside the unit cube).

    pts: array-like of shape (N, C); every column takes part in the
         centroid and the norm, so callers should pass only coordinates.
    returns: new float32 array with the same shape as ``pts``. An empty
             input is returned as an empty float32 array, and a cloud whose
             points all coincide is returned centered but unscaled (zeros).
    """
    # Center in float64: casting to float32 *before* subtracting the centroid
    # throws away the low-order digits of clouds with large absolute
    # coordinates, which can collapse distinct points onto each other.
    pts = np.asarray(pts, dtype=np.float64)
    if pts.size == 0:
        # Nothing to center or scale; np.max would raise on an empty array.
        return pts.astype(np.float32)

    pts = pts - pts.mean(axis=0, keepdims=True)
    max_norm = np.linalg.norm(pts, axis=1).max()
    if max_norm > 0:
        pts = pts / (2.0 * max_norm)   # farthest point lands at radius 0.5
    return pts.astype(np.float32)

def render_depth_zbuffer(pts, image_size=224, view='+z'):
    """
    Render an orthographic depth image of a point cloud using a z-buffer.

    pts: NxC numpy array (float) with C >= 3. Only the first three columns
         (x, y, z) are used; extra columns are ignored.
    image_size: side length H of the square output image in pixels.
    view: one of '+x','-x','+y','-y','+z','-z'. The letter selects the depth
          axis (the remaining two axes span the image plane) and '-' negates
          the depth values.
    returns: HxH float32 array normalized to [0,1] where nearer => brighter
             (the smallest depth value is treated as nearest). Pixels that
             no point lands on are filled with the farthest observed depth,
             so they come out as 0. An all-zero image is returned when there
             are no usable points or every hit has the same depth.
    raises: ValueError if ``view`` is not one of the six supported names.
    """
    assert pts.ndim == 2 and pts.shape[1] >= 3

    # view -> (u axis, v axis, depth axis, negate depth?)
    projections = {
        '+z': (0, 1, 2, False), '-z': (0, 1, 2, True),   # XY plane
        '+y': (0, 2, 1, False), '-y': (0, 2, 1, True),   # XZ plane
        '+x': (1, 2, 0, False), '-x': (1, 2, 0, True),   # YZ plane
    }
    try:
        u_axis, v_axis, d_axis, negate = projections[view]
    except (KeyError, TypeError):
        raise ValueError('bad view') from None

    H = image_size
    if pts.shape[0] == 0:
        # No points at all: normalization has nothing to work with.
        return np.zeros((H, H), dtype=np.float32)

    # Normalize coordinates only; extra columns (intensity, labels, ...)
    # would otherwise skew the centroid and the scale.
    pts = normalize_points_to_unit_cube(pts[:, :3])

    u, v = pts[:, u_axis], pts[:, v_axis]
    depth = -pts[:, d_axis] if negate else pts[:, d_axis]
    depth = depth.astype(np.float32, copy=False)

    # Map u,v from [-0.5,0.5] to pixel coords
    grid = H - 1
    iu = np.clip(((u + 0.5) * grid).astype(np.int32), 0, grid)
    iv = np.clip(((v + 0.5) * grid).astype(np.int32), 0, grid)

    # Z-buffer: keep minimum (nearest) depth per pixel. fmin.at applies the
    # update once per point even when pixel indices repeat, and fmin (unlike
    # minimum) never lets a NaN depth win a pixel.
    zbuf = np.full((H, H), np.inf, dtype=np.float32)
    np.fmin.at(zbuf, (iv, iu), depth)

    # If no pixel received a finite depth, return zeros
    hit = ~np.isinf(zbuf)
    if not hit.any():
        return np.zeros((H, H), dtype=np.float32)

    # Replace untouched pixels by max depth present (background far)
    if not hit.all():
        zbuf[~hit] = zbuf[hit].max()

    # Normalize so near->1.0, far->0.0
    dmin = zbuf.min()
    dmax = zbuf.max()
    if dmax > dmin:
        img = 1.0 - (zbuf - dmin) / (dmax - dmin)
    else:
        img = np.zeros_like(zbuf, dtype=np.float32)

    return img.astype(np.float32)

def generate_6view_depth(pts, image_size=224):
    """Render the six axis-aligned depth views of a point cloud.

    pts: point array forwarded unchanged to render_depth_zbuffer.
    image_size: side length passed to each render call.
    returns: array of shape (6, H, W) stacked in the order
             +x, -x, +y, -y, +z, -z.
    """
    rendered = []
    # Axis-major, '+' before '-' gives: +x, -x, +y, -y, +z, -z.
    for axis in 'xyz':
        for sign in '+-':
            rendered.append(
                render_depth_zbuffer(pts, image_size=image_size, view=sign + axis)
            )
    return np.stack(rendered, axis=0)


In [2]:
# Dataset locations, all relative to the parent of the notebook's directory.
base_path = Path("..")
train_path = base_path / "train"
test_path = base_path / "test"
output_path = base_path / "data" / "multi_view_images"
# parents=True: the intermediate "data" directory may not exist yet, in which
# case a plain mkdir() would fail with FileNotFoundError.
output_path.mkdir(parents=True, exist_ok=True)

def save_multiview_pngs(src_root, dest_root, image_size=224):
    """Render six depth views for every point-cloud file and save them as .npy.

    Despite the name, the output is one NumPy ``.npy`` file per input file
    (not PNG images), holding what generate_6view_depth returns.

    src_root: directory (pathlib.Path) containing one sub-directory per class.
    dest_root: directory (pathlib.Path) that receives the same class
               sub-directories; created as needed.
    image_size: image side length forwarded to generate_6view_depth.

    Files are picked by suffix ``.txt`` / ``.pts`` (case-insensitive) and read
    with np.loadtxt. Files with fewer than 10 points are skipped with a
    message; any other failure on a file is printed and processing continues
    with the next file.
    """
    for class_dir in src_root.iterdir():
        if not class_dir.is_dir():
            continue
        class_name = class_dir.name
        out_class_dir = dest_root / class_name
        out_class_dir.mkdir(parents=True, exist_ok=True)

        # Process each .txt and .pts file in the class directory and save multi-view images as numpy arrays .npy
        for file_path in class_dir.glob("*"):
            if file_path.suffix.lower() not in (".txt", ".pts"):
                continue
            try:
                # ndmin=2 keeps a single-row file as shape (1, C) instead of
                # a 1-D array, so it is skipped below rather than crashing on
                # shape[1]. Values are left in loadtxt's default float64 so no
                # precision is lost before the cloud is centered downstream.
                pts = np.loadtxt(file_path, ndmin=2)
                if pts.shape[0] < 10:
                    print(f"Skipping {file_path} with insufficient points: {pts.shape[0]}")
                    continue
                if pts.shape[1] < 3:
                    raise ValueError(f"expected at least 3 columns, got {pts.shape[1]}")
                pts = pts[:, :3]  # Use only first 3 columns if more are present
                mv_images = generate_6view_depth(pts, image_size=image_size)  # Shape (6, H, W)
                out_file = out_class_dir / (file_path.stem + ".npy")
                np.save(out_file, mv_images)
            except Exception as e:
                # Best-effort batch job: report the failure and keep going.
                print(f"Error processing {file_path}: {e}")

# Convert both dataset splits; each split's output goes to a sub-directory
# of output_path named after the split.
for split_name, split_root in (("train", train_path), ("test", test_path)):
    save_multiview_pngs(split_root, output_path / split_name, image_size=224)
print("Multi-view depth images saved as .npy files.")

Multi-view depth images saved as .npy files.


In [6]:
import numpy as np

# Sanity check: load one saved sample and report its shape, dtype and value range.
a = np.load('../data/multi_view_images/train/Ash/22.npy')
for label, value in (
    ("The exact shape of the .npy files you save for each tree is: ", a.shape),
    ("The data type of the .npy files you save for each tree is: ", a.dtype),
    ("The maximum value in the .npy files you save for each tree is: ", a.max()),
    ("The minimum value in the .npy files you save for each tree is: ", a.min()),
):
    print(label, value)

print("generating 6 depth images per point cloud, corresponding to these views: +x (right) -x (left) +y (top) -y (bottom) +z (front) -z (back)")


The exact shape of the .npy files you save for each tree is:  (6, 224, 224)
The data type of the .npy files you save for each tree is:  float32
The maximum value in the .npy files you save for each tree is:  1.0
The minimum value in the .npy files you save for each tree is:  0.0
generating 6 depth images per point cloud, corresponding to these views: +x (right) -x (left) +y (top) -y (bottom) +z (front) -z (back)
