In [None]:
#Example NPZ FILE
!mkdir -p data/PittsburghBridge
!wget -P data/PittsburghBridge https://dl.fbaipublicfiles.com/pytorch3d/data/PittsburghBridge/pointcloud.npz

In [1]:
import sys
import torch

need_pytorch3d = False
try:
    import pytorch3d
except ModuleNotFoundError:
    need_pytorch3d = True
if need_pytorch3d:
    pyt_version_str = torch.__version__.split("+")[0].replace(".", "")
    version_str = "".join([
        f"py3{sys.version_info.minor}_cu",
        torch.version.cuda.replace(".", ""),
        f"_pyt{pyt_version_str}"
    ])
    !pip install iopath
    if sys.platform.startswith("linux"):
        print("Trying to install wheel for PyTorch3D")
        !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html
        pip_list = !pip freeze
        need_pytorch3d = not any(i.startswith("pytorch3d==") for i in pip_list)
    if need_pytorch3d:
        print(f"failed to find/install wheel for {version_str}")
if need_pytorch3d:
    print("Installing PyTorch3D from source")
    !pip install ninja
    !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'

Collecting iopath
  Downloading iopath-0.1.10.tar.gz (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting portalocker (from iopath)
  Downloading portalocker-3.1.1-py3-none-any.whl.metadata (8.6 kB)
Downloading portalocker-3.1.1-py3-none-any.whl (19 kB)
Building wheels for collected packages: iopath
  Building wheel for iopath (setup.py) ... [?25l[?25hdone
  Created wheel for iopath: filename=iopath-0.1.10-py3-none-any.whl size=31528 sha256=ffdf7af4b26d736da94b7323af1bdf19b6f7927a98537e15d8d854b5101067a4
  Stored in directory: /root/.cache/pip/wheels/9a/a3/b6/ac0fcd1b4ed5cfeb3db92e6a0e476cfd48ed0df92b91080c1d
Successfully built iopath
Installing collected packages: portalocker, iopath
Successfully installed iopath-0.1.10 portaloc

In [2]:
from pytorch3d.structures import Pointclouds

def bounding_sphere_normalize(points: torch.Tensor) -> torch.Tensor:
    """
    points: (N,3) tensor of point coords
    Return normalized points in a unit sphere centered at origin.
    """
    center = points.mean(dim=0, keepdim=True)
    max_dist = (points - center).norm(p=2, dim=1).max()
    points_normed = (points - center) / max_dist
    return points_normed


def load_3d_data(file_path, num_points=10000, device="cuda", do_normalize=True):
    # Load NPZ point cloud directly like in the example
    pointcloud = np.load(file_path)
    verts = torch.Tensor(pointcloud['points']).to(device)
    rgb = torch.Tensor(pointcloud['colors']).to(device)

    # Subsample if needed
    if len(verts) > num_points:
        idx = torch.randperm(len(verts))[:num_points]
        verts = verts[idx]
        rgb = rgb[idx]

    if do_normalize:
        verts = bounding_sphere_normalize(verts)

    # Return both the points tensor and the Pointclouds object
    point_cloud = Pointclouds(points=[verts], features=[rgb])
    return point_cloud  # Return both



In [3]:
from itertools import islice

import torch
from pytorch3d.structures import Pointclouds
from pytorch3d.renderer import (
    look_at_view_transform,
    FoVOrthographicCameras,
    FoVPerspectiveCameras,
    PointsRasterizationSettings,
    PointsRenderer,
    PointsRasterizer,
    AlphaCompositor
)
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import torchvision.transforms as T


class MultiViewPointCloudRenderer:
    def __init__(self, image_size=512, base_dist=20, base_elev=10, base_azim=0,
                 device=torch.device("cuda" if torch.cuda.is_available() else "cpu")):
        self.device = device
        self.image_size = image_size
        self.base_dist = base_dist
        self.base_elev = base_elev
        self.base_azim = base_azim
        self.to_tensor = T.Compose([
            T.Resize((image_size, image_size)),
            T.ToTensor()
        ])

        # Define the settings for rasterization
        self.raster_settings = PointsRasterizationSettings(
            image_size=image_size,
            radius=0.008,
            points_per_pixel=20
        )

        # Define all views relative to base view
        self.views = {
            'Default': (base_dist, base_elev, base_azim),
            'Y_90deg': (base_dist, base_elev, base_azim + 90),
            'Y_180deg': (base_dist, base_elev, base_azim + 180),
            'Y_-90deg': (base_dist, base_elev, base_azim - 90),
            'X_90deg': (base_dist, base_elev + 90, base_azim),
            'X_-90deg': (base_dist, base_elev - 90, base_azim),
        }


    def get_center_point(self, point_cloud):
        """Calculate the center point of the point cloud"""
        points = point_cloud.points_packed()
        center = torch.mean(points, dim=0)
        return center.unsqueeze(0)  # Add batch dimension

    def create_renderer(self, dist, elev, azim, center_point, background_color=(0, 0, 0)):
        """Create a renderer for specific camera parameters"""
        # Use the center point as the 'at' parameter
        R, T = look_at_view_transform(
            dist=dist,
            elev=elev,
            azim=azim,
            at=center_point,  # Look at the center of the point cloud
        )
        cameras = FoVPerspectiveCameras(
        device=self.device,
        R=R,
        T=T
        )

        rasterizer = PointsRasterizer(cameras=cameras, raster_settings=self.raster_settings)
        renderer = PointsRenderer(
            rasterizer=rasterizer,
            compositor=AlphaCompositor(background_color=background_color)
        )
        return renderer

    def load_background(self, background_path):
        bg_image = Image.open(background_path)
        bg_tensor = self.to_tensor(bg_image).to(self.device)
        return bg_tensor.permute(1, 2, 0)  # Convert to HWC format

    def render_all_views(self, point_cloud, n_views=6, background_path=None,background_color=(0, 0, 0)):
        images = {}
        center_point = self.get_center_point(point_cloud)

        if background_path:
            background = self.load_background(background_path)
        else:
            background = None

        for view_name, (dist, elev, azim) in islice(self.views.items(), n_views):
            renderer = self.create_renderer(dist, elev, azim, center_point,background_color=background_color)
            image = renderer(point_cloud)

            if background is not None:
                # Create binary mask from points
                mask = torch.any(image[0, ..., :3] > 0, dim=-1).float()
                mask = mask.unsqueeze(-1).expand(-1, -1, 3)
                composite = (image[0, ..., :3] * mask) + (background * (1 - mask))
                images[view_name] = composite
            else:
                images[view_name] = image[0, ..., :3]

        return images

In [4]:
import os
import numpy as np
import torch
import torchvision

def save_results(point_cloud, renderer,n_views,device,output_dir,output_name):

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    rendered_images = renderer.render_all_views(point_cloud=point_cloud, n_views=n_views,background_color = (1,1,1))
    # Convert dictionary of images to tensor
    rendered_tensor = []
    for name, img in rendered_images.items():
        rendered_tensor.append(img.to(device))
    rendered_tensor = torch.stack(rendered_tensor)

    # Convert rendered images to CLIP format
    rendered_images = rendered_tensor.permute(0, 3, 1, 2)  # [B, H, W, C] -> [B, C, H, W]

    # Convert to uint8 range [0, 255]
    rendered_images = (rendered_images * 255).clamp(0, 255).to(torch.uint8)

    # Save rendered image using torchvision
    torchvision.utils.save_image(
        rendered_images.float() / 255.0,  # Convert back to [0,1] range
        os.path.join(output_dir, output_name),
         normalize=False  # We've already normalized the values
     )

In [19]:
device="cuda"

point_cloud = load_3d_data(
    "/content/output.npz",
    num_points=100000
)


renderer = MultiViewPointCloudRenderer(
    image_size=1024,
    base_dist=2.5,  # Your default view distance
    base_elev=10,  # Your default elevation
    base_azim=0,  # Your default azimuth
    device=device
)

save_results(
    point_cloud=point_cloud,
    renderer=renderer,
    n_views=6,
    output_dir="./output",
    output_name="point_cloud2.png",
    device=device
)

In [20]:
from pytorch3d.vis.plotly_vis import plot_scene
plot_scene({
    "Pointcloud": {
        "person": point_cloud
    }
})

Ground truth was saved as ply file so we convert npz

In [None]:
!pip install open3d numpy

In [18]:
import open3d as o3d
import numpy as np

# Load the .ply file
pcd = o3d.io.read_point_cloud("/content/gt_pointcloud_cut.ply")

# Extract points and colors
points = np.asarray(pcd.points)
colors = np.asarray(pcd.colors)

# Lighten the base colors (adjust factor as needed)
colors = colors * 1.5  # Increase brightness by 50%
colors = np.clip(colors, 0, 1)  # Clip values to the valid range [0, 1]

# Save to .npz
np.savez_compressed("output.npz", points=points, colors=colors)

calculation of IOU

In [26]:
import numpy as np

# Load highlighted (predicted) point cloud
highlighted_data = np.load("/content/highlighted_points.npz")

# Check for available keys
print(f"Available keys in highlighted_data: {highlighted_data.files}")

# Access the correct key (replace 'correct_key' with the actual key for labels)
# For example, if 'points' contains the label information:
highlighted_labels = highlighted_data["points"]  # Assuming 'points' is the key for labels. Update if needed

# Load ground truth point cloud
gt_data = np.load("/content/output.npz")
# Check for available keys
print(f"Available keys in gt_data: {gt_data.files}")

# Access the correct key (replace 'correct_key' with the actual key for labels)
# For example, if 'points' contains the label information:
gt_labels = gt_data["points"]  # Assuming 'points' is the key for labels. Update if needed

# Calculate IoU (which is also mIoU in this case)
intersection = np.sum((highlighted_labels == 1) & (gt_labels == 1))  # Assuming 1 represents the affordance label
union = np.sum((highlighted_labels == 1) | (gt_labels == 1))
iou = intersection / union if union > 0 else 1.0  # Handle empty union

print(f"IoU (mIoU): {iou}")

Keys in highlighted_points.npz: ['points', 'colors', 'probabilities']
Keys in output.npz: ['points', 'colors']
Shape of highlighted_probs: (2048, 2)
Shape of gt_data['points']: (2048, 3)

First 100 rows of highlighted_probs:
[[0.00187897 0.99812096]
 [0.2391436  0.76085645]
 [0.02320904 0.9767909 ]
 [0.04775775 0.9522423 ]
 [0.22267689 0.77732307]
 [0.00206222 0.99793774]
 [0.00181986 0.99818015]
 [0.15644771 0.8435523 ]
 [0.00266864 0.9973314 ]
 [0.00748238 0.9925176 ]
 [0.21640524 0.7835948 ]
 [0.0019095  0.99809045]
 [0.00175592 0.99824405]
 [0.0025733  0.9974267 ]
 [0.16696657 0.8330334 ]
 [0.00207661 0.9979234 ]
 [0.24048373 0.75951624]
 [0.0024362  0.99756384]
 [0.06685576 0.9331442 ]
 [0.00171871 0.99828136]
 [0.00222155 0.9977785 ]
 [0.00193887 0.9980611 ]
 [0.0102159  0.98978406]
 [0.00185264 0.99814737]
 [0.20102109 0.79897887]
 [0.00219144 0.9978085 ]
 [0.00199965 0.9980003 ]
 [0.00200784 0.99799216]
 [0.00180675 0.9981932 ]
 [0.00184784 0.9981522 ]
 [0.00837803 0.991622  ]


In [25]:
import numpy as np

# Load highlighted (predicted) point cloud
highlighted_data = np.load("/content/highlighted_points.npz")
highlighted_probs = highlighted_data["probabilities"]  # Accessing probabilities

# Assuming a probability threshold of 0.5 for affordance detection:
# Selecting the second column (index 1), which likely corresponds to the affordance class
highlighted_labels = (highlighted_probs[:, 1] >= 0.5).astype(int)

# Load ground truth point cloud
gt_data = np.load("/content/output.npz")
gt_points = gt_data["points"]  # Accessing points in ground truth

# We need to create a ground truth label array.
# Assuming all points in gt_data are affordance points:
gt_labels = np.ones_like(gt_points[:, 0], dtype=int) # Creating an array of 1s

# If ground truth has different affordance points,
# you need to load or generate the correct gt_labels.

# Calculate IoU
intersection = np.sum((highlighted_labels == 1) & (gt_labels == 1))
union = np.sum((highlighted_labels == 1) | (gt_labels == 1))
iou = intersection / union if union > 0 else 1.0

print(f"IoU (mIoU): {iou}")

IoU (mIoU): 1.0


In [30]:
import numpy as np

# Load prediction data
highlighted_data = np.load('highlighted_points.npz')
highlighted_probs = highlighted_data['probabilities']  # Probabilities for affordance and non-affordance
predicted_points = highlighted_data['points']  # Shape (2048, 3)

# Load ground truth data
gt_data = np.load('output.npz')
gt_points = gt_data['points']  # Shape (2048, 3)

# Define a threshold for spatial proximity (distance in 3D space)
proximity_threshold = 0.01  # Adjust based on your dataset (e.g., 1 cm in 3D space)

# Initialize ground truth labels (0 = non-affordance by default)
gt_labels = np.zeros(gt_points.shape[0], dtype=int)

# Assign affordance labels (1) based on proximity matching
for i, gt_point in enumerate(gt_points):
    distances = np.linalg.norm(predicted_points - gt_point, axis=1)
    if np.any(distances < proximity_threshold):
        gt_labels[i] = 1

# Extract affordance probabilities from predictions
affordance_probs = highlighted_probs[:, 1]  # Column 1 corresponds to affordance probabilities

# Threshold probabilities to classify points as affordance or non-affordance
threshold = 0.5
highlighted_labels = (affordance_probs >= threshold).astype(int)

# Calculate IoU
intersection = np.sum((highlighted_labels == 1) & (gt_labels == 1))
union = np.sum((highlighted_labels == 1) | (gt_labels == 1))
iou = intersection / union if union > 0 else 0.0

print(f"IoU: {iou}")
print(f"Sum of ground truth labels (gt_labels): {np.sum(gt_labels)}")
print(f"Sum of predicted affordance labels (highlighted_labels): {np.sum(highlighted_labels)}")


IoU: 1.0
Sum of ground truth labels (gt_labels): 2048
Sum of predicted affordance labels (highlighted_labels): 2048


In [49]:
import numpy as np

# Load prediction data
highlighted_data = np.load('highlighted_points.npz')
highlighted_points = highlighted_data['points']  # Predicted points (2048, 3)
highlighted_probs = highlighted_data['probabilities']  # Probabilities (2048, 2)

# Load ground truth data
gt_data = np.load('output.npz')
gt_points = gt_data['points']  # Ground truth points (2048, 3)

# Mock `labels_dict` for ground truth affordance labels (if not already loaded)
# Replace this with the actual ground truth labels from your dataset
labels_dict = {
    "cut": np.random.choice([0, 1], size=2048)  # Replace this with actual labels for "cut"
}

# Specify affordance to compute IoU (e.g., 'cut')
affordance = "cut"
gt_labels = labels_dict[affordance]  # Binary ground truth labels for the affordance

# Extract predicted labels for the same affordance
affordance_probs = highlighted_probs[:, 1]  # Probabilities for affordance class
threshold = 0.1  # Classification threshold
highlighted_labels = (affordance_probs >= threshold).astype(int)

# Calculate IoU
intersection = np.sum((highlighted_labels == 1) & (gt_labels == 1))
union = np.sum((highlighted_labels == 1) | (gt_labels == 1))
iou = intersection / union if union > 0 else 0.0

print(f"IoU for {affordance}: {iou}")


IoU for cut: 0.50634765625


In [46]:
import numpy as np

# Load prediction data
highlighted_data = np.load('highlighted_points.npz')
highlighted_points = highlighted_data['points']  # Predicted points (2048, 3)
highlighted_probs = highlighted_data['probabilities']  # Probabilities (2048, 2)

# Load ground truth data
gt_data = np.load('output.npz')
gt_points = gt_data['points']  # Ground truth points (2048, 3)

# Mock `labels_dict` for ground truth affordance labels (if not already loaded)
# Replace this with the actual ground truth labels for your dataset
labels_dict = {
    "cut": np.random.choice([0, 1], size=2048)  # Replace this with actual labels for "cut"
}

# Specify affordance to compute IoU (e.g., 'cut')
affordance = "cut"
gt_labels = labels_dict[affordance]  # Binary ground truth labels for the affordance

# Extract affordance probabilities
affordance_probs = highlighted_probs[:, 1]  # Probabilities for affordance class

# Evaluate IoU across a range of thresholds
thresholds = np.arange(0.0, 1.05, 0.05)  # Thresholds from 0.0 to 1.0 in steps of 0.05
iou_scores = []

for threshold in thresholds:
    # Generate binary predictions based on the current threshold
    highlighted_labels = (affordance_probs >= threshold).astype(int)

    # Calculate IoU
    intersection = np.sum((highlighted_labels == 1) & (gt_labels == 1))
    union = np.sum((highlighted_labels == 1) | (gt_labels == 1))
    iou = intersection / union if union > 0 else 0.0

    iou_scores.append(iou)
    print(f"Threshold: {threshold:.2f}, IoU: {iou:.4f}")

# Find the best threshold
best_threshold = thresholds[np.argmax(iou_scores)]
best_iou = max(iou_scores)

print(f"Best Threshold: {best_threshold:.2f}, Best IoU: {best_iou:.4f}")


Threshold: 0.00, IoU: 0.5093
Threshold: 0.05, IoU: 0.5093
Threshold: 0.10, IoU: 0.5093
Threshold: 0.15, IoU: 0.5093
Threshold: 0.20, IoU: 0.5093
Threshold: 0.25, IoU: 0.5093
Threshold: 0.30, IoU: 0.5093
Threshold: 0.35, IoU: 0.5093
Threshold: 0.40, IoU: 0.5093
Threshold: 0.45, IoU: 0.5093
Threshold: 0.50, IoU: 0.5093
Threshold: 0.55, IoU: 0.5093
Threshold: 0.60, IoU: 0.5093
Threshold: 0.65, IoU: 0.5093
Threshold: 0.70, IoU: 0.5093
Threshold: 0.75, IoU: 0.5093
Threshold: 0.80, IoU: 0.4685
Threshold: 0.85, IoU: 0.4453
Threshold: 0.90, IoU: 0.4298
Threshold: 0.95, IoU: 0.4121
Threshold: 1.00, IoU: 0.0000
Best Threshold: 0.00, Best IoU: 0.5093
