<a href="https://colab.research.google.com/github/cagBRT/PointCloud/blob/main/Image_to_Point_Cloud_with_Point_E.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Reference tutorial: https://segments.ai/blog/image-to-pointcloud-with-point-e


#  Point-E
Point-E is a deep learning model created by OpenAI that transforms a text caption into a colored point cloud. More specifically, Point-E consists of three steps, each handled by a dedicated ML model:

1. Generate an image conditioned on a text caption.
2. Create a point cloud (1024 points) conditioned on the image.
3. Upsample the point cloud (to 4096 points) conditioned on the image and the low-resolution point cloud.

In [None]:
!pip install git+https://github.com/openai/point-e -q

In [None]:
from PIL import Image
import torch
from tqdm.auto import tqdm

from point_e.diffusion.configs import DIFFUSION_CONFIGS, diffusion_from_config
from point_e.diffusion.sampler import PointCloudSampler
from point_e.models.download import load_checkpoint
from point_e.models.configs import MODEL_CONFIGS, model_from_config
from point_e.util.plotting import plot_point_cloud


In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Build the base (image -> coarse point cloud) network and its diffusion
# process. Both are looked up under the same config key, `base_name`.
print("Creating base model")
base_name = "base300M"  # Use base1B for better results

base_model_cfg = MODEL_CONFIGS[base_name]
base_model = model_from_config(base_model_cfg, device)
# Switch to evaluation mode; this notebook only samples from the model.
base_model.eval()

base_diffusion_cfg = DIFFUSION_CONFIGS[base_name]
base_diffusion = diffusion_from_config(base_diffusion_cfg)

In [None]:
# Build the upsampling network and its diffusion process; both use the
# "upsample" entry of their respective config tables.
print("Creating upsample model")

upsample_model_cfg = MODEL_CONFIGS["upsample"]
upsampler_model = model_from_config(upsample_model_cfg, device)
# Switch to evaluation mode; this notebook only samples from the model.
upsampler_model.eval()

upsample_diffusion_cfg = DIFFUSION_CONFIGS["upsample"]
upsampler_diffusion = diffusion_from_config(upsample_diffusion_cfg)

In [None]:
# Load the weights for the checkpoint named by `base_name` into the base model.
# `load_checkpoint(base_name, device)` supplies the state dict that
# `load_state_dict` then applies to `base_model`.
# NOTE(review): presumably the checkpoint is downloaded on first use and
# cached afterwards — confirm against the point_e download helper.
print("Downloading base checkpoint")
base_model.load_state_dict(load_checkpoint(base_name, device))

In [None]:
# Load the weights for the "upsample" checkpoint into the upsampler model.
# `load_checkpoint("upsample", device)` supplies the state dict that
# `load_state_dict` then applies to `upsampler_model`.
# NOTE(review): presumably the checkpoint is downloaded on first use and
# cached afterwards — confirm against the point_e download helper.
print("Downloading upsampler checkpoint")
upsampler_model.load_state_dict(load_checkpoint("upsample", device))

In [None]:
# Chain the two stages (image-to-point-cloud model, then upsampler) into a
# single sampler. Each list argument has one entry per stage, in that order.
coarse_points = 1024  # points requested from the base model
final_points = 4096   # total points after upsampling

sampler = PointCloudSampler(
    device=device,
    models=[base_model, upsampler_model],
    diffusions=[base_diffusion, upsampler_diffusion],
    # The second entry is the remainder needed to reach `final_points`.
    num_points=[coarse_points, final_points - coarse_points],
    # Extra per-point channels: red, green and blue.
    aux_channels=["R", "G", "B"],
    guidance_scale=[3.0, 3.0],
)

# Create a point cloud based on an image

Conditioning directly on an image (skipping the text-to-image step) typically results in higher-quality point clouds than starting from a text caption.

In [None]:
# Image the sampler will be conditioned on.
# Point `img_path` at your own file before running this cell.
img_path = "/content/treeImage.png" # Fill in your image path
img = Image.open(fp=img_path)

In [None]:
# Draw one sample from the model, conditioned on `img`
# (this takes around 3 minutes on base300M).
progressive_outputs = sampler.sample_batch_progressive(
    batch_size=1,
    model_kwargs={"images": [img]},
)

# The sampler yields a sequence of outputs; only the last one is kept.
samples = None
for x in tqdm(progressive_outputs):
    samples = x

# Convert the final output to point clouds and take the single batch item.
pc = sampler.output_to_point_clouds(samples)[0]