# 🧪 Test the PointcloudTokenizer
Imports `PointcloudTokenizer`, which builds tokens for pointcloud patches.

Extracts the weights of the whole pretrained point2vec pipeline.

Obtains basic initial patch embeddings for a loaded pointcloud.

In [31]:
import sys
from pathlib import Path

# Add the src directory to the Python path.
# Path() is the current working directory, so this appends <cwd>/../src.
# Keep this before the `ext...` import below — presumably that package lives
# under ../src (the checkpoint path used later points there); confirm.
sys.path.append(str(Path().resolve().parent / 'src'))

import torch
import numpy as np

# Project-local import; expected to resolve through the sys.path entry added above.
from ext.point2vec.point2vec.modules.pointnet import PointcloudTokenizer

# Reached only if every import above succeeded.
print("✅ Both imports resolve!")


✅ Both imports resolve!


In [None]:
# Import the helper module under an alias so that the name `checkpoint` can
# hold the extracted weights without shadowing the module it came from.
from ext.point2vec.point2vec.utils import checkpoint as checkpoint_utils

checkpoint_path = "../src/ext/point2vec/pre_point2vec-epoch.799-step.64800.ckpt"

# `checkpoint` is a mapping keyed by parameter name (its keys are printed below).
checkpoint = checkpoint_utils.extract_model_checkpoint(checkpoint_path)

# Guard on the extracted result rather than on the path string: a non-empty
# path literal is always truthy and says nothing about whether loading worked.
if checkpoint:
    print(checkpoint.keys())
    print("point2vec pretrained weights are loaded!")
else:
    print(f"No weights were extracted from {checkpoint_path}")

dict_keys(['positional_encoding.0.weight', 'positional_encoding.0.bias', 'positional_encoding.2.weight', 'positional_encoding.2.bias', 'tokenizer.embedding.first_conv.0.weight', 'tokenizer.embedding.first_conv.1.weight', 'tokenizer.embedding.first_conv.1.bias', 'tokenizer.embedding.first_conv.1.running_mean', 'tokenizer.embedding.first_conv.1.running_var', 'tokenizer.embedding.first_conv.1.num_batches_tracked', 'tokenizer.embedding.first_conv.3.weight', 'tokenizer.embedding.first_conv.3.bias', 'tokenizer.embedding.second_conv.0.weight', 'tokenizer.embedding.second_conv.1.weight', 'tokenizer.embedding.second_conv.1.bias', 'tokenizer.embedding.second_conv.1.running_mean', 'tokenizer.embedding.second_conv.1.running_var', 'tokenizer.embedding.second_conv.1.num_batches_tracked', 'tokenizer.embedding.second_conv.3.weight', 'tokenizer.embedding.second_conv.3.bias', 'encoder.blocks.0.norm1.weight', 'encoder.blocks.0.norm1.bias', 'encoder.blocks.0.attn.qkv.weight', 'encoder.blocks.0.attn.qkv.bi

In [36]:
# Load one sample pointcloud. The context manager closes the underlying .npz
# archive as soon as the array has been read into memory.
with np.load('../data/pointclouds/02691156_10aa040f470500c6a66ef8df4909ded9_2048.npz') as data:
    # Check what's in the file
    print("Keys in file:", list(data.keys()))

    # Load the point cloud data (adjust key name as needed)
    pointcloud_np = data['points']

# Convert to PyTorch tensor
pointcloud_tensor = torch.from_numpy(pointcloud_np)

print(f"Original numpy shape: {pointcloud_np.shape}")
print(f"Original numpy dtype: {pointcloud_np.dtype}")
print(f"Tensor shape: {pointcloud_tensor.shape}")
print(f"Tensor dtype: {pointcloud_tensor.dtype}")


# TODO: find a way to use tokenizer.embedding pretrained weights from loaded checkpoint
# The tokenizer below is therefore freshly initialised, not pretrained.
# NOTE(review): arguments are presumably (num_groups, group_size, group_radius,
# token_dim) — confirm against PointcloudTokenizer; the shapes printed below
# show 64 tokens of width 384.
tokenizer = PointcloudTokenizer(64, 32, None, 384)

# Add a leading batch dimension instead of hard-coding the point count, and
# cast to float32 (a no-op for float32 input) to match the module's parameters.
pointcloud_batch = pointcloud_tensor.float().unsqueeze(0)

tokens, centers = tokenizer(pointcloud_batch)
print(f"Tokens shape: {tokens.shape}")
print(f"Tokens dtype: {tokens.dtype}")
print(f"Centers shape: {centers.shape}")
print(f"Centers dtype: {centers.dtype}")


Keys in file: ['points']
Original numpy shape: (2048, 3)
Original numpy dtype: float32
Tensor shape: torch.Size([2048, 3])
Tensor dtype: torch.float32
Tokens shape: torch.Size([1, 64, 384])
Tokens dtype: torch.float32
Centers shape: torch.Size([1, 64, 3])
Centers dtype: torch.float32
