In [1]:
import numpy as np
import torch
from torch import nn

import spconv
import sys
sys.path.append('/home/jhultman/Projects/tmp/PV-RCNN/Pointnet2.PyTorch/')
from pointnet2.pointnet2_utils import ball_query, gather_operation, furthest_point_sample

In [2]:
class PvrcnnConfig:
    C_in = 4
    n_keypoints = 2048
    strides = [1, 2, 4, 8]
    max_num_points = 5
    max_voxels = 40000
    voxel_size = [0.05, 0.05, 0.1]
    grid_bounds = [0, -40, -3, 64, 40, 1]
    sample_fpath = './sample.bin'

In [3]:
class VSA_MLP(nn.Module):
    """
    Represents G in equation 2.
    """

    def __init__(self, C_in, channels):
        """
        C_in: incoming channels.
        channels: length-3 list of channels in each layer.
        """
        super(VSA_MLP, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(C_in, channels[0], bias=True),
            nn.BatchNorm1d(channels[0]),
            nn.ReLU(inplace=True),
            nn.Linear(channels[0], channels[1], bias=True),
            nn.BatchNorm1d(channels[1]),
            nn.ReLU(inplace=True),
            nn.Linear(channels[1], channels[2], bias=True),
        )

    def forward(self, voxel_set):
        x = self.layers(voxel_set)
        x = x.max(2)
        return x

In [19]:
class VoxelSetAbstraction(nn.Module):
    """
    For each keypoint, convert its location to
    continuous voxel index coordinates. Then fetch
    voxels within ball query.
    """

    def __init__(self, radius, nsample, voxel_size, volume_offset):
        """
        radius: maximum distance for ball query, measured in raw point cloud coordinates.
        nsample: maximum number of neighbors to return in ball query.
        voxel_size: length-3 tensor describing size of atomic voxel, accounting for stride.
        volume_offset: length-3 tensor describing coordinate offset of voxel grid.
        """
        super(VoxelSetAbstraction, self).__init__()
        self.radius = radius
        self.nsample = nsample
        self.voxel_size = voxel_size
        self.volume_offset = volume_offset

    def to_raw_coordinates(self, voxel_index):
        """
        voxel_index: shape (B, Tk, 4) array of coordinates
        return: shape (B, Tk, 3) array of locations in raw coordinates.
        """
        xyz = voxel_index[..., 1:].float() * self.voxel_size
        xyz = (xyz + self.volume_offset).unsqueeze(0)
        return xyz

    def get_neighbors(self, keypoint_location, voxel_feature, voxel_location):
        neighbor_index = ball_query(self.radius, self.nsample, voxel_location, keypoint_location)
        neighbor_feature = gather_operation(voxel_feature, neighbor_index)
        neighbor_location = gather_operation(voxel_location, neighbor_index)
        return neighbor_feature, neighbor_location

    def combine_features(self, neighbor_feature, neighbor_location, keypoint_location):
        """Form neighborhood feature set (equation 1)."""
        offset_location = neighbor_location - keypoint_location
        combined_feature = torch.cat((neighbor_feature, offset_location), dim=2)
        return combined_feature

    def forward(self, keypoint_location, voxel_feature, voxel_index):
        voxel_location = self.to_raw_coordinates(voxel_index)
        neighbor_feature, neighbor_location = self.get_neighbors(keypoint_location, voxel_feature, voxel_location)
        feature = self.combine_features(neighbor_feature, neighbor_location, keypoint_location)
        return feature

In [20]:
class CNN_3D(nn.Module):
    """
    Placeholder sparse 3D CNN with four blocks:
    
        block_0: [1600, 1280, 41] -> [1600, 1280, 41]
        block_1: [1600, 1280, 41] -> [800, 640, 21]
        block_2: [800, 640, 21]   -> [400, 320, 11]
        block_3: [400, 320, 11]   -> [200, 160, 6]
    
    Returns feature volumes strided 1x, 2x, 4x, 8x.
    """
    
    def __init__(self, C_in, shape):
        super(CNN_3D, self).__init__()
        self.blocks = spconv.SparseSequential(
            spconv.SparseConv3d(C_in, 16, 3, 1, padding=0, bias=False),
            spconv.SparseConv3d(16, 16, 3, 2, padding=1, bias=False),
            spconv.SparseConv3d(16, 32, 3, 2, padding=1, bias=False),
            spconv.SparseConv3d(32, 64, 3, 2, padding=1, bias=False),
        )
        self.shape = shape

    def forward(self, features, coordinates, batch_size):
        x0 = spconv.SparseConvTensor(
            features, coordinates.int(), self.shape, batch_size,
        )
        x1 = self.blocks[0](x0)
        x2 = self.blocks[1](x1)
        x3 = self.blocks[2](x2)
        x4 = self.blocks[3](x3)
        x = [x1, x2, x3, x4]
        return x

In [21]:
class PV_RCNN(nn.Module):
    """
    Carry out feature computation described in PV-RCNN paper.
    """
    
    def __init__(self, num_keypoint):
        """
        num_keypoint: number of keypoints
        """
        super(PV_RCNN, self).__init__()
        self.num_keypoint = num_keypoint
        pass
    
    def forward(self, raw_point):
        keypoint_index = furthest_point_sample(raw_point, self.num_keypoint)
        keypoint = gather_operation(raw_point, keypoint_index)

In [22]:
cfg = PvrcnnConfig()

voxel_generator = spconv.utils.VoxelGenerator(
    voxel_size=cfg.voxel_size, 
    point_cloud_range=cfg.grid_bounds,
    max_voxels=cfg.max_voxels,
    max_num_points=cfg.max_num_points,
)

points = np.fromfile(cfg.sample_fpath, dtype=np.float32).reshape(-1, cfg.C_in)
features, coordinates, voxel_population = voxel_generator.generate(points)
coordinates = np.pad(coordinates, ((0, 0), (1, 0)), mode="constant", constant_values=0)

from_numpy = lambda x: torch.from_numpy(x).cuda()
points, features, coordinates, voxel_population = map(
    from_numpy, (points, features, coordinates, voxel_population))
features = features.view(-1, cfg.C_in)

shape = np.r_[voxel_generator.grid_size[::-1]] + [1, 0, 0] # [1280, 1600, 40] -> [41, 1600, 1280]
cnn_3d = CNN_3D(C_in=cfg.C_in, shape=shape).cuda()
out = cnn_3d(features.view(-1, 4), coordinates, batch_size=1)

In [23]:
vsa = VoxelSetAbstraction(
    radius=0.2, nsample=25, 
    voxel_size=torch.cuda.FloatTensor([0.05, 0.05, 0.1]),
    volume_offset=torch.cuda.FloatTensor([0, 0, 0]),
)

In [53]:
indices_keypoint = furthest_point_sample(points.unsqueeze(0), cfg.n_keypoints)
keypoint_location = points[indices_keypoint.long()]

idx = 2
voxel_size = torch.cuda.FloatTensor(cfg.voxel_size) * 2 ** (idx)
volume_offset = torch.cuda.FloatTensor([0, 0, 0])

feature_volume = out[idx]
voxel_feature = feature_volume.features
voxel_index = feature_volume.indices

xyz = voxel_index[..., 1:].float() * voxel_size
xyz = (xyz + volume_offset).unsqueeze(0)

neighbor_index = ball_query(0.4, 8, xyz, keypoint_location)
neighbor_feature = gather_operation(
    voxel_feature.unsqueeze(0), 
    neighbor_index.squeeze(0),
)

In [55]:
neighbor_index.shape

torch.Size([1, 2048, 8])

In [54]:
neighbor_feature.shape

torch.Size([2048, 43009, 8])

In [52]:
neighbor_index.shape

torch.Size([1, 2048, 8])

In [None]:
neighbor_feature.

In [42]:
voxel_feature.shape

torch.Size([1, 415043, 16])

In [36]:
voxel_feature.shape

torch.Size([415043, 16])

In [32]:
neighbor_index.shape

torch.Size([1, 2048, 25])

In [30]:
voxel_feature.shape

torch.Size([415043, 16])

In [12]:
xyz.shape

torch.Size([415043, 3])

In [None]:
#def forward(self, keypoint_location, voxel_feature, voxel_index):