<a href="https://colab.research.google.com/github/ghr8635/E2E-DriveAI-ROS2-based-Modular-Framework-for-Autonomous-Vehicle-Control/blob/main/self_built_point_pillar_architecture_(simple_data).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!pip install open3d

Collecting open3d
  Downloading open3d-0.18.0-cp310-cp310-manylinux_2_27_x86_64.whl.metadata (4.2 kB)
Collecting dash>=2.6.0 (from open3d)
  Downloading dash-2.18.2-py3-none-any.whl.metadata (10 kB)
Collecting configargparse (from open3d)
  Downloading ConfigArgParse-1.7-py3-none-any.whl.metadata (23 kB)
Collecting ipywidgets>=8.0.4 (from open3d)
  Downloading ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)
Collecting addict (from open3d)
  Downloading addict-2.4.0-py3-none-any.whl.metadata (1.0 kB)
Collecting pyquaternion (from open3d)
  Downloading pyquaternion-0.9.9-py3-none-any.whl.metadata (1.4 kB)
Collecting werkzeug>=2.2.3 (from open3d)
  Downloading werkzeug-3.0.6-py3-none-any.whl.metadata (3.7 kB)
Collecting dash-html-components==2.0.0 (from dash>=2.6.0->open3d)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash>=2.6.0->open3d)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import Dataset, DataLoader

**Define the PointPillars Feature Extraction Model**

In [10]:
class PillarFeatureEncoder(nn.Module):
    def __init__(self, in_channels, out_channels, num_points_per_pillar, grid_x, grid_y):
        super(PillarFeatureEncoder, self).__init__()
        self.num_points_per_pillar = num_points_per_pillar
        self.grid_x, self.grid_y = grid_x, grid_y
        # Simple linear layer to encode pillar features
        self.fc = nn.Linear(in_channels, out_channels)

    def forward(self, pillars):
        # Flatten and encode features
        pillars = self.fc(pillars)  # Shape: (batch, grid_x, grid_y, num_points, out_channels)
        return pillars.mean(dim=2)  # Reduce across point dimension to get (batch, grid_x, grid_y, out_channels)

class BackboneNetwork(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(BackboneNetwork, self).__init__()
        # Simple 2D CNN layers for feature extraction
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(128, out_channels, kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.conv3(x)  # Shape: (batch, out_channels, grid_x//4, grid_y//4)
        return x

class PointPillarsFeatureExtractor(nn.Module):
    def __init__(self, in_channels=4, out_channels=256, num_points_per_pillar=32, grid_x=100, grid_y=100):
        super(PointPillarsFeatureExtractor, self).__init__()
        self.pfe = PillarFeatureEncoder(in_channels, out_channels, num_points_per_pillar, grid_x, grid_y)
        self.backbone = BackboneNetwork(out_channels, out_channels)

    def forward(self, x):
        pillar_features = self.pfe(x)  # Shape: (batch, grid_x, grid_y, out_channels)
        pillar_features = pillar_features.permute(0, 3, 1, 2)  # Shape: (batch, out_channels, grid_x, grid_y)
        feature_map = self.backbone(pillar_features)
        return feature_map

**Prepare a Simple Dataset for Training which mimics pcd data**

In [11]:
class SyntheticPointCloudDataset(Dataset):
    def __init__(self, num_samples, grid_x=100, grid_y=100, num_points_per_pillar=32, in_channels=4):
        self.num_samples = num_samples
        self.grid_x, self.grid_y = grid_x, grid_y
        self.num_points_per_pillar = num_points_per_pillar
        self.in_channels = in_channels

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        # Generate random pillars with shape: (grid_x, grid_y, num_points, in_channels)
        pillars = np.random.rand(self.grid_x, self.grid_y, self.num_points_per_pillar, self.in_channels).astype(np.float32)
        return torch.tensor(pillars)

# Parameters
num_samples = 100
dataset = SyntheticPointCloudDataset(num_samples)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)


**Training Loop (Optional, to learn feature extraction patterns)**

In [None]:
# Instantiate model, optimizer, and criterion
model = PointPillarsFeatureExtractor()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# Simple Training Loop
epochs = 5
for epoch in range(epochs):
    for batch_idx, pillars in enumerate(dataloader):
        optimizer.zero_grad()

        # Forward pass
        features = model(pillars)  # Extracted features, shape: (batch, out_channels, grid_x//4, grid_y//4)

        # Dummy target: here we're using the output itself as target for demonstration
        target = features.clone().detach()  # This is just for illustrative purposes
        loss = criterion(features, target)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        print(f"Epoch [{epoch+1}/{epochs}], Batch [{batch_idx+1}/{len(dataloader)}], Loss: {loss.item():.4f}")


Epoch [1/5], Batch [1/25], Loss: 0.0000
Epoch [1/5], Batch [2/25], Loss: 0.0000
Epoch [1/5], Batch [3/25], Loss: 0.0000
Epoch [1/5], Batch [4/25], Loss: 0.0000
Epoch [1/5], Batch [5/25], Loss: 0.0000
Epoch [1/5], Batch [6/25], Loss: 0.0000
Epoch [1/5], Batch [7/25], Loss: 0.0000
Epoch [1/5], Batch [8/25], Loss: 0.0000
Epoch [1/5], Batch [9/25], Loss: 0.0000
Epoch [1/5], Batch [10/25], Loss: 0.0000
Epoch [1/5], Batch [11/25], Loss: 0.0000
Epoch [1/5], Batch [12/25], Loss: 0.0000
Epoch [1/5], Batch [13/25], Loss: 0.0000
Epoch [1/5], Batch [14/25], Loss: 0.0000
Epoch [1/5], Batch [15/25], Loss: 0.0000
Epoch [1/5], Batch [16/25], Loss: 0.0000
Epoch [1/5], Batch [17/25], Loss: 0.0000
Epoch [1/5], Batch [18/25], Loss: 0.0000
Epoch [1/5], Batch [19/25], Loss: 0.0000
Epoch [1/5], Batch [20/25], Loss: 0.0000
Epoch [1/5], Batch [21/25], Loss: 0.0000
Epoch [1/5], Batch [22/25], Loss: 0.0000
Epoch [1/5], Batch [23/25], Loss: 0.0000
Epoch [1/5], Batch [24/25], Loss: 0.0000
Epoch [1/5], Batch [25/25

**Saving Model**

In [None]:
# Define the file path where you want to save the model
model_save_path = "/content/drive/MyDrive/ROS2-Modular-Framework-for-End-to-End-Autonomous-Vehicle-Control-from-Raw-Sensor-Data/self_built_point_pillar.pth"

# After training is complete, save the model
torch.save(model.state_dict(), model_save_path)
print(f"Model saved to {model_save_path}")


Model saved to /content/drive/MyDrive/ROS2-Modular-Framework-for-End-to-End-Autonomous-Vehicle-Control-from-Raw-Sensor-Data/self_built_point_pillar.pth


**Note: Upto this point the point pillar based feature extracting model is trained with simply generated dataset and saved. The next part is for inference where a pcd file is processed and provided to the model for inference.**

In [4]:
import open3d as o3d
import numpy as np

# Load the PCD file
pcd = o3d.io.read_point_cloud('/content/drive/MyDrive/ROS2-Modular-Framework-for-End-to-End-Autonomous-Vehicle-Control-from-Raw-Sensor-Data/lidar_01__2023-06-02-21-28-09-321.pcd')

# Convert the point cloud into a NumPy array (N, 3), where N is the number of points
points = np.asarray(pcd.points)

print(f"Loaded point cloud with {points.shape[0]} points")


Loaded point cloud with 65536 points


In [7]:
def preprocess_point_cloud(points, grid_x, grid_y, voxel_size=0.1):
    """
    Convert 3D point cloud to pillar representation.

    Args:
    - points: (N, 3) numpy array where N is the number of points.
    - grid_x: Number of pillars along the X axis.
    - grid_y: Number of pillars along the Y axis.
    - voxel_size: Size of each voxel (grid cell).

    Returns:
    - pillars: The preprocessed point cloud as pillar features.
    """
    x_min, y_min = np.min(points[:, 0]), np.min(points[:, 1])
    x_max, y_max = np.max(points[:, 0]), np.max(points[:, 1])

    # Create grid for the 2D space
    grid_x_range = np.linspace(x_min, x_max, grid_x)
    grid_y_range = np.linspace(y_min, y_max, grid_y)

    # Initialize pillar features
    pillars = np.zeros((grid_x, grid_y, 32, 4))  # (grid_x, grid_y, max_points_per_pillar, feature_dim)

    for point in points:
        # Calculate the 2D grid coordinates for the point
        x_idx = int((point[0] - x_min) / voxel_size)
        y_idx = int((point[1] - y_min) / voxel_size)

        # Ensure indices are within the grid bounds
        x_idx = min(x_idx, grid_x - 1)
        y_idx = min(y_idx, grid_y - 1)

        # Add the point features (x, y, z, intensity) to the pillar
        pillar = pillars[x_idx, y_idx]

        # If there is an empty space in the pillar (0 entries), insert the point
        empty_slot_idx = np.where(pillar[:, 0] == 0)[0]

        if len(empty_slot_idx) > 0:
            # Fill the first empty slot (use the first one)
            pillar_idx = empty_slot_idx[0]
            pillars[x_idx, y_idx, pillar_idx] = np.append(point, 1)  # Add intensity as 1 for now
        else:
            # If no empty slots, replace the point in the pillar (you could add a logic to choose the best point)
            pillar_idx = np.argmin(np.linalg.norm(pillar[:, :3], axis=1))  # Choose the point closest to the origin of the pillar
            pillars[x_idx, y_idx, pillar_idx] = np.append(point, 1)  # Replace with the new point's features

    return pillars

# Set grid resolution and voxel size
grid_x, grid_y = 100, 100  # Resolution of your grid
preprocessed_pillars = preprocess_point_cloud(points, grid_x, grid_y)


**Loading the Model for Inference**

In [12]:
import torch

# Initialize the model (assuming you already have the model architecture and weights loaded)
model = PointPillarsFeatureExtractor()

# Load the saved weights
model_save_path = '/content/drive/MyDrive/ROS2-Modular-Framework-for-End-to-End-Autonomous-Vehicle-Control-from-Raw-Sensor-Data/self_built_point_pillar.pth'
model.load_state_dict(torch.load(model_save_path))

# Set the model to evaluation mode
model.eval()

# Convert the preprocessed point cloud into a torch tensor
input_tensor = torch.tensor(preprocessed_pillars, dtype=torch.float32).unsqueeze(0)  # Add batch dimension

# Perform inference (no gradient calculation since we're not training)
with torch.no_grad():
    feature_map = model(input_tensor)

# Process the feature_map for your task (if needed)
print("Feature map extracted:")
print(feature_map.shape)  # Check the output shape


  model.load_state_dict(torch.load(model_save_path))


Feature map extracted:
torch.Size([1, 256, 25, 8])
