## Human Detection in Point Cloud Data

In [7]:
import os
import numpy as np


def load_kitti_bin(bin_path):
    """
    Read a KITTI-style binary point cloud file into an N x 4 array.

    The file is read as a flat sequence of float32 values and regrouped into
    rows of four values each.

    Args:
    - bin_path (str): Path to the .bin file.

    Returns:
    - numpy array: float32 array of shape (N, 4); the columns are treated as
      x, y, z, intensity.
    """
    flat_values = np.fromfile(bin_path, dtype=np.float32)
    # -1 lets NumPy infer the number of points from the file length.
    rows_of_four = flat_values.reshape(-1, 4)
    return rows_of_four

def voxelize(point_cloud, grid_size=32):
    """
    Convert a point cloud into a fixed-size binary occupancy voxel grid.

    The grid spans the axis-aligned bounding box of the input points, so every
    cloud is stretched to fill the whole grid regardless of its physical size.

    Args:
    - point_cloud (numpy array): Nx4 point cloud data (only the first three
      columns, x/y/z, are used).
    - grid_size (int): Number of voxels along each axis.

    Returns:
    - voxel_grid (numpy array): uint8 array of shape
      (grid_size, grid_size, grid_size); 1 where at least one point falls in
      the voxel, 0 elsewhere. An empty point cloud yields an all-zero grid.
    """
    # Create an empty voxel grid
    voxel_grid = np.zeros((grid_size, grid_size, grid_size), dtype=np.uint8)

    # An empty cloud has no bounding box; min()/max() would raise on it.
    if point_cloud.shape[0] == 0:
        return voxel_grid

    xyz = point_cloud[:, :3]

    # Define voxel grid boundaries based on the point cloud data
    min_bound = xyz.min(axis=0)
    max_bound = xyz.max(axis=0)

    # Calculate voxel size in each dimension
    voxel_size = (max_bound - min_bound) / grid_size

    # A zero extent along an axis (single point, planar cloud) would cause a
    # 0/0 division and an undefined NaN -> int cast. Any non-zero size works
    # here because every offset on that axis is 0, so all points map to index 0.
    voxel_size[voxel_size == 0] = 1.0

    # Convert points to voxel coordinates (offsets are >= 0, so truncation == floor)
    voxel_coords = ((xyz - min_bound) / voxel_size).astype(int)

    # Points lying exactly on the max bound compute to index grid_size; clip them in.
    voxel_coords = np.clip(voxel_coords, 0, grid_size - 1)

    # Fill the voxel grid based on voxel coordinates
    voxel_grid[voxel_coords[:, 0], voxel_coords[:, 1], voxel_coords[:, 2]] = 1

    return voxel_grid


def generate_labels(data_directory, threshold=1000):
    """
    Derive one binary label per file from the number of occupied voxels.

    Every entry of the directory (in sorted name order) is loaded, voxelized
    with the default grid size, and labelled by comparing its occupied-voxel
    count against the threshold. Note that the label is purely this occupancy
    heuristic; no annotation data is consulted.

    Args:
    - data_directory (str): Path to the directory containing point cloud .bin files.
    - threshold (int): A file is labelled 1 when its occupied-voxel count is
      strictly greater than this value.

    Returns:
    - labels (list): One int per directory entry (1 for human, 0 for non-human),
      in sorted file-name order.
    """
    def label_for(file_name):
        # Load -> voxelize -> count occupied voxels -> threshold.
        cloud = load_kitti_bin(os.path.join(data_directory, file_name))
        occupied_voxels = voxelize(cloud).sum()
        return 1 if occupied_voxels > threshold else 0

    return [label_for(file_name) for file_name in sorted(os.listdir(data_directory))]



In [8]:
import torch
from torch.utils.data import Dataset, DataLoader

class PointCloudDataset(Dataset):
    """Dataset serving voxelized point clouds together with their labels."""

    def __init__(self, data_directory, labels):
        """
        Args:
        - data_directory (str): Directory with all the point cloud .bin files.
        - labels (list): One label per directory entry, ordered to match the
          sorted file names.
        """
        self.data_directory = data_directory
        self.labels = labels

        # Sorted so that index i always refers to the same file.
        directory_entries = os.listdir(data_directory)
        directory_entries.sort()
        self.file_list = directory_entries

    def __len__(self):
        """Number of files found in the data directory."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Return (voxel_tensor, label_tensor) for the idx-th file."""
        # Read the raw points and turn them into an occupancy grid
        file_name = self.file_list[idx]
        cloud = load_kitti_bin(os.path.join(self.data_directory, file_name))
        occupancy = voxelize(cloud)

        # uint8 grid -> float32 tensor with a leading channel axis: (1, D, H, W)
        voxel_tensor = torch.from_numpy(occupancy).to(torch.float32).unsqueeze(0)
        label_tensor = torch.tensor(self.labels[idx], dtype=torch.float32)

        return voxel_tensor, label_tensor

# Create dataset and dataloader
# NOTE(review): hard-coded absolute path; adjust to wherever the .bin files live.
data_directory = '/data'

# Generate labels for the dataset
# generate_labels() and PointCloudDataset both iterate sorted(os.listdir(...)),
# so labels[i] lines up with the i-th file of the dataset.
labels = generate_labels(data_directory)
# Class-balance figures; they are not displayed or used in the cells shown here.
# NOTE(review): worth inspecting, since a single-class label set would make the
# loss and accuracy reported further down trivially perfect.
num_human_labels = sum(labels)
num_total_samples = len(labels)

dataset = PointCloudDataset(data_directory, labels)
# shuffle=True: every new iteration over the dataloader yields a new sample order.
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

# Check a sample from the dataset
sample_data, sample_label = next(iter(dataloader))
# Displayed as the cell output: (batch, channel, D, H, W) and (batch,)
sample_data.shape, sample_label.shape


(torch.Size([8, 1, 32, 32, 32]), torch.Size([8]))

In [9]:
import torch.nn as nn
import torch.nn.functional as F

class Simple3DCNN(nn.Module):
    """
    Small 3D CNN mapping a 1-channel 32x32x32 voxel grid to one probability.

    Two conv + ReLU + max-pool stages are followed by two dense layers; the
    final sigmoid makes the output suitable for nn.BCELoss.
    """

    def __init__(self):
        super().__init__()

        # 3D convolutional layers ("same" padding keeps the spatial size)
        self.conv1 = nn.Conv3d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv3d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)

        # Dense layers. Two 2x max-poolings shrink 32 -> 16 -> 8 per axis,
        # leaving 64 channels of 8x8x8 features to flatten.
        flattened_features = 64 * 8 * 8 * 8
        self.fc1 = nn.Linear(flattened_features, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        """Return one probability per sample, shape (batch,)."""
        # Both feature stages share the same conv -> ReLU -> 2x max-pool pattern.
        for conv in (self.conv1, self.conv2):
            x = F.max_pool3d(F.relu(conv(x)), 2)

        flat = x.view(x.shape[0], -1)  # Flatten everything except the batch axis
        hidden = F.relu(self.fc1(flat))
        probability = torch.sigmoid(self.fc2(hidden))

        # Drop the trailing size-1 axis: (batch, 1) -> (batch,)
        return probability.squeeze(-1)

# Initialize the model
# Weights are randomly initialised; no random seed is set in the cells shown
# here, so results can differ between runs.
model = Simple3DCNN()

# Display the model architecture (bare last expression -> rendered as the cell output)
model


Simple3DCNN(
  (conv1): Conv3d(1, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (conv2): Conv3d(32, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (fc1): Linear(in_features=32768, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=1, bias=True)
)

In [10]:
# Training parameters
epochs = 5
learning_rate = 0.001
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Loss and optimizer
# BCELoss expects probabilities in [0, 1]; the model's forward pass already
# applies the sigmoid, so no extra activation is needed here.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Move model to the specified device
model.to(device)

# Training loop
for epoch in range(epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    # The batch variables get their own names so the loop does not overwrite
    # the notebook-level `labels` list that the dataset was built from;
    # clobbering it would break a re-run of the dataset cell.
    for batch_data, batch_labels in dataloader:
        batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(batch_data)

        # Compute loss
        loss = criterion(outputs, batch_labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch losses (each batch weighted equally)
    average_loss = running_loss / len(dataloader)
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {average_loss:.4f}")

print("Training complete.")


Epoch [1/5], Loss: 0.1038
Epoch [2/5], Loss: 0.0000
Epoch [3/5], Loss: 0.0000
Epoch [4/5], Loss: 0.0000
Epoch [5/5], Loss: 0.0000
Training complete.


In [11]:
# Set model to evaluation mode
model.eval()

# Store predictions and actual labels
predictions = []
true_labels = []

# Evaluate the model on the training data.
# NOTE: this is the same data the model was fitted on, so the resulting
# accuracy says nothing about generalisation.
with torch.no_grad():  # No gradient computation during evaluation
    # Distinct batch names keep the notebook-level `labels` list intact.
    for batch_data, batch_labels in dataloader:
        batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)

        # Get model predictions (probabilities, since the model ends in a sigmoid)
        outputs = model(batch_data)

        # Convert predictions to binary labels
        predicted_labels = (outputs > 0.5).float()

        # Predictions and labels are collected in the same pass, so they stay
        # aligned even though the dataloader shuffles.
        predictions.extend(predicted_labels.cpu().numpy())
        true_labels.extend(batch_labels.cpu().numpy())

# Convert to numpy arrays for easier comparison
predictions = np.array(predictions)
true_labels = np.array(true_labels)

# Compute accuracy
accuracy = np.mean(predictions == true_labels)
accuracy


1.0

## Saving the Model

In [12]:

# Save model weights
# Only the state_dict (parameter tensors) is stored, not the model class itself,
# so loading requires constructing a Simple3DCNN first.
# The relative path resolves against the kernel's current working directory.
model_save_path = "simple_3dcnn_weights.pth"
torch.save(model.state_dict(), model_save_path)
model_save_path


'simple_3dcnn_weights.pth'

## Loading the Model

In [13]:

# Load model weights
# The state_dict holds parameters only, so a new model instance is built first.
loaded_model = Simple3DCNN()
# map_location remaps the stored tensors onto the current device; without it a
# checkpoint saved from a GPU session fails to load on a CPU-only machine.
loaded_model.load_state_dict(torch.load(model_save_path, map_location=device))
loaded_model.to(device)
loaded_model.eval()  # Set model to evaluation mode


Simple3DCNN(
  (conv1): Conv3d(1, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (conv2): Conv3d(32, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (fc1): Linear(in_features=32768, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=1, bias=True)
)

## Testing the Loaded Model

In [14]:

# Test the loaded model on the training data
loaded_predictions = []
loaded_true_labels = []
with torch.no_grad():
    # The dataloader shuffles, so each pass yields samples in a new order.
    # Labels must therefore be collected in the same pass as the predictions;
    # comparing against labels gathered in an earlier pass would pair each
    # prediction with the label of a different sample.
    for batch_data, batch_labels in dataloader:
        batch_data = batch_data.to(device)
        outputs = loaded_model(batch_data)
        predicted_labels = (outputs > 0.5).float()
        loaded_predictions.extend(predicted_labels.cpu().numpy())
        loaded_true_labels.extend(batch_labels.cpu().numpy())
loaded_predictions = np.array(loaded_predictions)
loaded_true_labels = np.array(loaded_true_labels)
loaded_accuracy = np.mean(loaded_predictions == loaded_true_labels)
loaded_accuracy


1.0