In [1]:
import os
import pdb
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader, random_split
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from mpl_toolkits.mplot3d import Axes3D
import torch.nn as nn
import random
import copy
import math
import ipdb

# Pipelines (a.k.a parts of the Neural Network)
from Pipelines.kitti_loader import KITTIDataset
from Pipelines.pillarizer import PillarFeatureNet, Pillarization, PseudoImageDataset
from Pipelines.backbone import BackBone
from Pipelines.detection_head import DetectionHead
from Pipelines.anchors import Box2D, Anchor
#from Pipelines.loss import PointPillarLoss

from Utils.transformations import transform_to_canvas, transform_to_grid, map_to_img
from Utils.iou import calculate_iou
from Utils.collate import normalize_annotations
from Utils.boxes import create_boxes_tensor # FIXME: Should be in visualization instead

# Visualization tools:
from Visualization.visz_pointcloud_w_label import plot_point_cloud_with_bboxes_o3d
from Visualization.visz_bboxes import visualize_batch_bounding_boxes


# Global configuration shared by every cell below (pillarization limits, grid size, anchors).
AUG_DIM = 9
MAX_POINTS_PER_PILLAR = 100
MAX_FILLED_PILLARS = 12000
# Point-cloud crop limits, forwarded to PseudoImageDataset and normalize_annotations.
# NOTE(review): presumably metres in the LiDAR frame -- confirm against the loader.
X_MIN = 0.0
X_MAX = 70.4
Y_MIN = -40.0
Y_MAX = 40.0
Z_MIN = -3.0
Z_MAX = 1.0
PILLAR_SIZE = (0.16, 0.16)
DESIRED_CLASSES = ['Car'] # More classes can be added here
SCALE_FACTOR = 1.5
# Feature-map size handed to the Anchor helpers as (H, W).
# 500 == (Y_MAX - Y_MIN) / PILLAR_SIZE[1] and 440 == (X_MAX - X_MIN) / PILLAR_SIZE[0].
H = 500
W = 440


# One row per anchor template. The original note listed "height" twice;
# NOTE(review): the columns are presumably (length, width, height, z_center, orientation) -- confirm.
# Rows 1 and 3 repeat rows 0 and 2 with the first two columns swapped and orientation 1.5708 (~pi/2).
ANCHORS = torch.tensor([[3.9, 1.6, 1.56, -1, 0],
                       [1.6, 3.9, 1.56, -1, 1.5708],
                       [0.8, 0.6, 1.73, -0.6, 0],
                       [0.6, 0.8, 1.73, -0.6, 1.5708]]
                       )

# Work on a copy so ANCHORS keeps its original values.
# Only the first two columns are divided by the pillar size (PILLAR_SIZE[0] is used for both).
mapped_anchors = ANCHORS.detach().clone()
mapped_anchors[:,0:2] /= PILLAR_SIZE[0]


# Maps attribute names to column indices; passed to create_boxes_tensor in the training cell.
# NOTE(review): the indices presumably refer to columns of the normalized annotation rows -- confirm.
attributes_idx = {
    'norm_x': 7,
    'norm_y': 8,
    'norm_z': 9,
    'norm_h': 10,
    'norm_w': 11,
    'norm_l': 12,
}

# Builds the module-level `anchor` object used by the training cell.
# Because of the `break` below, only the FIRST row of mapped_anchors is ever used.
for anchor_tensor in mapped_anchors: # NOTE: independent of the batch; shared by all training and testing
    # Column 1 is passed as the anchor width and column 0 as the anchor height.
    anchor = Anchor(width=anchor_tensor[1], height=anchor_tensor[0])
    anchor.create_anchor_grid(H,W) # Creates grid
    anchor.create_anchors()
    break # FIXME: Get rid of this (currently restricts training to a single anchor)
    #anchors_list.append(anchor)



# Sanity check: reports whether CUDA is visible to PyTorch.
print(f'Can I can use GPU now? -- {torch.cuda.is_available()}')

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
Can I can use GPU now? -- True


In [2]:
# Create data loaders: dataset locations and the device used for pillarization.

# Root of the KITTI dataset. The default is the original workstation path, so the
# derived directories below are unchanged; export KITTI_ROOT to run the notebook
# on another machine without editing this cell.
KITTI_ROOT = os.environ.get(
    'KITTI_ROOT',
    '/home/adlink/Documents/ECE-57000/ClassProject/Candidate2/PointPillars/dataset/kitti',
)

# Full training split.
train_pointclouds_dir = os.path.join(KITTI_ROOT, 'training', 'velodyne_reduced')
train_labels_dir = os.path.join(KITTI_ROOT, 'training', 'label_2')

# Reduced training split (the one actually used by the cells below).
small_train_pointclouds_dir = os.path.join(KITTI_ROOT, 'training', 'small_train_velodyne')
small_train_labels_dir = os.path.join(KITTI_ROOT, 'training', 'small_labels_velodyne')

# Even smaller split for quick experiments.
mini_train_pointclouds_dir = os.path.join(KITTI_ROOT, 'training', 'mini_train_velodyne')
mini_train_labels_dir = os.path.join(KITTI_ROOT, 'training', 'mini_label_velodyne')

# Test split (point clouds only; no labels directory is defined for it here).
test_pointclouds_dir = os.path.join(KITTI_ROOT, 'testing', 'velodyne_reduced')


# IMPORTANT: Set to CPU for pillarization otherwise, expect GPU memory to overflow
device = torch.device('cpu')


# Custom collate function: label lists differ in length per sample, so they are
# normalized by a helper instead of being stacked like the point clouds.
def collate_batch(batch):
    """Merge a list of (point_cloud, annotation) samples into one batch.

    Args:
        batch: sequence of 2-element samples; the first elements must be
            tensors that `torch.stack` can combine along a new leading axis.

    Returns:
        A pair `(stacked_point_clouds, normalized_annotations)`, where the
        second item is whatever `normalize_annotations` returns for the tuple
        of per-sample annotations.
    """
    clouds, labels = zip(*batch)
    stacked_clouds = torch.stack(clouds, dim=0)
    normalized = normalize_annotations(
        labels,
        pillar_size=PILLAR_SIZE,
        x_lims=(X_MIN, X_MAX),
        y_lims=(Y_MIN, Y_MAX),
    )
    return stacked_clouds, normalized


# Raw KITTI samples (point clouds + labels) from the reduced training split.
train_set = KITTIDataset(
    pointcloud_dir=small_train_pointclouds_dir,
    labels_dir=small_train_labels_dir,
)

# Wrap the raw samples in the pillarizing dataset, then in a DataLoader.
dataset = PseudoImageDataset(
    pointcloud_dir=small_train_pointclouds_dir,
    device=device,
    kitti_dataset=train_set,
    aug_dim=AUG_DIM,
    max_points_in_pillar=MAX_POINTS_PER_PILLAR,
    max_pillars=MAX_FILLED_PILLARS,
    x_min=X_MIN,
    y_min=Y_MIN,
    z_min=Z_MIN,
    x_max=X_MAX,
    y_max=Y_MAX,
    z_max=Z_MAX,
    pillar_size=PILLAR_SIZE,
)

# Deterministic order (shuffle=False). The old FIXME asked to restore batch_size=4; it is 4 here.
train_loader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=collate_batch,
)

In [3]:

class PointPillarLoss(nn.Module):
    """Localisation + focal classification loss for a single-anchor detection head.

    The total is `beta_loc * (x + y + width + length SmoothL1 terms)
    + beta_cls * (background focal loss + car focal loss)`.

    Args:
        alpha: focal-loss weighting factor.
        gamma: focal-loss focusing exponent applied to `(1 - p)`.
        beta_loc: weight of the localisation term in the total loss.
        beta_cls: weight of the classification term in the total loss.
    """

    def __init__(self, alpha=0.25, gamma=2.0, beta_loc = 2.0, beta_cls = 1.0):
        super().__init__()
        self.smooth_l1_loss = nn.SmoothL1Loss()
        self.alpha = alpha
        self.gamma = gamma
        self.beta_cls = beta_cls
        self.beta_loc = beta_loc

    def _focal_term(self, prob):
        """Focal loss of one predicted probability: -alpha * (1 - p)**gamma * log(p)."""
        return -torch.log(prob) * self.alpha * (1 - prob) ** self.gamma

    def forward(self, regression_targets, classification_targets_dict,
                gt_boxes_tensor, loc, size, clf, occupancy, angle, heading, anchor):
        """Compute the scalar training loss for one batch.

        Only anchor index 0 of `loc`, `size` and `clf` is read.

        Args:
            regression_targets: tensor of shape (batch_size, n_boxes, 2); entry
                [b, n] holds the (x_idx, y_idx) feature-map cell matched to box n.
            classification_targets_dict: indexable by batch index; each value is
                a sequence of entries whose items [1] and [2] are the x and y
                feature-map indices of a background cell.
            gt_boxes_tensor: tensor of shape (batch_size, n_boxes, 4).
            loc: indexed as loc[b, 0, c, y, x]; channels 0 and 1 are the
                predicted x and y.
            size: indexed as size[b, 0, c, y, x]; channels 0 and 1 are compared
                against the box width and length.
            clf: indexed as clf[b, 0, c, y, x]; channel 0 is used as the
                background probability and channel 1 as the car probability.
            occupancy, angle, heading: accepted for interface compatibility;
                not used by this loss.
            anchor: object exposing `width` and `height` (tensors); their
                diagonal normalises the x/y residuals.

        Returns:
            A scalar tensor with the weighted total loss.
        """
        # Anchor diagonal used to normalise the position residuals.
        da = torch.sqrt(anchor.width**2 + anchor.height**2)

        batch_size, n_boxes = regression_targets.shape[:2]
        dx_tensor = torch.zeros(batch_size, n_boxes, dtype=loc.dtype)
        dy_tensor = torch.zeros(batch_size, n_boxes, dtype=loc.dtype)
        dw_tensor = torch.zeros(batch_size, n_boxes, dtype=loc.dtype)
        dl_tensor = torch.zeros(batch_size, n_boxes, dtype=loc.dtype)

        # ---- Classification loss on background cells ----
        background_focal_loss = 0.0
        n_classification_target = 0
        for b in range(batch_size):
            for cls_target in classification_targets_dict[b]:
                x_idx = cls_target[1]  # entry layout: (n_box, x, y, ...)
                y_idx = cls_target[2]
                background_focal_loss += self._focal_term(clf[b, 0, 0, y_idx, x_idx])
                n_classification_target += 1

        # Average over the number of terms actually accumulated. The previous
        # guard tested batch_size * n_boxes, so a batch that had boxes but no
        # background targets divided by zero.
        if n_classification_target != 0:
            background_focal_loss /= n_classification_target
        else:
            background_focal_loss = 0.0
            print('Division by zero encountered on background!')

        # ---- Regression residuals + classification loss on matched (car) cells ----
        car_focal_loss = 0.0
        for b in range(batch_size):
            for n in range(n_boxes):
                x_idx = regression_targets[b, n, 0].long()  # indices must be integer typed
                y_idx = regression_targets[b, n, 1].long()
                # y is indexed first because it corresponds to the H dimension.
                x_pred = loc[b, 0, 0, y_idx, x_idx]
                y_pred = loc[b, 0, 1, y_idx, x_idx]

                w_gt = gt_boxes_tensor[b, n, 3] - gt_boxes_tensor[b, n, 1]
                l_gt = gt_boxes_tensor[b, n, 2] - gt_boxes_tensor[b, n, 0]
                # NOTE(review): the centre below offsets coordinate 0 by the extent
                # measured on coordinates (1, 3) and coordinate 1 by MINUS the extent
                # measured on (0, 2). If boxes are (x1, y1, x2, y2) corners this mixes
                # axes -- confirm against create_boxes_tensor before changing it.
                x_gt = gt_boxes_tensor[b, n, 0] + w_gt/2
                y_gt = gt_boxes_tensor[b, n, 1] - l_gt/2
                dx_tensor[b, n] = (x_gt - x_pred) / da
                dy_tensor[b, n] = (y_gt - y_pred) / da

                # Log-ratio size residuals; zero-sized boxes are skipped so that
                # log(0) never enters the loss (their residual stays 0).
                if (w_gt != 0.0):
                    dw_tensor[b, n] = torch.log((w_gt / torch.abs(size[b, 0, 0, y_idx, x_idx])))
                if (l_gt != 0.0):
                    dl_tensor[b, n] = torch.log((l_gt / torch.abs(size[b, 0, 1, y_idx, x_idx])))

                car_focal_loss += self._focal_term(clf[b, 0, 1, y_idx, x_idx])

        n_matched = batch_size * n_boxes
        if n_matched != 0:
            car_focal_loss /= n_matched
        else:
            print('Division by zero encountered on cars!')
            car_focal_loss = 0.0

        # ---- Localisation loss: every residual is regressed towards zero ----
        loc_loss_x = self.smooth_l1_loss(dx_tensor, torch.zeros_like(dx_tensor))
        loc_loss_y = self.smooth_l1_loss(dy_tensor, torch.zeros_like(dy_tensor))
        width_loss = self.smooth_l1_loss(dw_tensor, torch.zeros_like(dw_tensor))
        length_loss = self.smooth_l1_loss(dl_tensor, torch.zeros_like(dl_tensor))
        total_loc_loss = loc_loss_x + loc_loss_y + width_loss + length_loss

        # ---- Weighted sum of localisation and classification terms ----
        total_loss = self.beta_loc*total_loc_loss + self.beta_cls*(background_focal_loss + car_focal_loss)

        return total_loss
    

'''Set up the neural network for training'''

class PointPillarsModel(nn.Module):
    """Backbone followed by a detection head, both constructed for the CUDA device."""

    def __init__(self):
        super().__init__()
        cuda = torch.device('cuda')
        self.backbone = BackBone(in_channels=64, out_channels=64, device=cuda)
        self.detection_head = DetectionHead(
            in_channels=384,
            grid_size_x=500,
            grid_size_y=440,
            num_anchors=1,
            num_classes=2,
            device=cuda,
        )

    def forward(self, x):
        """Run `x` through the backbone and the head.

        Returns:
            The six head outputs as a tuple:
            (loc, size, clf, occupancy, angle, heading).
        """
        head_outputs = self.detection_head(self.backbone(x))
        # Unpack explicitly so a head returning anything but six items still fails here.
        loc, size, clf, occupancy, angle, heading = head_outputs
        return loc, size, clf, occupancy, angle, heading


'''
# Declare model:
backbone = BackBone(in_channels=64, out_channels=64, device=torch.device('cuda'))

detection_head = DetectionHead(device=torch.device('cuda'), in_channels=384, grid_size_x=500, 
                        grid_size_y=440, num_anchors=1, num_classes=2) 

#optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)'''

#def get_classification_targets(self, iou_tensor, feature_map_size, 
#                               background_lower_threshold=0.05, background_upper_threshold=0.25):
"""
Generates classification targets based on IoU thresholds for each anchor.

Parameters:
iou_tensor -- tensor of IoU values, shape (batch_size, n_boxes, num_anchors_x, num_anchors_y)
feature_map_size -- size of the feature map grid (H, W)
foreground_threshold -- IoU threshold to consider an anchor as a positive match (foreground)
background_lower_threshold -- lower IoU threshold for considering an anchor as a negative match (background)
background_upper_threshold -- upper IoU threshold for considering an anchor as a negative match (background)

Returns:
A dictionary with keys as batch indices and values as lists of (box_index, feature_map_x_index, feature_map_y_index, class_label)
"""

# ---- Training configuration ----
n_epochs = 7
model = PointPillarsModel()
loss_fn = PointPillarLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss = 0.0  # most recent batch loss; stays 0.0 if every batch is skipped

# ---- Data pipeline ----
# Built once instead of once per epoch: the constructor arguments never change,
# and a DataLoader with shuffle=True draws a fresh permutation each time it is iterated.
train_set = KITTIDataset(pointcloud_dir=small_train_pointclouds_dir, labels_dir=small_train_labels_dir)

dataset = PseudoImageDataset(pointcloud_dir=small_train_pointclouds_dir, device=device, kitti_dataset=train_set,
                             aug_dim=AUG_DIM, max_points_in_pillar=MAX_POINTS_PER_PILLAR,
                             max_pillars=MAX_FILLED_PILLARS, x_min=X_MIN, y_min=Y_MIN, z_min=Z_MIN,
                             x_max=X_MAX, y_max=Y_MAX, z_max=Z_MAX, pillar_size=PILLAR_SIZE)

train_loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=collate_batch)

# ---- Training loop ----
for epoch in range(n_epochs):
    print(f'Epoch: {epoch}')
    model.train()

    for batch_idx, (pseudo_images, batched_labels) in enumerate(train_loader):

        gt_boxes_tensor = create_boxes_tensor(batched_labels, attributes_idx)

        # Nothing to learn from a batch without ground-truth boxes.
        if gt_boxes_tensor.nelement() == 0:
            print('Encountered an empty element on the batch')
            continue

        # ROI indices around each ground-truth box.
        # NOTE(review): the result is not consumed below; the call is kept in case
        # get_ROI_indices updates state on `anchor` -- confirm, then remove if it does not.
        roi_indices = anchor.get_ROI_indices(gt_boxes_tensor=gt_boxes_tensor, scale_factor=SCALE_FACTOR,
                    feature_map_size=(H,W))

        # IoU tensor: (batch_size, n_boxes, num_anchors_x, num_anchors_y)
        iou_tensor = anchor.calculate_batch_iou(gt_boxes_tensor)

        # Targets derived from the IoU between anchors and ground-truth boxes.
        regression_targets_tensor = anchor.get_regression_targets_tensor(iou_tensor, (H,W), threshold=0.5)

        classification_targets_dict = anchor.get_classification_targets(iou_tensor=iou_tensor, feature_map_size=(H,W),
                                    background_lower_threshold=0.05, background_upper_threshold=0.25)

        # Clear gradients accumulated by the previous step before the new forward/backward pass.
        optimizer.zero_grad()

        loc, size, clf, occupancy, angle, heading = model(pseudo_images)

        loss = loss_fn(regression_targets=regression_targets_tensor,
                       classification_targets_dict=classification_targets_dict,
                       gt_boxes_tensor=gt_boxes_tensor, loc=loc, size=size, clf=clf,
                       occupancy=occupancy, angle=angle, heading=heading, anchor=anchor)

        print(f'Loss: {loss}')

        # Backpropagation and parameter update.
        loss.backward()
        optimizer.step()

    

Epoch: 0
File loaded: /home/adlink/Documents/ECE-57000/ClassProject/Candidate2/PointPillars/dataset/kitti/training/small_train_velodyne/000506.bin


  return torch.from_numpy(point_cloud)


Loading point cloud number 665
File loaded: /home/adlink/Documents/ECE-57000/ClassProject/Candidate2/PointPillars/dataset/kitti/training/small_train_velodyne/000040.bin
Loading point cloud number 574
File loaded: /home/adlink/Documents/ECE-57000/ClassProject/Candidate2/PointPillars/dataset/kitti/training/small_train_velodyne/000248.bin
Loading point cloud number 610
File loaded: /home/adlink/Documents/ECE-57000/ClassProject/Candidate2/PointPillars/dataset/kitti/training/small_train_velodyne/000770.bin
Loading point cloud number 421
Loss: 18.80075454711914
File loaded: /home/adlink/Documents/ECE-57000/ClassProject/Candidate2/PointPillars/dataset/kitti/training/small_train_velodyne/000356.bin
Loading point cloud number 500
File loaded: /home/adlink/Documents/ECE-57000/ClassProject/Candidate2/PointPillars/dataset/kitti/training/small_train_velodyne/000396.bin
Loading point cloud number 431
File loaded: /home/adlink/Documents/ECE-57000/ClassProject/Candidate2/PointPillars/dataset/kitti/tra

In [None]:
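# Initialization (template / pseudo-code sketch of the intended training loop: the `...` placeholders make this cell non-runnable as written)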
model = ...  # Your complete PointPillars model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = PointPillarLoss()
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.8)

# Training loop
for epoch in range(num_epochs):
    model.train()
    for batch_data in train_dataloader:
        optimizer.zero_grad()
        
        # Perform pillarization and forward pass through the model
        pillars, coords, ... = pillarize(batch_data)
        loc, size, clf, ... = model(pillars, coords)
        
        # Generate targets using your Anchor class
        regression_targets, classification_targets = anchor.generate_targets(batch_data)
        
        # Compute loss
        loss = loss_fn(regression_targets, classification_targets, batch_data['gt_boxes'], loc, size, clf, ...)
        
        # Backpropagation
        loss.backward()
        optimizer.step()