<a href="https://colab.research.google.com/github/gkadusumilli/Voxelnet/blob/master/VoxelNet_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LiDAR point-cloud based 3D object detection implementation with Colab

Note: before running, select Runtime > Change runtime type > GPU.


An article explaining the key concepts behind this notebook can be found [here](https://medium.com/@gkadusumilli/lidar-point-cloud-based-3d-object-detection-implementation-with-colab-part-1-of-2-e3999ea8fdd4)


In [None]:
# Move into the Google Drive folder; everything below is cloned and run from there.
# NOTE(review): assumes Drive is already mounted at /content/drive -- no mount cell is visible before this one.
%cd /content/drive/My Drive

### Clone the VoxelNet repo

In [None]:
!git clone https://github.com/gkadusumilli/Voxelnet.git

#changing the current working directory
%cd /content/drive/My Drive/Voxelnet

### Build the necessary files

In [None]:
# Build the extension modules declared in setup.py; --inplace places the
# compiled files next to their sources instead of in a separate build directory.
!python setup.py build_ext --inplace

### Unzip the dataset folder stored in Drive

* Steps to download, crop, and process the KITTI dataset can be found [here](https://github.com/gkadusumilli/Voxelnet/blob/master/VoxelNet_data_creation.ipynb)

In [None]:
#stored the dataset in AI folder named data_lidar, feel free to rename as you need
!unzip "/content/drive/My Drive/AI/data_lidar.zip"

### Editing config.py to change the data paths (optional)

1. Use the `%pycat` command to view the file.

**To edit config.py**
1. Copy the Python script displayed by the `%pycat` command.
2. Paste it into the `%%writefile` cell, change the path directories, and run the cell.

In [None]:
# Display the current contents of config.py so it can be copied and edited.
%pycat /content/drive/My Drive/Voxelnet/config.py


In [None]:
%%writefile ./config.py

from easydict import EasyDict as edict

# Settings container; every option below is attached to it as an attribute.
__cfg__ = edict()

# for dataset dir
# DATA_DIR is the dataset location (the training command passes the same path
# as --data_root_dir); change it if your data lives elsewhere.
__cfg__.DATA_DIR = '/content/drive/My Drive/Voxelnet/crop_data'
__cfg__.KITTY_EVAL_SCRIPT = "kitti_eval/launch_test.sh"
__cfg__.CALIB_DIR = ''

# selected object
__cfg__.DETECT_OBJECT = 'Car'  # Pedestrian/Cyclist
__cfg__.NUM_ANCHORS_PER_CELL = 2

# Settings that are identical for every object class. VOXEL_Z_SIZE in
# particular has to exist for all classes: INPUT_DEPTH below and
# VOXEL_SIZE / GRID_SIZE further down are computed from it, so leaving it out
# of the non-Car branch made the config fail with an AttributeError.
__cfg__.Z_MIN = -3
__cfg__.Z_MAX = 1
__cfg__.X_MIN = 0
__cfg__.VOXEL_X_SIZE = 0.2
__cfg__.VOXEL_Y_SIZE = 0.2
__cfg__.VOXEL_Z_SIZE = 0.4
__cfg__.FEATURE_RATIO = 2

# Class-specific range, point counts and LIDAR_COORD.
if __cfg__.DETECT_OBJECT == 'Car':
    __cfg__.MAX_POINT_NUMBER = 35
    __cfg__.VOXEL_POINT_COUNT = 35
    __cfg__.Y_MIN = -40
    __cfg__.Y_MAX = 40
    __cfg__.X_MAX = 70.4
    __cfg__.LIDAR_COORD = [0, 40, 3]
else:
    # Pedestrian / Cyclist
    __cfg__.MAX_POINT_NUMBER = 45
    __cfg__.VOXEL_POINT_COUNT = 45
    __cfg__.Y_MIN = -20
    __cfg__.Y_MAX = 20
    __cfg__.X_MAX = 48
    __cfg__.LIDAR_COORD = [0, 20, 3]

# Input grid size in voxels: each axis range divided by the voxel size along
# that axis, truncated to an integer.
__cfg__.INPUT_WIDTH = int((__cfg__.X_MAX - __cfg__.X_MIN) / __cfg__.VOXEL_X_SIZE)
__cfg__.INPUT_HEIGHT = int((__cfg__.Y_MAX - __cfg__.Y_MIN) / __cfg__.VOXEL_Y_SIZE)
__cfg__.INPUT_DEPTH = int((__cfg__.Z_MAX - __cfg__.Z_MIN) / __cfg__.VOXEL_Z_SIZE)

# Feature-map size: the input grid divided by FEATURE_RATIO.
__cfg__.FEATURE_WIDTH = int(__cfg__.INPUT_WIDTH / __cfg__.FEATURE_RATIO)
__cfg__.FEATURE_HEIGHT = int(__cfg__.INPUT_HEIGHT / __cfg__.FEATURE_RATIO)


# Extent of the scene and edge length of one voxel, both listed in z, y, x order.
__cfg__.SCENE_SIZE = [
    __cfg__.Z_MAX - __cfg__.Z_MIN,
    __cfg__.Y_MAX - __cfg__.Y_MIN,
    __cfg__.X_MAX - __cfg__.X_MIN,
]
__cfg__.VOXEL_SIZE = [
    __cfg__.VOXEL_Z_SIZE,
    __cfg__.VOXEL_Y_SIZE,
    __cfg__.VOXEL_X_SIZE,
]
# Number of voxels along each axis: extent divided by voxel size, truncated.
__cfg__.GRID_SIZE = [
    int(extent / step)
    for extent, step in zip(__cfg__.SCENE_SIZE, __cfg__.VOXEL_SIZE)
]
# Feature-map shape as [height, width].
__cfg__.MAP_SHAPE = [__cfg__.FEATURE_HEIGHT, __cfg__.FEATURE_WIDTH]

# Image width, height and channel count.
__cfg__.IMG_WIDTH = 1242
__cfg__.IMG_HEIGHT = 375
__cfg__.IMG_CHANNEL = 3


# Scale factor for the logged image.
__cfg__.BV_LOG_FACTOR = 4

# Output dimensions used by the VFE layers.
__cfg__.VFE_OUT_DIMS = [32, 128]
__cfg__.VFE_FINAl_OUT_DIM = 128

# Mean value calculated from the training set.
# NOTE(review): presumably the KITTI camera-2 projection matrix padded to 4x4 -- confirm.
__cfg__.MATRIX_P2 = ([[719.787081,    0.,            608.463003, 44.9538775],
                  [0.,            719.787081,    174.545111, 0.1066855],
                  [0.,            0.,            1.,         3.0106472e-03],
                  [0.,            0.,            0.,         0]])

# Mean value calculated from the training set.
# NOTE(review): the name suggests the LiDAR (velodyne) -> camera transform -- confirm.
__cfg__.MATRIX_T_VELO_2_CAM = ([
    [7.49916597e-03, -9.99971248e-01, -8.65110297e-04, -6.71807577e-03],
    [1.18652889e-02, 9.54520517e-04, -9.99910318e-01, -7.33152811e-02],
    [9.99882833e-01, 7.49141178e-03, 1.18719929e-02, -2.78557062e-01],
    [0, 0, 0, 1]
])
# Mean value calculated from the training set.
# NOTE(review): the name suggests the rectifying rotation R_rect_0 in 4x4 form -- confirm.
__cfg__.MATRIX_R_RECT_0 = ([
    [0.99992475, 0.00975976, -0.00734152, 0],
    [-0.0097913, 0.99994262, -0.00430371, 0],
    [0.00729911, 0.0043753, 0.99996319, 0],
    [0, 0, 0, 1]
])


# Faster-RCNN/SSD Hyper params
# Anchor dimensions and RPN positive/negative IoU thresholds for the selected
# class. The classes are mutually exclusive, so they form one if/elif chain,
# and an unsupported value fails here instead of leaving ANCHOR_* and
# RPN_*_IOU undefined for the code that reads them later.
if __cfg__.DETECT_OBJECT == 'Car':
    # car anchor
    __cfg__.ANCHOR_L = 3.9
    __cfg__.ANCHOR_W = 1.6
    __cfg__.ANCHOR_H = 1.56
    __cfg__.ANCHOR_Z = -1.0 - __cfg__.ANCHOR_H/2
    __cfg__.RPN_POS_IOU = 0.6
    __cfg__.RPN_NEG_IOU = 0.45

elif __cfg__.DETECT_OBJECT == 'Pedestrian':
    # pedestrian anchor
    __cfg__.ANCHOR_L = 0.8
    __cfg__.ANCHOR_W = 0.6
    __cfg__.ANCHOR_H = 1.73
    __cfg__.ANCHOR_Z = -0.6 - __cfg__.ANCHOR_H/2
    __cfg__.RPN_POS_IOU = 0.5
    __cfg__.RPN_NEG_IOU = 0.35

elif __cfg__.DETECT_OBJECT == 'Cyclist':
    # cyclist anchor
    __cfg__.ANCHOR_L = 1.76
    __cfg__.ANCHOR_W = 0.6
    __cfg__.ANCHOR_H = 1.73
    __cfg__.ANCHOR_Z = -0.6 - __cfg__.ANCHOR_H/2
    __cfg__.RPN_POS_IOU = 0.5
    __cfg__.RPN_NEG_IOU = 0.35

else:
    raise ValueError(
        "Unsupported DETECT_OBJECT %r; expected 'Car', 'Pedestrian' or 'Cyclist'"
        % (__cfg__.DETECT_OBJECT,)
    )

# for rpn nms
# NOTE(review): the names suggest the number of boxes kept after NMS, the NMS
# overlap threshold and the minimum score -- confirm against the RPN code.
__cfg__.RPN_NMS_POST_TOPK = 20
__cfg__.RPN_NMS_THRESH = 0.1
__cfg__.RPN_SCORE_THRESH = 0.96


__cfg__.CORNER2CENTER_AVG = True  # average version or max version


# Expose the settings object under the shorter name `cfg`.
cfg = __cfg__

### Training
Details of the command-line arguments can be found in train.py.

In [None]:
!python train.py \
--strategy="all" \
--n_epochs=16 \
--batch_size=2 \
--learning_rate=0.001 \
--small_addon_for_BCE=1e-6 \
--max_gradient_norm=5 \
--alpha_bce=1.5 \
--beta_bce=1 \
--huber_delta=3 \
--dump_vis="no" \
--data_root_dir="/content/drive/My Drive/Voxelnet/crop_data" \
--model_dir="model" \
--model_name="model6" \
--dump_test_interval=3 \
--summary_interval=2 \
--summary_val_interval=40 \
--summary_flush_interval=20 \
--ckpt_max_keep=10 \

### Visualizing the log files using TensorBoard

In [None]:
# Load the TensorBoard notebook extension, which provides the %tensorboard magic.
%load_ext tensorboard

# NOTE(review): assumes the training run writes its summaries under ./summary_logdir -- confirm against train.py.
%tensorboard --logdir summary_logdir

### Evaluating the model

In [None]:
!python predict.py \
--strategy="all" \
--batch_size=2 \
--dump_vis="yes" \
--data_root_dir="../DATA_DIR/T_DATA/" \
--dataset_to_test="validation" \
--model_dir="model" \
--model_name="model6" \
--ckpt_name="" \