In [None]:
import torch.optim as optim
import torch
import numpy as np

import os
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms

from torch.utils.data import Dataset, DataLoader

import timm
import torch
import torch.nn as nn
import torch.nn.functional as F

from tqdm.notebook import tqdm
from timm.models.vision_transformer import PatchEmbed

In [None]:
# Load one preprocessed scan as an (N, 9) float32 array.
# Columns: x, y, z, intensity, flag, R, G, B, label
pcd = np.fromfile('./preprocess/000000.bin', dtype=np.float32)
pcd = pcd.reshape(-1, 9)

In [None]:
# Load the scan stored in './000000.bin' as an (N, 4) float32 array.
# Columns: x, y, z, intensity
pcd_ori = np.fromfile('./000000.bin', dtype=np.float32)
pcd_ori = pcd_ori.reshape(-1, 4)

In [None]:
### Projection
class ScanProjection(object):
    '''
    Project the 3D point cloud to 2D data with range projection

    Adapted from A. Milioto et al. https://github.com/PRBonn/lidar-bonnetal

    Parameters
    ----------
    proj_w, proj_h : int
        Width and height (in pixels) of the full projection image.
    crop_rows, crop_cols : tuple (start, stop)
        Row / column window cut out of the full projection before it is
        returned. The defaults (0:48, 790:1250) are the values this class
        previously hard-coded; they are expressed in pixels of a 2048-wide
        image, so pass a different window when using another ``proj_w``.
    '''

    def __init__(self, proj_w, proj_h, crop_rows=(0, 48), crop_cols=(790, 1250)):
        # params of proj img size
        self.proj_w = proj_w
        self.proj_h = proj_h
        # window returned by doProjection, as (start, stop) pairs
        self.crop_rows = tuple(crop_rows)
        self.crop_cols = tuple(crop_cols)

    def doProjection(self, pointcloud: np.ndarray):
        '''
        Project one labelled scan onto the image grid.

        Parameters
        ----------
        pointcloud : np.ndarray, shape (N, C + 1)
            Columns 0..2 are x, y, z and the LAST column is the label; the
            columns in between are carried through unchanged. The notebook
            feeds (N, 9) arrays: x, y, z, intensity, flag, R, G, B, label.
            The row of a point is derived from the order of the points, so
            they are assumed to be stored in scan order, ring by ring
            (TODO confirm against the preprocessing step).

        Returns
        -------
        proj_tensor : np.ndarray, float32, shape (rows, cols, C + 1)
            Channel 0 is the range, followed by every input column except
            the label, i.e. [range, x, y, z, intensity, flag, R, G, B] for
            the 9-column input. Pixels hit by no point hold -1.
        proj_label : np.ndarray, int32, shape (rows, cols)
            Label of the point stored at each pixel, 0 where there is none.
        '''
        # range of every point (Euclidean norm of x, y, z)
        depth = np.linalg.norm(pointcloud[:, :3], 2, axis=1)
        x = pointcloud[:, 0]
        y = pointcloud[:, 1]
        # the label is the last column; it arrives as float, so truncate it
        # to int explicitly instead of relying on the cast done on assignment
        label = pointcloud[:, -1].astype(np.int32)
        # every remaining column is projected as a feature channel
        pointcloud = pointcloud[:, :-1]

        # horizontal angle of every point, normalised to [0.0, 1.0]
        yaw = -np.arctan2(y, -x)
        proj_x = 0.5 * (yaw / np.pi + 1.0)

        # A new image row starts whenever the normalised column jumps from the
        # right end (> 0.8) back to the left end (< 0.2) between two
        # consecutive points; the running count of those jumps is the row.
        new_row = np.nonzero((proj_x[1:] < 0.2) * (proj_x[:-1] > 0.8))[0] + 1
        proj_y = np.zeros_like(proj_x)
        proj_y[new_row] = 1
        proj_y = np.cumsum(proj_y)

        # scale to image size
        proj_x = proj_x * self.proj_w - 0.001

        # round and clamp for use as index
        proj_x = np.maximum(np.minimum(
            self.proj_w - 1, np.floor(proj_x)), 0).astype(np.int32)
        # Circularly shift the columns by half the image width, so that the +x
        # direction (column 0 before the shift) ends up around the image centre.
        # Previously hard-coded to 1024, which was only valid for proj_w == 2048.
        proj_x = (proj_x + self.proj_w // 2) % self.proj_w
        proj_y = np.maximum(np.minimum(
            self.proj_h - 1, np.floor(proj_y)), 0).astype(np.int32)

        # Order by decreasing depth: when several points share a pixel the
        # nearest one is written last and is meant to be the one kept (this
        # relies on NumPy applying repeated-index assignments in order).
        indices = np.arange(depth.shape[0])
        order = np.argsort(depth)[::-1]
        depth = depth[order]
        indices = indices[order]
        pointcloud = pointcloud[order]
        proj_y = proj_y[order]
        proj_x = proj_x[order]
        label = label[order]

        # get projection result; -1 marks pixels that received no point
        proj_range = np.full((self.proj_h, self.proj_w), -1, dtype=np.float32)
        proj_range[proj_y, proj_x] = depth

        proj_pointcloud = np.full((self.proj_h, self.proj_w, pointcloud.shape[1]), -1, dtype=np.float32)
        proj_pointcloud[proj_y, proj_x] = pointcloud

        # index of the source point per pixel (computed but not returned)
        proj_idx = np.full((self.proj_h, self.proj_w), -1, dtype=np.int32)
        proj_idx[proj_y, proj_x] = indices

        proj_label = np.full((self.proj_h, self.proj_w), 0, dtype=np.int32)
        proj_label[proj_y, proj_x] = label

        # cut the configured window out of the full projection
        rows = slice(*self.crop_rows)
        cols = slice(*self.crop_cols)
        proj_pointcloud = proj_pointcloud[rows, cols, :]  # [H, W, C]
        proj_range = proj_range[rows, cols]               # [H, W]
        proj_label = proj_label[rows, cols]               # [H, W]

        # stack the range in front of the feature channels -> [H, W, C + 1]
        proj_tensor = np.concatenate((proj_range[..., np.newaxis], proj_pointcloud), axis=-1)
        return proj_tensor, proj_label
    

In [None]:
### DataLoader
class KITTISegmentationDataset(Dataset):
    """Range-image segmentation dataset built from preprocessed scan files.

    Every ``*.bin`` file found in ``<root_dir>/<sequence>/preprocess`` is one
    sample. A sample is read as an (N, 9) float32 point cloud, projected to a
    2D image with ``ScanProjection`` and returned as an ``(image, label)``
    pair of tensors.

    Args:
        root_dir: directory that contains one sub-directory per sequence.
        sequences: iterable of sequence directory names, e.g. ``['08']``.

    Raises:
        AssertionError: if a sequence directory does not exist.
        FileNotFoundError: (from ``os.listdir``) if a sequence has no
            ``preprocess`` sub-directory.
    """

    def __init__(self, root_dir, sequences):
        self.root_dir = root_dir
        self.file_list = []
        for seq in sequences:
            seq_dir = os.path.join(root_dir, seq)
            assert os.path.exists(seq_dir), f"Sequence {seq} does not exist in {root_dir}"
            pc_dir = os.path.join(seq_dir, 'preprocess')
            # os.listdir returns names in arbitrary order; sort them so that
            # sample indices map to the same scans on every run and machine.
            self.file_list.extend(
                os.path.join(pc_dir, f)
                for f in sorted(os.listdir(pc_dir))
                if f.endswith('.bin')
            )
        # Setup the projection parameters
        self.projection = ScanProjection(proj_w=2048, proj_h=64)
        # Define the learning map for semantic labels
        # This map is used to convert the original labels to a smaller set of classes
        self.learning_map = {0: 0, 1: 0, 10: 1, 11: 2, 13: 5, 15: 3, 16: 5, 18: 4, 20: 5,
            30: 6, 31: 7, 32: 8, 40: 9, 44: 10, 48: 11, 49: 12, 50: 13,
            51: 14, 52: 0, 60: 9, 70: 15, 71: 16, 72: 17, 80: 18, 81: 19,
            99: 0, 252: 1, 253: 7, 254: 6, 255: 8, 256: 5, 257: 5, 258: 4, 259: 5}
        # Lookup table indexed by original label. Labels that are not keys of
        # learning_map keep the initial value 0; labels above max_key are out
        # of range and raise IndexError when looked up.
        self.max_key = max(self.learning_map.keys())
        self.map_array = np.zeros((self.max_key + 1,), dtype=np.int32)
        for key, value in self.learning_map.items():
            self.map_array[key] = value

    # Read the point cloud data from binary files
    @staticmethod
    def readPCD(path):
        """Read a scan file as an (N, 9) float32 array.

        Columns: x, y, z, intensity, flag, R, G, B, label.
        """
        pcd = np.fromfile(path, dtype=np.float32).reshape(-1, 9)
        return pcd

    def __len__(self):
        """Number of scan files found across all requested sequences."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Return ``(img, label)`` for sample ``idx``.

        ``img`` is a float tensor of shape [C, H, W] and ``label`` a long
        tensor of shape [H, W] holding the remapped class ids.
        """
        pc_path = self.file_list[idx]

        # Load binary data: x, y, z, intensity, flag, R, G, B, label
        pc = self.readPCD(pc_path)
        img, label = self.projection.doProjection(pc)  # img: [H, W, C], label: [H, W]
        # Map the labels using the learning map
        label = self.map_array[label]  # map to smaller set of classes
        # from_numpy avoids the extra copy made by torch.tensor(ndarray)
        img = torch.from_numpy(img).permute(2, 0, 1).contiguous().float()  # to [C, H, W]
        label = torch.from_numpy(label).long()                             # [H, W]
        # No per-channel normalisation is applied; the image holds raw values.
        return img, label

In [None]:
# Validation data: sequence 08, served one scan per batch in dataset order.
dataset_val = KITTISegmentationDataset(
    root_dir='../SemanticKITTI/dataset/sequences',
    sequences=['08'],
)
loader_val = DataLoader(
    dataset=dataset_val,
    batch_size=1,
    num_workers=1,
    shuffle=False,
)


In [None]:
import matplotlib.pyplot as plt

# Visual sanity check: take the first sample of the validation loader, save
# its RGB channels to 'proj_rgb.png' and show the range and RGB images.
for img, label in loader_val:
    print(img.shape, label.shape)

    # channel 0 is the range; channels 6..8 are R, G, B
    proj_range = img[0, 0].numpy()
    proj_rgb = img[0, 6:9].numpy().transpose(1, 2, 0)  # [3, H, W] -> [H, W, 3]

    # Pixels without a point hold -1. Casting negative floats to uint8 is
    # platform-dependent, so clip before scaling; empty pixels become black.
    # Assumes R, G, B are stored in [0, 1] (hence the factor 255) - TODO confirm.
    proj_rgb = (np.clip(proj_rgb, 0.0, 1.0) * 255).astype(np.uint8)

    # save the RGB image to a file
    plt.imsave('proj_rgb.png', proj_rgb)
    print(proj_range.shape, proj_rgb.shape)

    fig, axs = plt.subplots(2, 1, figsize=(16, 2))
    # no axis, so the images use the whole figure
    axs[0].axis('off')
    axs[0].imshow(proj_range, cmap='jet')
    axs[1].axis('off')
    axs[1].imshow(proj_rgb)
    break