In [1]:
import matplotlib.pyplot as plt

In [2]:
import numpy as np

In [3]:
from pathlib import Path

In [4]:
import yaml

In [5]:
import pandas as pd

In [6]:
from tqdm import tqdm

In [7]:
from sklearn import preprocessing

In [10]:
# YAML file whose "labels" section is loaded below as the label configuration.
data_config_path = Path(
    "/home/mrafaat/AliThesis/lidar-bonnetal/train/tasks/semantic/config/filtered-semantic-remapped/labels/filtered-semantic-remapped.yaml"
)

In [11]:
# Load the "labels" section of the config file. It is indexed later by the
# label values found in the scans to obtain a class name for each of them.
# The file is opened in a `with` block so the handle is closed as soon as the
# YAML has been parsed instead of being left open for the kernel's lifetime.
with open(data_config_path, 'r') as labels_config_file:
    labels_config = yaml.safe_load(labels_config_file)["labels"]

In [12]:
# Directory that holds one sub-directory per sequence.
dataset_root_dir = Path(
    "/raid/ali/AliThesis/FilteredSemanticKitti/dataset/sequences/"
)

## Get Sequences

In [13]:
# Names of all sub-directories of the dataset root, in sorted path order.
sequences = []
for entry in sorted(dataset_root_dir.glob("*")):
    if entry.is_dir():
        sequences.append(entry.name)

In [14]:
# Show the sequence directory names that were found.
sequences

['00',
 '01',
 '02',
 '03',
 '04',
 '05',
 '06',
 '07',
 '08',
 '09',
 '10',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '18',
 '19',
 '20',
 '21']

In [15]:
def get_label_data(label_file_path):
    """
    Load the labels of one file and strip the instance part from them.

    Args:
        label_file_path: location of the label file; it is converted to a
            string and handed to ``np.load``
    Returns:
        flat (1-D) array with the semantic class label of each point, i.e.
        the lower 16 bits of every stored value
    """
    raw_labels = np.load(str(label_file_path))
    flat_labels = raw_labels.reshape(-1)
    # keep only the lower 16 bits of each value; the rest is discarded
    return np.bitwise_and(flat_labels, 0xFFFF)

In [16]:
# Build one record per scan: its id, its sequence, and for every class that
# occurs in the scan the number of points carrying that class.
sequences_results = []
for seq in sequences:
    seq_labels_path = dataset_root_dir / seq / "labels"

    # names of all label files of this sequence, in sorted path order
    seq_labels_files = [p.name for p in sorted(seq_labels_path.glob("*.npy"))]

    for label_file in tqdm(seq_labels_files, f"getting data for seq {seq}"):
        label_file_path = seq_labels_path / label_file

        # semantic label of every point in this scan
        label_data = get_label_data(label_file_path)

        # distinct labels present in the scan and how often each one occurs
        classes_in_file, occurences = np.unique(label_data, return_counts=True)

        # identifiers first, then one entry per class found in the scan
        scan_result = {"id": f"{seq}_{label_file_path.stem}", "seq": seq}
        for class_id, point_count in zip(classes_in_file, occurences):
            scan_result[labels_config[class_id]] = point_count

        # add the scan record to the list
        sequences_results.append(scan_result)


getting data for seq 00: 100%|██████████| 4541/4541 [00:05<00:00, 901.23it/s]
getting data for seq 01: 100%|██████████| 1101/1101 [00:01<00:00, 1089.29it/s]
getting data for seq 02: 100%|██████████| 4661/4661 [00:05<00:00, 905.77it/s]
getting data for seq 03: 100%|██████████| 801/801 [00:00<00:00, 918.62it/s]
getting data for seq 04: 100%|██████████| 271/271 [00:00<00:00, 1719.67it/s]
getting data for seq 05: 100%|██████████| 2761/2761 [00:03<00:00, 895.54it/s]
getting data for seq 06: 100%|██████████| 1101/1101 [00:01<00:00, 893.78it/s]
getting data for seq 07: 100%|██████████| 1101/1101 [00:01<00:00, 908.06it/s]
getting data for seq 08: 100%|██████████| 4071/4071 [00:04<00:00, 904.29it/s]
getting data for seq 09: 100%|██████████| 1591/1591 [00:01<00:00, 915.63it/s]
getting data for seq 10: 100%|██████████| 1201/1201 [00:01<00:00, 904.60it/s]
getting data for seq 11: 0it [00:00, ?it/s]
getting data for seq 12: 0it [00:00, ?it/s]
getting data for seq 13: 0it [00:00, ?it/s]
getting data

In [17]:
# Turn the list of per-scan records into a table with one row per scan.
# NOTE(review): a scan record only has keys for the classes found in that scan,
# so classes absent from a scan presumably end up as NaN rather than 0 — confirm
# before summing or averaging these columns.
statistics_dataframe = pd.DataFrame(sequences_results)

In [18]:
# Show the table of per-scan class counts.
statistics_dataframe

Unnamed: 0,id,seq,unlabeled,car,road,parking,sidewalk,building,fence,vegetation,...,motorcycle,person,moving-person,moving-bicyclist,other-ground,truck,moving-other-vehicle,moving-bus,moving-truck,rider
0,00_000000,00,444.0,1483.0,5798,1752.0,1246.0,3597.0,248.0,3685.0,...,,,,,,,,,,
1,00_000001,00,498.0,1588.0,5697,1673.0,1173.0,3852.0,184.0,3546.0,...,,,,,,,,,,
2,00_000002,00,522.0,1798.0,5587,1436.0,980.0,4001.0,199.0,3506.0,...,,,,,,,,,,
3,00_000003,00,517.0,2023.0,5556,1198.0,832.0,4185.0,150.0,3230.0,...,,,,,,,,,,
4,00_000004,00,516.0,2067.0,5557,1206.0,680.0,4353.0,137.0,2910.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23196,10_001196,10,76.0,1586.0,4890,,1598.0,1404.0,3039.0,4853.0,...,,,88.0,,,,,,1841.0,
23197,10_001197,10,79.0,1616.0,4888,,1626.0,1447.0,3008.0,4727.0,...,,,84.0,,,,,,1879.0,
23198,10_001198,10,78.0,1642.0,4903,,1669.0,1484.0,2969.0,4606.0,...,,,81.0,,,,,,1907.0,
23199,10_001199,10,80.0,1669.0,4939,,1724.0,1517.0,2961.0,4468.0,...,,,80.0,,,,,,1939.0,


In [19]:
# Write the per-scan statistics table to disk as CSV, using to_csv's default options.
stats_csv_path = "/home/mrafaat/AliThesis/lidar-bonnetal/train/tasks/semantic/dataset/filtered-semantic/filtered_semantic_remapped_labels_stats.csv"
statistics_dataframe.to_csv(stats_csv_path)