In [1]:
import matplotlib.pyplot as plt

In [1]:
import numpy as np

In [2]:
from pathlib import Path

In [3]:
import yaml

In [4]:
import pandas as pd

In [5]:
from tqdm.notebook import tqdm

In [6]:
from sklearn import preprocessing

In [7]:
# Label-definition YAML; its "labels" section is loaded in the next cell.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
data_config_path = Path('/home/fusionresearch/AliThesis/lidar-bonnetal/train/tasks/semantic/config/filtered-semantic/labels/filtered-semantic.yaml')

In [8]:
# Load the "labels" section of the config; it is indexed by label id further
# down to obtain each class name.
# The context manager makes sure the file handle is closed once parsing is done
# (a bare open() passed straight to the parser would stay open until collected).
with open(data_config_path, 'r') as config_file:
    labels_config = yaml.safe_load(config_file)["labels"]

In [9]:
# Root folder whose sub-directories are the sequences; each sequence's
# "labels" folder is scanned further down.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
dataset_root_dir = Path('/home/fusionresearch/AliThesis/FilteredSemanticKitti/dataset/sequences/')

## Get Sequences

In [10]:
# Names of the directories found directly under the dataset root,
# listed in sorted path order (plain files are ignored).
sequences = [
    entry.name
    for entry in sorted(dataset_root_dir.glob("*"))
    if entry.is_dir()
]

In [11]:
# Display the sequence names discovered under the dataset root.
sequences

['00',
 '01',
 '02',
 '03',
 '04',
 '05',
 '06',
 '07',
 '08',
 '09',
 '10',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '18',
 '19',
 '20',
 '21']

In [12]:
def get_label_data(label_file_path):
    """
    Load the labels stored in a ``.npy`` file and strip the instance part.

    The loaded array is flattened to one dimension and every entry is masked
    with ``0xFFFF``, so only the lower 16 bits (the semantic class label)
    remain.

    Args:
        label_file_path: location of the ``.npy`` label file; it is converted
            with ``str`` before being handed to ``np.load``.
    Returns:
        1-D array with the semantic class label for each point
    """
    flat_labels = np.load(str(label_file_path)).reshape((-1))
    # keep the low 16 bits only; the upper bits carry the instance label
    return np.bitwise_and(flat_labels, 0xFFFF)

In [13]:
# Per-scan class statistics: one dictionary per label file, collected over
# every sequence. The list is rebuilt from scratch each time the cell runs.
sequences_results = []
for seq in sequences:
    # all label files of this sequence, in sorted path order
    label_paths = sorted((dataset_root_dir / seq / "labels").glob("*.npy"))

    for label_path in tqdm(label_paths, f"getting data for seq {seq}"):
        # semantic label of every point in this scan
        point_labels = get_label_data(label_path)

        # distinct labels present in the scan and how often each one occurs
        class_ids, point_counts = np.unique(point_labels, return_counts=True)

        # identifying fields first, then one entry per class name -> count
        record = {"id": f"{seq}_{label_path.stem}", "seq": seq}
        record.update(
            (labels_config[class_id], count)
            for class_id, count in zip(class_ids, point_counts)
        )

        # collect the scan record
        sequences_results.append(record)


HBox(children=(FloatProgress(value=0.0, description='getting data for seq 00', max=4541.0, style=ProgressStyle…




HBox(children=(FloatProgress(value=0.0, description='getting data for seq 01', max=1101.0, style=ProgressStyle…




HBox(children=(FloatProgress(value=0.0, description='getting data for seq 02', max=4661.0, style=ProgressStyle…




HBox(children=(FloatProgress(value=0.0, description='getting data for seq 03', max=801.0, style=ProgressStyle(…




HBox(children=(FloatProgress(value=0.0, description='getting data for seq 04', max=271.0, style=ProgressStyle(…




HBox(children=(FloatProgress(value=0.0, description='getting data for seq 05', max=2761.0, style=ProgressStyle…




HBox(children=(FloatProgress(value=0.0, description='getting data for seq 06', max=1101.0, style=ProgressStyle…




HBox(children=(FloatProgress(value=0.0, description='getting data for seq 07', max=1101.0, style=ProgressStyle…




HBox(children=(FloatProgress(value=0.0, description='getting data for seq 08', max=4071.0, style=ProgressStyle…




HBox(children=(FloatProgress(value=0.0, description='getting data for seq 09', max=1591.0, style=ProgressStyle…




HBox(children=(FloatProgress(value=0.0, description='getting data for seq 10', max=1201.0, style=ProgressStyle…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='getting data for seq 11', max=1.0, styl…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='getting data for seq 12', max=1.0, styl…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='getting data for seq 13', max=1.0, styl…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='getting data for seq 14', max=1.0, styl…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='getting data for seq 15', max=1.0, styl…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='getting data for seq 16', max=1.0, styl…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='getting data for seq 17', max=1.0, styl…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='getting data for seq 18', max=1.0, styl…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='getting data for seq 19', max=1.0, styl…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='getting data for seq 20', max=1.0, styl…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='getting data for seq 21', max=1.0, styl…




In [17]:
# Build one table from the per-scan dictionaries: one row per scan, one column
# per key that occurs in any of them. A class that never appears in a scan has
# no key in that scan's dictionary, so its cell is NaN (visible in the output
# of the next cell).
statistics_dataframe = pd.DataFrame(sequences_results)

In [18]:
# Display the per-scan statistics table.
statistics_dataframe

Unnamed: 0,id,seq,unlabeled,car,road,parking,sidewalk,building,fence,vegetation,...,motorcycle,person,moving-person,moving-bicyclist,other-ground,truck,moving-other-vehicle,moving-bus,moving-truck,bicyclist
0,00_000000,00,444.0,1483.0,5798,1752.0,1246.0,3597.0,248.0,3685.0,...,,,,,,,,,,
1,00_000001,00,498.0,1588.0,5697,1673.0,1173.0,3852.0,184.0,3546.0,...,,,,,,,,,,
2,00_000002,00,522.0,1798.0,5587,1436.0,980.0,4001.0,199.0,3506.0,...,,,,,,,,,,
3,00_000003,00,517.0,2023.0,5556,1198.0,832.0,4185.0,150.0,3230.0,...,,,,,,,,,,
4,00_000004,00,516.0,2067.0,5557,1206.0,680.0,4353.0,137.0,2910.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23196,10_001196,10,76.0,1586.0,4890,,1598.0,1404.0,3039.0,4853.0,...,,,88.0,,,,,,1841.0,
23197,10_001197,10,79.0,1616.0,4888,,1626.0,1447.0,3008.0,4727.0,...,,,84.0,,,,,,1879.0,
23198,10_001198,10,78.0,1642.0,4903,,1669.0,1484.0,2969.0,4606.0,...,,,81.0,,,,,,1907.0,
23199,10_001199,10,80.0,1669.0,4939,,1724.0,1517.0,2961.0,4468.0,...,,,80.0,,,,,,1939.0,


In [19]:
# Save the per-scan statistics as CSV; the relative file name resolves against
# the current working directory.
# NOTE(review): with pandas' defaults the row index is written as an extra
# unnamed first column — confirm that downstream readers expect it.
statistics_dataframe.to_csv("filtered_semantic_labels_stats.csv")