In [8]:
import os
from collections import defaultdict

def analyze_dataset(directory):
    """Print summary statistics for the label files found in ``directory``.

    Every entry of ``directory`` whose name ends in ``.txt`` is read line by
    line.  A non-blank line must split on whitespace into exactly 15 fields:
    field 0 is the class name, fields 8-10 are height, width and length, and
    fields 11-13 are the x, y and z values whose absolute values are reported
    as distances.  Lines with a different field count, or with a non-numeric
    value in fields 8-13, are reported on stdout and skipped.  Objects whose
    height, width and length are not all strictly positive are ignored.

    The report printed to stdout contains:
      * the largest |x|, |y| and |z| seen and the file each came from,
      * the number of counted objects per class,
      * the mean height, width and length per class.

    Files are processed in sorted name order so that ties between files and
    the order in which classes are listed are reproducible.

    Args:
        directory: Path of the directory containing the ``.txt`` label files.

    Returns:
        None.  All results are printed.

    Raises:
        OSError: If ``directory`` cannot be listed or a label file cannot be
            opened.
    """
    max_x, max_y, max_z = 0, 0, 0
    max_x_file, max_y_file, max_z_file = '', '', ''
    class_counts = defaultdict(int)
    # Running [height, width, length] totals per class.  Keeping only the sums
    # (instead of every tuple) is enough to compute the means and keeps memory
    # use independent of the number of objects.
    class_dim_sums = defaultdict(lambda: [0.0, 0.0, 0.0])

    # os.listdir() returns names in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.txt'):
            continue
        filepath = os.path.join(directory, filename)
        with open(filepath, 'r') as f:
            for line in f:
                line = line.strip()
                if line == '':
                    continue
                fields = line.split()
                if len(fields) != 15:
                    print(f"Line with unexpected number of fields ({len(fields)}): {line}")
                    continue
                class_name = fields[0]
                # dimensions: height, width, length; then x, y, z
                try:
                    h, w, length, x, y, z = (float(value) for value in fields[8:14])
                except ValueError as e:
                    print(f"Error parsing line: {line}")
                    print(e)
                    continue

                # Only count objects with strictly positive dimensions
                if not (h > 0 and w > 0 and length > 0):
                    continue

                # Update max distances and filenames (strict '>' keeps the
                # first file that reached a given maximum)
                if abs(x) > max_x:
                    max_x = abs(x)
                    max_x_file = filename
                if abs(y) > max_y:
                    max_y = abs(y)
                    max_y_file = filename
                if abs(z) > max_z:
                    max_z = abs(z)
                    max_z_file = filename

                # Update class counts and dimension totals
                class_counts[class_name] += 1
                totals = class_dim_sums[class_name]
                totals[0] += h
                totals[1] += w
                totals[2] += length

    # Now compute average dimensions; every class present in class_dim_sums
    # has a count of at least one, so the division is safe.
    class_avg_dimensions = {
        class_name: tuple(total / class_counts[class_name] for total in totals)
        for class_name, totals in class_dim_sums.items()
    }

    # Output the results
    print(f"Maximum x distance: {max_x} (in file '{max_x_file}')")
    print(f"Maximum y distance: {max_y} (in file '{max_y_file}')")
    print(f"Maximum z distance: {max_z} (in file '{max_z_file}')")
    print("\nNumber of objects per class:")
    for class_name, count in class_counts.items():
        print(f"{class_name}: {count}")
    print("\nAverage dimensions per class (height, width, length):")
    for class_name, dims in class_avg_dimensions.items():
        print(f"{class_name}: Height={dims[0]:.8f}, Width={dims[1]:.8f}, Length={dims[2]:.8f}")


In [9]:
# Print the label statistics for the rope3d-backup training labels.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
analyze_dataset('/home/javier/datasets/rope3d-backup/training/label_2')
# Alternative input (kitti training labels), currently disabled:
# analyze_dataset('/home/javier/datasets/kitti/training/label_2')



Maximum x distance: 68.4286897083 (in file '62531_fa2sd4a13East154_420_1625814455_1625815202_102_obstacle.txt')
Maximum y distance: 46.2082872934 (in file '62538_fa2sd4a16East154_420_1625809624_1625810158_33_obstacle.txt')
Maximum z distance: 199.474198417 (in file '1784_fa2sd4adatasetWest152_420_1621243901_1621244052_101_obstacle.txt')

Number of objects per class:
trafficcone: 792520
unknown_unmovable: 110141
car: 510701
motorcyclist: 119236
cyclist: 83240
pedestrian: 122095
truck: 13863
bus: 15747
van: 39897
tricyclist: 9771
barrow: 584
unknowns_movable: 37

Average dimensions per class (height, width, length):
trafficcone: Height=0.69741278, Width=0.24883562, Length=0.24749470
unknown_unmovable: Height=0.72289199, Width=1.29234053, Length=1.51686557
car: Height=1.28876225, Width=1.69396488, Length=4.25589252
motorcyclist: Height=1.41811302, Width=0.58210805, Length=1.67083611
cyclist: Height=1.43759932, Width=0.48926327, Length=1.52393096
pedestrian: Height=1.59624139, Width=0.4797