In [3]:
import os
from collections import defaultdict

def analyze_dataset(directory):
    """Print summary statistics for the label files found in ``directory``.

    Every entry of ``directory`` whose name ends in ``.txt`` is opened and
    read line by line. Blank lines are ignored. Each remaining line must
    split into exactly 15 whitespace-separated fields:

    * field 0 is used as the class name,
    * fields 8, 9, 10 are parsed as height, width and length,
    * fields 11, 12, 13 are parsed as the x, y and z coordinates.

    NOTE(review): this matches the KITTI-style label layout -- confirm
    against the dataset documentation.

    Lines with a different field count, or whose numeric fields cannot be
    converted to ``float``, are reported on stdout and skipped.

    The printed report contains:

    * the largest absolute x, y and z value seen, each with the name of the
      file it was found in,
    * the number of objects per class,
    * the mean height, width and length per class.

    Files are visited in sorted name order. ``os.listdir`` returns entries in
    arbitrary order, and since a maximum is only replaced by a strictly
    larger value, the file credited on a tie (and the order in which classes
    are listed) would otherwise vary between machines.

    Args:
        directory: Path of the directory that holds the label files.

    Returns:
        None. All results are written to stdout.
    """
    max_x, max_y, max_z = 0, 0, 0
    max_x_file, max_y_file, max_z_file = '', '', ''
    class_counts = defaultdict(int)
    # Running [height, width, length] totals per class. Only the sums are
    # needed for the means, so individual measurements are not retained.
    class_dim_sums = defaultdict(lambda: [0.0, 0.0, 0.0])

    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.txt'):
            continue
        filepath = os.path.join(directory, filename)
        with open(filepath, 'r') as f:
            for line in f:
                line = line.strip()
                if line == '':
                    continue
                fields = line.split()
                if len(fields) != 15:
                    print(f"Line with unexpected number of fields ({len(fields)}): {line}")
                    continue
                class_name = fields[0]
                try:
                    # Fields 8-10: height, width, length; fields 11-13: x, y, z.
                    height, width, length, x, y, z = (
                        float(value) for value in fields[8:14]
                    )
                except ValueError as e:
                    print(f"Error parsing line: {line}")
                    print(e)
                    continue

                # Track the largest absolute coordinate and where it occurred.
                if abs(x) > max_x:
                    max_x = abs(x)
                    max_x_file = filename
                if abs(y) > max_y:
                    max_y = abs(y)
                    max_y_file = filename
                if abs(z) > max_z:
                    max_z = abs(z)
                    max_z_file = filename

                class_counts[class_name] += 1
                sums = class_dim_sums[class_name]
                sums[0] += height
                sums[1] += width
                sums[2] += length

    # A class only appears in class_dim_sums after at least one object was
    # counted for it, so the divisions below never see a zero count.
    class_avg_dimensions = {}
    for class_name, (sum_h, sum_w, sum_l) in class_dim_sums.items():
        count = class_counts[class_name]
        class_avg_dimensions[class_name] = (sum_h / count, sum_w / count, sum_l / count)

    # Output the results
    print(f"Maximum x distance: {max_x} (in file '{max_x_file}')")
    print(f"Maximum y distance: {max_y} (in file '{max_y_file}')")
    print(f"Maximum z distance: {max_z} (in file '{max_z_file}')")
    print("\nNumber of objects per class:")
    for class_name, count in class_counts.items():
        print(f"{class_name}: {count}")
    print("\nAverage dimensions per class (height, width, length):")
    for class_name, dims in class_avg_dimensions.items():
        print(f"{class_name}: Height={dims[0]:.4f}, Width={dims[1]:.4f}, Length={dims[2]:.4f}")


In [4]:
# Run the label analysis on the training split.
# NOTE(review): this is an absolute, machine-specific path -- consider deriving
# it from a configurable dataset root so the notebook runs elsewhere.
analyze_dataset(
    directory='/home/javier/datasets/DAIR-V2X/single-infrastructure-side-mmdet/training/label_2',
)


Maximum x distance: 63.46617976816844 (in file '002583.txt')
Maximum y distance: 41.5854230976786 (in file '001495.txt')
Maximum z distance: 197.84190339956504 (in file '008639.txt')

Number of objects per class:
Trafficcone: 159930
Car: 105090
Pedestrian: 25194
Motorcyclist: 18256
Cyclist: 17244
Barrowlist: 128

Average dimensions per class (height, width, length):
Trafficcone: Height=0.8396, Width=0.3951, Length=0.4154
Car: Height=1.6908, Width=1.9563, Length=4.5915
Pedestrian: Height=1.6469, Width=0.5629, Length=0.5240
Motorcyclist: Height=1.4587, Width=0.7292, Length=1.7943
Cyclist: Height=1.4287, Width=0.6494, Length=1.6856
Barrowlist: Height=1.0043, Width=0.5887, Length=0.8748
