In [40]:
import numpy as np
import cv2
import os
import glob
from collections import Counter

# KITTI dataset locations, given relative to the current working directory.
training_img_path = 'kitti/training/image_2'  # path to your training images
training_label_path = 'kitti/training/label_2'  # path to your training labels
testing_img_path = 'kitti/testing/image_2'  # path to your testing images

# Gather the PNG files of each split in sorted order and remember how many
# there are; the counts are read later when computing size percentages.
train_image_names = sorted(glob.glob(os.path.join(training_img_path, '*.png')))
train_images_length = len(train_image_names)

test_image_names = sorted(glob.glob(os.path.join(testing_img_path, '*.png')))
test_images_length = len(test_image_names)


def get_image_size(image_names, kind='', reader=None):  # kind should be in [train, test]
    """Summarise the distinct image sizes found in a list of image files.

    Every file is loaded once and its (width, height) pair is tallied. A line
    is printed the first time each distinct pair is encountered, and the
    returned lists follow that first-seen order.

    Args:
        image_names: iterable of image file paths.
        kind: 'train' or 'test'; used only to label the printed lines.
        reader: optional callable that maps a path to an object whose
            ``shape`` starts with (height, width), or to None when the file
            cannot be read. Defaults to ``cv2.imread``.

    Returns:
        A tuple of five parallel lists with one entry per distinct size:
        widths, heights, number of images of that size, width/height ratio
        formatted as a '.3f' string, and the share of readable images with
        that size formatted as a '.4f' string.
        If ``kind`` is not 'train' or 'test', a message is printed and None
        is returned.
    """
    if kind not in ('train', 'test'):
        print('kind must be in [train, test]')
        return
    if reader is None:
        reader = cv2.imread

    size_counts = Counter()  # (width, height) -> number of images
    unique_sizes = []        # distinct (width, height) pairs, first-seen order
    print('This will take a lot of time, please wait until done!')
    for image in image_names:
        im = reader(image)
        if im is None:
            # cv2.imread reports an unreadable file by returning None rather
            # than raising; skip it so one bad file does not abort the scan.
            print('Warning: could not read image {}, skipped'.format(image))
            continue
        height, width = im.shape[:2]
        size = (width, height)
        # Compare the pair as a whole: checking width and height separately
        # would miss a new size built from two already-seen values.
        if size not in size_counts:
            print('{} image size: {}*{}, width height ratio:{:.3f}'.format(kind, width, height, width*1.0/height))
            unique_sizes.append(size)
        size_counts[size] += 1
    print("Done!")

    # Percentages are relative to the images that were actually measured.
    total = sum(size_counts.values())
    image_widths = [size[0] for size in unique_sizes]
    image_heights = [size[1] for size in unique_sizes]
    nums = [size_counts[size] for size in unique_sizes]
    width_height_ratios = [format(size[0]*1.0/size[1], '.3f') for size in unique_sizes]
    percentage = [format(size_counts[size]*1.0/total, '.4f') for size in unique_sizes]

    return image_widths, image_heights, nums, width_height_ratios, percentage

# Scan both splits. Each call returns five parallel lists (widths, heights,
# image counts, width/height ratios, fractions), one entry per reported size.
train_image_widths, train_image_heights, train_nums, train_w_h_ratios, train_per = get_image_size(
    train_image_names, kind='train')
test_image_widths, test_image_heights, test_nums, test_w_h_ratios, test_per = get_image_size(
    test_image_names, kind='test')

# Shared table layout: centred columns for size, count, ratio and percentage.
size_row_format = '|{:^4}*{:^3}|{:^6}|{:^7}|{:^16.2%}|'
size_table_header = '|  size  | nums |  w/h  | nums percentage|'

# The fractions are returned as strings, so convert them to float before the
# '%' presentation type renders them as percentages.
train_results = [
    size_row_format.format(width, height, num, wph, float(numper))
    for width, height, num, wph, numper in zip(
        train_image_widths, train_image_heights, train_nums, train_w_h_ratios, train_per)
]
test_results = [
    size_row_format.format(width, height, num, wph, float(numper))
    for width, height, num, wph, numper in zip(
        test_image_widths, test_image_heights, test_nums, test_w_h_ratios, test_per)
]

print('training')
print(size_table_header)
for result in train_results:
    print(result)
print('-' * 40)
print('testing')
print(size_table_header)
for result in test_results:
    print(result)


This will take a lot of time, please wait until done!
train image size: 1224*370, width height ratio:3.308
train image size: 1242*375, width height ratio:3.312
train image size: 1238*374, width height ratio:3.310
train image size: 1241*376, width height ratio:3.301
Done!
This will take a lot of time, please wait until done!
test image size: 1242*375, width height ratio:3.312
test image size: 1224*370, width height ratio:3.308
test image size: 1226*370, width height ratio:3.314
Done!
training
|  size  | nums |  w/h  | mums percentage|
|1224*370|0.1029 | 3.308 |     10.29%     |
|1242*375|0.8097 | 3.312 |     80.97%     |
|1238*374|0.0479 | 3.310 |     4.79%      |
|1241*376|0.0396 | 3.301 |     3.96%      |
----------------------------------------
testing
|  size  | nums |  w/h  | mums percentage|
|1242*375|0.8751| 3.312 |     87.51%     |
|1224*370|0.1155| 3.308 |     11.55%     |
|1226*370|0.0094| 3.314 |     0.94%      |


In [42]:
# Re-render the size tables from the lists produced by the get_image_size
# calls in the earlier cell; no images are re-read here.
size_row_format = '|{:^4}*{:^3}|{:^6}|{:^7}|{:^16.2%}|'
size_table_header = '|  size  | nums |  w/h  | nums percentage|'

# The fractions are stored as strings, so convert them to float before the
# '%' presentation type renders them as percentages.
train_results = [
    size_row_format.format(width, height, num, wph, float(numper))
    for width, height, num, wph, numper in zip(
        train_image_widths, train_image_heights, train_nums, train_w_h_ratios, train_per)
]
test_results = [
    size_row_format.format(width, height, num, wph, float(numper))
    for width, height, num, wph, numper in zip(
        test_image_widths, test_image_heights, test_nums, test_w_h_ratios, test_per)
]

print('training')
print(size_table_header)
for result in train_results:
    print(result)
print('-' * 40)
print('testing')
print(size_table_header)
for result in test_results:
    print(result)

training
|  size  | nums |  w/h  | mums percentage|
|1224*370| 770  | 3.308 |     10.29%     |
|1242*375| 6057 | 3.312 |     80.97%     |
|1238*374| 358  | 3.310 |     4.79%      |
|1241*376| 296  | 3.301 |     3.96%      |
----------------------------------------
testing
|  size  | nums |  w/h  | mums percentage|
|1242*375| 6579 | 3.312 |     87.51%     |
|1224*370| 868  | 3.308 |     11.55%     |
|1226*370|  71  | 3.314 |     0.94%      |


In [46]:
# Class names paired with integer ids; the id is the name's position in the tuple.
class_sets = ('pedestrian', 'cyclist', 'car', 'dontcare')
class_sets_dict = {name: index for index, name in enumerate(class_sets)}
print(class_sets_dict)

{'cyclist': 1, 'pedestrian': 0, 'dontcare': 3, 'car': 2}
