## 在KITTI数据集上评估我的定位方法的效果

KITTI Object 文件目录结构

* object/
    - training/（**NOTE：本程序使用这部分数据**）
        - calib/
            - 00000.txt
            - 00001.txt
            - ...
            - 07480.txt
            
        - image_2/
            - 00000.png
            - 00001.png
            - ...
            - 07480.png
            
        - label_2/
            - 00000.txt
            - 00001.txt
            - ...
            - 07480.txt
            
    - testing/  (**NOTE: 文件夹下没有 label_2 这个目录，需要自己生成标签文件，然后上传到服务器评估**)
        - calib/
            - 00000.txt
            - 00001.txt
            - ...
            - 07510.txt
            
        - image_2/
            - 00000.png
            - 00001.png
            - ...
            - 07510.png

In [61]:
import os
import cv2
import json

### 将定位结果保存成文件

In [63]:
"""
行人3D定位部分，不涉及到 FairMOT 的代码，而是单独写的这部分代码，直接利用KITTI数据集 ——
    行人 bbox、
    行人高度、
    内参矩阵的 fx和fy、
    图片分辨率

我需要清楚的是：一个 calib_xxxxx.txt，对应一张图片 img_xxxxx.png，对应多个行人==多个行人标签
"""


data_dir = '/mnt/sdb/public/data/kitti/object/training'
label_dir = os.path.join(data_dir, 'label_2')
img_dir = os.path.join(data_dir, 'image_2')
calib_dir = os.path.join(data_dir, 'calib')

square_dists = [100, 400, 900, 1600, 2500]
flag2interval = {
        0: 'd00_10',
        1: 'd10_20',
        2: 'd20_30',
        3: 'd30_40',
        4: 'd40_50',
        5: 'd_gt_50'
    }

# count = 0
for label_file in os.listdir(label_dir):
    if label_file.endswith('.txt'):
#         count += 1

        begin, end = 0, label_file.find('.')
        img_name = label_file[begin:end] + '.png'
        
        img = cv2.imread(os.path.join(img_dir, img_name))
        img_height, img_width, _ = img.shape
        
        img_x_center, img_y_center = img_width/2, img_height/2
        

#         print('calib_dir:', calib_dir)
#         print('calib_file:', calib_file)
        with open(os.path.join(label_dir, label_file)) as f_label, open(os.path.join(calib_dir, label_file)) as f_calib:
            fx = None
            fy = None
            lines = f_calib.readlines()
            for line in lines:
                if "P2:" in line:
                    values = line.strip().split(' ')
                    if len(values) > 0:
                        fx = float(values[1])
                        fy = float(values[6])
                    break
            
            
            ltrb = None
            H = None
            xyz = None
            
            """
            results存放计算ground-truth和对应的计算结果，一张图片可能有多个行人，所以要设计成这样
            
            {
                person_id: {'gt': ..., 'computed': ...}  
                ...
                ...
            }
            注：一个文件可能包含多个Pedestrian，这里person_id就是Pedestrian所在文件的行数，从0开始
            """
            results = {}
            
            lines = f_label.readlines()
            for idx, line in enumerate(lines):
                values = line.strip().split(' ')
                if len(values) > 0 and values[0] == 'Pedestrian':
                    results[idx] ={}
                    
                    # bbox of pedestrian in image plane
                    ltrb = values[4:8]
                    bbox_x_center = (float(ltrb[0]) + float(ltrb[2]))/2
                    bbox_y_center = (float(ltrb[1]) + float(ltrb[3]))/2
                    h = float(ltrb[3]) - float(ltrb[1])
                    
                    if h >= 40:
                        mode = 'easy'
                    elif h >= 25 and h < 40:
                        mode = 'moderate'
                    else:
                        mode = 'hard'
                    results[idx]['mode'] = mode
                    
                    """
                    解决单目物体尺度歧义性 --> pedestrian height: 
                    有两种选择，1）是高斯分布的均值1.7m，2）是直接从KITTI文件读取行人高度
                    注：因为我要在KITTI数据集评估，它本身提供了行人身高这个值，所以直接读取这个值进行计算
                        当数据集没有提供行人准确身高这个数值时，使用高斯分布的均值代入，这两种方法我都实验了效果，
                        差距并不大，实验证明使用准确的行人身高，定位误差会更小一些，与实际相符
                    """
#                     option 1:
                    H = float(values[8])
#                     option 2:
#                     H = 1.7

                    # ground-truth pedestrian position in camera coordiante
                    xyz = values[11:14]
                    
                    square_dist = float(xyz[0])**2 + float(xyz[1])**2 + float(xyz[2])**2
                    flag = -1
                    for i,elem in enumerate(square_dists):
                        if square_dist < elem:
                            flag = i
                            break
                    if flag == -1:
                        flag = 5
                    interval = flag2interval[flag]
                    
                    results[idx]['interval'] = interval
                    
                    computed_x = fy/fx * H * (bbox_x_center-img_x_center) / h
                    computed_y = H * (bbox_y_center-img_y_center) / h
                    computed_z = H * fy / h
#                     print(label_file, '> ground-truth:', xyz[0], xyz[1], xyz[2])
#                     print(label_file, '> computed:', '{:.2f}'.format(computed_x), '{:.2f}'.format(computed_y), '{:.2f}'.format(computed_z))
                    
#                     results['gt'] = [float(xyz[0]), float(xyz[1]), float(xyz[2])]
                    results[idx]['gt'] = xyz
                    results[idx]['computed'] = ['{:.2f}'.format(computed_x), '{:.2f}'.format(computed_y), '{:.2f}'.format(computed_z)]
                                        
            if len(results) != 0:
                result_name = label_file[begin:end] + '.json'
                with open(os.path.join(data_dir, 'localization', result_name), 'w') as f_result:
                    json.dump(results, f_result)

### 评估定位结果

In [56]:
loc_dir = os.path.join(data_dir, 'localization')
def analyze_localization():
    with open(os.path.join(data_dir, 'localization', 'error_rate.json'), 'w') as fout:
        data = {}
        
        for f_name in os.listdir(loc_dir):
            if f_name.endswith('.json') and f_name != 'error_rate.json':  # 别忽略第二个条件
                f = os.path.join(loc_dir, f_name)
                with open(f) as fin:
                    results = json.load(fin)
                    data[f_name] = {}

                    for key in results.keys():
                        # convert string to float
                        gt = [ float(elem) for elem in results[key]['gt'] ]
                        computed = [ float(elem) for elem in results[key]['computed'] ]
                        # compute error
                        error = [ g-c for g,c in zip(gt, computed) ]
                        # compute error rate
                        error_rate = []
                        for e,g in zip(error, gt):
                            if g == 0:
                                error_rate.append(abs(e))
                            else:
                                error_rate.append(abs(e/g))
                        data[f_name][key] = error_rate
                        print(f_name, key, '-> error_rate:', error_rate)
                    
        json.dump(data, fout, indent=4)

In [57]:
analyze_localization()

007157.json 1 -> error_rate: [0.7260869565217394, 0.5771812080536912, 0.7272727272727273]
006301.json 7 -> error_rate: [0.023316062176165768, 0.7971014492753623, 0.02710027100271009]
006083.json 0 -> error_rate: [0.11764705882352935, 0.6518518518518519, 0.05283757338551868]
006083.json 2 -> error_rate: [0.274390243902439, 0.9150943396226415, 0.017561983471074374]
004240.json 2 -> error_rate: [0.13440860215053752, 0.7804878048780488, 0.012767066180302291]
003302.json 5 -> error_rate: [0.048739495798319335, 0.7586206896551725, 0.007727975270479108]
001371.json 6 -> error_rate: [0.12033195020746888, 0.9606741573033708, 0.010118897040222579]
003277.json 0 -> error_rate: [0.05897435897435897, 0.6643835616438356, 0.054545454545454536]
007156.json 11 -> error_rate: [0.2598425196850393, 1.2590673575129532, 0.007257291524031236]
007156.json 12 -> error_rate: [0.337313432835821, 1.2857142857142856, 0.005410523468145619]
007301.json 6 -> error_rate: [0.009389671361502356, 0.6029411764705882, 0.01

006685.json 0 -> error_rate: [0.03278688524590167, 0.6233766233766234, 0.09239940387481373]
005482.json 2 -> error_rate: [0.055693069306930715, 0.8392857142857143, 0.031353135313531275]
005482.json 3 -> error_rate: [0.059040590405904106, 0.7857142857142857, 0.029590948651000856]
001161.json 0 -> error_rate: [0.03999999999999994, 0.6688741721854304, 0.04149933065595711]
006997.json 1 -> error_rate: [0.18695652173913052, 0.5695364238410596, 0.19955654101995574]
000015.json 1 -> error_rate: [0.07999999999999997, 0.7218045112781956, 0.06587615283267458]
000015.json 2 -> error_rate: [0.16666666666666674, 0.7446808510638298, 0.021126760563380347]
000015.json 3 -> error_rate: [0.16363636363636366, 0.7714285714285714, 0.021882741535920628]
000015.json 4 -> error_rate: [0.229050279329609, 0.7867647058823529, 0.024463519313304732]
000931.json 1 -> error_rate: [0.0358514724711907, 0.8098591549295775, 0.027613412228796954]
000931.json 2 -> error_rate: [0.043580683156655003, 0.8273381294964028, 0.0

ZeroDivisionError: float division by zero

### 评估检测结果

In [None]:
def analyze_detection():
    # 仅对有行人的图片进行检测
    for f_name in os.listdir(loc_dir):
        if f_name.endswith('.json'):
            begin, end = 0, f_name.find('.')
            img_name = f_name[begin:end]
            os.path.join()

### 用OpenCV读取照片的 宽度、长度

In [4]:
data_dir = '/mnt/sdb/public/data/kitti/object/training'
label_dir = os.path.join(data_dir, 'label_2')
img_dir = os.path.join(data_dir, 'image_2')

img = cv2.imread(os.path.join(img_dir, '000000.png'))
height, width, channels = img.shape

In [38]:
len({})

0

In [39]:
os.path.join(data_dir, 'localization', 'aaa')

'/mnt/sdb/public/data/kitti/object/training/localization/aaa'