In [1]:
import numpy as np
import cv2
import argparse
import torch
import os 
from PIL import Image

In [36]:
path = '../../data/kitti/data_scene_flow/training/disp_noc_0'
directory = os.listdir(path)

disparities = np.zeros((155, 375, 1242), dtype=np.float32)

for i,file in enumerate(directory):
    img = Image.open(path+ '/' + file)
    img = np.array(img) 
    if (img.shape[0] == 375 and img.shape[1] == 1242):
        disparities[i] = img
        
np.save('../../data/kitti/disparities.npy', disparities)

In [13]:
def compute_errors(gt, pred):
    """ Evaluates the predicted depth data on ground truth

    Args:
        gt: numpy array (2D) of the ground depth map
        pred: numpy array (2D) of the predicted depth

    Returns:
        -abs_rel
        -sq_rel
        -rmse
        -rmse_log
        -a1
        -a2x
        -a3
    """


    thresh = np.maximum((gt / pred), (pred / gt))
    a1 = (thresh < 1.25   ).mean()
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()

    rmse = (gt - pred) ** 2
    rmse = np.sqrt(rmse.mean())

    rmse_log = (np.log(gt) - np.log(pred)) ** 2
    rmse_log = np.sqrt(rmse_log.mean())

    abs_rel = np.mean(np.abs(gt - pred) / gt)

    sq_rel = np.mean(((gt - pred)**2) / gt)

    return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3

In [80]:
width_to_focal = dict()
width_to_focal[1242] = 721.5377
width_to_focal[1241] = 718.856
width_to_focal[1224] = 707.0493
width_to_focal[1238] = 718.3351
        
def load_gt_disp_kitti(path):
    gt_disparities = []
    for i in range(155):
        disp = cv2.imread(path + "/training/disp_noc_0/" + str(i).zfill(6) + "_10.png", -1)
        disp = disp.astype(np.float32) / 256
        gt_disparities.append(disp)
    return gt_disparities

def convert_disps_to_depths_kitti(gt_disparities, pred_disparities):
    gt_depths = []
    pred_depths = []
    pred_disparities_resized = []
    
    for i in range(len(gt_disparities)):
        gt_disp = gt_disparities[i]
        height, width = gt_disp.shape

        pred_disp = pred_disparities[i]
        pred_disp = width * cv2.resize(pred_disp, (width, height), interpolation=cv2.INTER_LINEAR)

        pred_disparities_resized.append(pred_disp) 

        mask = gt_disp > 0

        gt_depth = width_to_focal[width] * 0.54 / (gt_disp + (1.0 - mask))
        pred_depth = width_to_focal[width] * 0.54 / pred_disp

        gt_depths.append(gt_depth)
        pred_depths.append(pred_depth)
        
    return gt_depths, pred_depths, pred_disparities_resized

In [81]:
def evaluate_kitti_gt(predicted_disp_path,gt_path,min_depth,max_depth):
    """ Evaluates the predicted depth data for the KITI dataset on the 200 ground truth files 

    Args:
        gt: numpy array (2D) of the ground depth map
        pred: numpy array (2D) of the predicted depth

    Returns:
        -abs_rel
        -sq_rel
        -rmse
        -rmse_log
        -a1
        -a2
        -a3
    """
    
    pred_disparities = load_gt_disp_kitti(gt_path) #np.load(predicted_disp_path)
    
    num_samples = 155
    gt_disparities = load_gt_disp_kitti(gt_path)
    gt_depths, pred_depths, pred_disparities_resized = convert_disps_to_depths_kitti(gt_disparities, pred_disparities)
    
    rms     = np.zeros(num_samples, np.float32)
    log_rms = np.zeros(num_samples, np.float32)
    abs_rel = np.zeros(num_samples, np.float32)
    sq_rel  = np.zeros(num_samples, np.float32)
    d1_all  = np.zeros(num_samples, np.float32)
    a1      = np.zeros(num_samples, np.float32)
    a2      = np.zeros(num_samples, np.float32)
    a3      = np.zeros(num_samples, np.float32)
    
    for i in range(num_samples):
        gt_depth = gt_depths[i]
        pred_depth = pred_depths[i]
        
        pred_depth[pred_depth < min_depth] = min_depth
        pred_depth[pred_depth > max_depth] = max_depth
        
        gt_disp = gt_disparities[i]
        mask = gt_disp > 0
        pred_disp = pred_disparities_resized[i]
        disp_diff = np.abs(gt_disp[mask] - pred_disp[mask])
        bad_pixels = np.logical_and(disp_diff >= 3, (disp_diff / gt_disp[mask]) >= 0.05)
        d1_all[i] = 100.0 * bad_pixels.sum() / mask.sum()
        
        abs_rel[i], sq_rel[i], rms[i], log_rms[i], a1[i], a2[i], a3[i] = compute_errors(gt_depth[mask], pred_depth[mask])

    return abs_rel, sq_rel, rms, log_rms, a1, a2, a3    

In [84]:
evaluate_kitti_gt('../../data/kitti/disparities.npy','../../data/kitti/data_scene_flow/',0,np.max(load_gt_disp_kitti('../../data/kitti/data_scene_flow/')))



(array([  3.49629303e-08,   3.47661100e-08,   3.50645450e-08,
          3.53705225e-08,   3.49424951e-08,   3.53320928e-08,
          3.49319045e-08,   3.52863410e-08,   3.45829747e-08,
          3.47114870e-08,   3.49832341e-08,   3.46009017e-08,
          3.46640370e-08,   3.53743275e-08,   3.52079290e-08,
          3.49085134e-08,   3.50679912e-08,   3.46720022e-08,
          3.48112188e-08,   3.56069521e-08,   3.47025555e-08,
          3.54130378e-08,   3.53091565e-08,   3.48693625e-08,
          3.50472114e-08,   3.51218041e-08,   3.51794789e-08,
          3.52817793e-08,   3.51796068e-08,   3.49628166e-08,
          3.53735636e-08,   3.46625235e-08,   3.52245948e-08,
          3.54254830e-08,   3.55022785e-08,   3.49482541e-08,
          3.51589442e-08,   3.49082399e-08,   3.50539189e-08,
          3.53978606e-08,   3.56456553e-08,   3.53089646e-08,
          3.53748035e-08,   3.49837812e-08,   3.48623530e-08,
          3.47115900e-08,   3.46525759e-08,   3.52739207e-08,
        

In [69]:
gt = load_gt_disp_kitti('../../data/kitti/data_scene_flow/')
np.min(gt)

0.0

In [23]:
#from eval.eval_utils import compute_errors
import numpy as np
import cv2

class EvaluateKittiGT():
    """
    Class that evaluates the KITTI data set on the 200 ground truth image
    """

    def __init__(self,predicted_disp_path,gt_path,min_depth=0,max_depth=80):
        """

        Args:
            predicted_disp_path: path where the predicted disparities are stored after training
            gt_path: path of the ground truth
            min_depth: minimum depth used in predicted disparity map
            max_depth: maximim depth used in predicted disparity map
        """
        super(EvaluateKittiGT, self).__init__()

        self.width_to_focal = dict()
        self.width_to_focal[1242] = 721.5377
        self.width_to_focal[1241] = 718.856
        self.width_to_focal[1224] = 707.0493
        self.width_to_focal[1238] = 718.3351

        self.predicted_disp_path = predicted_disp_path
        self.gt_path = gt_path
        self.min_depth = min_depth
        self.max_depth = max_depth


    def evaluate(self):
        """
        Evaluates the predicted depth data for the KITI dataset on the 200 ground truth files

        Args:
            gt: numpy array (2D) of the ground depth map
            pred: numpy array (2D) of the predicted depth

        Returns:
            -abs_rel
            -sq_rel
            -rmse
            -rmse_log
            -a1
            -a2
            -a3
        """

        pred_disparities = np.load(self.predicted_disp_path)

        num_samples = 155
        gt_disparities = self.__load_gt_disp_kitti(self.gt_path)
        gt_depths, pred_depths, pred_disparities_resized = self.__convert_disps_to_depths_kitti(gt_disparities, pred_disparities)

        rms     = np.zeros(num_samples, np.float32)
        log_rms = np.zeros(num_samples, np.float32)
        abs_rel = np.zeros(num_samples, np.float32)
        sq_rel  = np.zeros(num_samples, np.float32)
        d1_all  = np.zeros(num_samples, np.float32)
        a1      = np.zeros(num_samples, np.float32)
        a2      = np.zeros(num_samples, np.float32)
        a3      = np.zeros(num_samples, np.float32)

        for i in range(num_samples):
            gt_depth = gt_depths[i]
            pred_depth = pred_depths[i]

            pred_depth[pred_depth < self.min_depth] = self.min_depth
            pred_depth[pred_depth > self.max_depth] = self.max_depth

            gt_disp = gt_disparities[i]
            mask = gt_disp > 0
            pred_disp = pred_disparities_resized[i]
            disp_diff = np.abs(gt_disp[mask] - pred_disp[mask])
            bad_pixels = np.logical_and(disp_diff >= 3, (disp_diff / gt_disp[mask]) >= 0.05)
            d1_all[i] = 100.0 * bad_pixels.sum() / mask.sum()

            abs_rel[i], sq_rel[i], rms[i], log_rms[i], a1[i], a2[i], a3[i] = compute_errors(gt_depth[mask], pred_depth[mask])

        return abs_rel, sq_rel, rms, log_rms, a1, a2, a3

    def __load_gt_disp_kitti(self, path):
        """
        Loads in the ground truth files from the KITTI Stereo Dataset

        Args:
         -path (string): path to the the training files

        Returns:
         -gt_disparities (list): list of ground truth disparities
        """
        gt_disparities = []
        for i in range(155):
            disp = cv2.imread(path + "/training/disp_noc_0/" + str(i).zfill(6) + "_10.png", -1)
            disp = disp.astype(np.float32) / 256
            gt_disparities.append(disp)
        return gt_disparities

    def __convert_disps_to_depths_kitti(self, gt_disparities, pred_disparities):
        """
        Converts the ground truth disparities from the KITTI Stereo dataset and the predictions to depth values

        Args:
         -gt_disparities (list): ground truth disparities
         -pred_disparities (list): predicted disparities

        Returns:
         -gt_depths (list): list of ground truth depths
         -pred_depths (list): list of predicted depths
         -pred_depths_resized (list): list of predicted depths, resized to ground truth disparity size

        """
        gt_depths = []
        pred_depths = []
        pred_disparities_resized = []

        for i in range(len(gt_disparities)):
            gt_disp = gt_disparities[i]
            height, width = gt_disp.shape

            pred_disp = pred_disparities[i]
            pred_disp = width * cv2.resize(pred_disp, (width, height), interpolation=cv2.INTER_LINEAR)

            pred_disparities_resized.append(pred_disp)

            mask = gt_disp > 0

            gt_depth = self.width_to_focal[width] * 0.54 / (gt_disp + (1.0 - mask))
            pred_depth = self.width_to_focal[width] * 0.54 / pred_disp

            gt_depths.append(gt_depth)
            pred_depths.append(pred_depth)

        return gt_depths, pred_depths, pred_disparities_resized

In [24]:
kitti_gt = EvaluateKittiGT(predicted_disp_path='../../data/kitti/disparities.npy', gt_path='../../data/kitti/data_scene_flow/', min_depth=0, max_depth=80)
kitti_gt.evaluate()



(array([ 0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.9

In [26]:
EvaluateKittiGT(predicted_disp_path='../../data/kitti/disparities.npy', gt_path='../../data/kitti/data_scene_flow/', min_depth=0, max_depth=80).evaluate()



(array([ 0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.99999684,  0.99999684,
         0.99999684,  0.99999684,  0.99999684,  0.9