# Evaluation

This notebook uses a monodepth model trained on KITTI to evaluate its predictive uncertainty, comparing Monte Carlo (MC) sampling against our approximation.

In [None]:
import os
# Configure GPU visibility through environment variables. This is done here,
# ahead of the tensorflow import further down in this cell.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
# Expose only the device with index "1" to this process.
# NOTE(review): machine-specific choice -- adjust to the GPU you want to use.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import numpy as np
import tensorflow as tf
from PIL import Image
import cv2
from matplotlib import pyplot as plt
import time
import pickle
from tqdm import tqdm_notebook as tqdm

from utils.evaluation_utils import load_gt_disp_kitti,convert_disps_to_depths_kitti,compute_errors,pred_depth_derivative
from uncertainty import monodepth_uncertainty

## Preparation

Load the list of test-set images.

In [None]:
# Root of the monodepth checkout ('' resolves relative to the current working
# directory) and root directory the listed image paths are relative to.
monodepth_root = ''
kitti_root = '/your/kitti_raw/path/'

# Text file listing the test images: one sample per line, whitespace-separated paths.
files = monodepth_root + 'utils/filenames/kitti_stereo_2015_test_files_png.txt'

# files_list[i] holds the path tokens of sample i (the cells below only use
# token 0). str.split() without arguments splits on any whitespace, so no token
# keeps a trailing newline; blank lines are skipped because they would
# otherwise produce an unusable entry.
with open(files, 'r') as f:
    files_list = [line.split() for line in f if line.strip()]

In [None]:
# parameters
class params_cl:
    """Plain attribute container that is handed to monodepth_uncertainty."""

    # trained model
    checkpoint_path = monodepth_root + 'runs/monodepth_dropout_onedrop15520745/monodepth_dropout_onedrop/model-181250'

    # run mode
    mode = 'test'
    batch_size = 1
    do_stereo = False

    # network
    encoder = 'vgg'
    use_deconv = False
    height = 256
    width = 512

    # dropout / MC sampling
    use_dropout = True
    drop_rate = 0.5
    mc_samples = 100

    # loss weights
    disp_gradient_loss_weight = 0.1
    lr_loss_weight = 1.0


params = params_cl()

## Evaluate Model

### Run for sample image

In [None]:
# index of the test image used for the qualitative example
idx = 3
sample_image = kitti_root + files_list[idx][0]

# build the uncertainty graph from the parameters above
uncertainty_graph = monodepth_uncertainty(params)

### Forward Prop MC
# returns mean, variance and runtime of the MC forward pass
print('Prop MC forward...')
mc_result = uncertainty_graph.forward_mc(sample_image)
res_mean_mc, res_var_mc, rt_mc = mc_result

### Forward Prop OUR APPROXIMATION
# same triple, produced by our approximation
print('Prop OUR forward...')
our_result = uncertainty_graph.forward_our(sample_image)
res_mean_our, res_var_our, rt_our = our_result

In [None]:
# runtime comparison: display the runtimes returned by forward_mc and
# forward_our as the cell output (MC first, then our approximation)
rt_mc, rt_our

In [None]:
# visualize qualitative result: one row per method (MC on top, our
# approximation below); predicted mean on the left, log-variance on the right
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15,10))

rows = [
    ('MC', res_mean_mc, res_var_mc),
    ('Ours', res_mean_our, res_var_our),
]
for r, (label, mean_img, var_img) in enumerate(rows):
    mean_ax, var_ax = ax[r, 0], ax[r, 1]

    fig.colorbar(mean_ax.imshow(mean_img, cmap='hot', interpolation='nearest'), ax=mean_ax)
    mean_ax.set_title(label + ': mean')

    # the variance is displayed on a log scale
    fig.colorbar(var_ax.imshow(np.log(var_img), cmap='binary', interpolation='nearest'), ax=var_ax)
    var_ax.set_title(label + ': log variance')

plt.show()

### rmse correlation with uncertainty

Get predicted disparities and variances

In [None]:
# Cache file holding [pred_disps, vars_mc, vars_our, rts_mc, rts_our].
preds_cache = 'test_preds.p'

if os.path.isfile(preds_cache):
    # test predictions already exist, load them
    # NOTE(review): pickle.load can execute arbitrary code -- only load a cache
    # file that this notebook wrote itself.
    with open(preds_cache, 'rb') as cache_file:
        pred_disps, vars_mc, vars_our, rts_mc, rts_our = pickle.load(cache_file)
else:
    # get graph
    uncertainty_graph = monodepth_uncertainty(params)

    rts_mc, rts_our = [], []
    pred_disps = []
    vars_mc, vars_our = [], []
    for i in tqdm(range(len(files_list))):
        image_path = kitti_root + files_list[i][0]

        ### Forward Prop MC
        # the MC mean is what gets stored as the predicted disparity
        mean_mc, var_mc, rt_mc = uncertainty_graph.forward_mc(image_path, pp=False)
        rts_mc.append(rt_mc)
        pred_disps.append(mean_mc)
        vars_mc.append(var_mc)

        ### Forward Prop OUR APPROXIMATION
        # only variance and runtime of our approximation are kept
        mean_our, var_our, rt_our = uncertainty_graph.forward_our(image_path, pp=False)
        rts_our.append(rt_our)
        vars_our.append(var_our)

    pred_disps = np.array(pred_disps)

    # persist the results so the expensive loop above is skipped next time
    with open(preds_cache, 'wb') as cache_file:
        pickle.dump([pred_disps, vars_mc, vars_our, rts_mc, rts_our], cache_file)

Compute error-variance pairs

In [None]:
# as in original work
min_depth = 1e-3
max_depth = 80

# load gt and convert disps to depths
# The configured dataset root is used instead of a machine-specific absolute path.
# NOTE(review): assumes the ground truth lives under the same root the test
# images are read from -- confirm for your setup.
gt_disps = load_gt_disp_kitti(kitti_root)
gt_depths, pred_depths, pred_disparities_resized = convert_disps_to_depths_kitti(gt_disps, pred_disps)

# one [abs depth error, MC variance, our variance] triple per valid pixel
abs_diff_var = []
for i in tqdm(range(len(gt_depths))):

    # get depths; enforce thresholds on the prediction (np.clip returns a new
    # array, so pred_depths itself is left untouched)
    gt_depth = gt_depths[i]
    pred_depth = np.clip(pred_depths[i], min_depth, max_depth)

    # get pred disps and derivatives of depth wrt disps
    pred_disp = pred_disparities_resized[i]
    dDepth = pred_depth_derivative(pred_disp)

    # resize the variance maps to the ground-truth resolution
    # (index 0 is taken from each stored variance -- presumably a batch axis; TODO confirm)
    h, w = gt_depth.shape
    v_our = cv2.resize(vars_our[i][0], (w, h), interpolation=cv2.INTER_LINEAR)
    v_mc = cv2.resize(vars_mc[i][0], (w, h), interpolation=cv2.INTER_LINEAR)

    # get gt disps and mask legit disps
    gt_disp = gt_disps[i]
    mask = gt_disp > 0

    # propagate variance through depth computation:
    # disparity variance times the squared derivative of depth wrt disparity
    scale = dDepth[mask]**2
    v_our = v_our[mask] * scale
    v_mc = v_mc[mask] * scale

    # compute absolute depth error
    abs_diff = np.abs(gt_depth[mask] - pred_depth[mask])

    # store all absolute error-variances-pairs
    abs_diff_var.extend(
        [err, var_mc_px, var_our_px]
        for err, var_mc_px, var_our_px in zip(abs_diff, v_mc, v_our)
    )

In [None]:
# convert to a single 2-D array in one pass; columns are
# [abs error, MC variance, our variance]
abs_diff_var = np.array(abs_diff_var)
# error values (first column)
errs = abs_diff_var[:, 0]
# percentiles 1..100 of the error, used as upper bounds of the error bins
intervals = np.percentile(errs, np.arange(1, 101, 1))

Sum the standard deviations (square roots of the variances) within each error-percentile interval

In [None]:
# per-bin accumulators: number of pixels and summed sqrt(variance) per method
n_bins = len(intervals)
counts = np.zeros(n_bins)
sums_our = np.zeros(n_bins)
sums_mc = np.zeros(n_bins)

for i in tqdm(range(n_bins)):
    # bin i covers (previous percentile, this percentile]; the first bin starts
    # at 0 (exclusive)
    lower = 0 if i == 0 else intervals[i-1]
    mask = np.logical_and(abs_diff_var[:, 0] > lower, abs_diff_var[:, 0] <= intervals[i])

    counts[i] = mask.sum()
    # column 2 holds our variance, column 1 the MC variance
    sums_our[i] = np.sqrt(abs_diff_var[mask, 2]).sum()
    sums_mc[i] = np.sqrt(abs_diff_var[mask, 1]).sum()

Dump results

In [None]:
# Store, per error bin, the upper error bound and the mean uncertainty
# (summed sqrt-variance divided by pixel count) of both methods.
# The last bin is dropped from all three arrays.
out_path = 'results/quantitative/unc_error_corr.p'

# make sure the target directory exists before writing
out_dir = os.path.dirname(out_path)
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

# the context manager closes the file handle once the dump is written
with open(out_path, 'wb') as out_file:
    pickle.dump({
        'intervals': intervals[:-1],
        'mean_unc_mc': (sums_mc/counts)[:-1],
        'mean_unc_our': (sums_our/counts)[:-1],
    }, out_file)

### variance differences and runtime comparison

In [None]:
# iterate over different number of mc samples
sample_counts = np.arange(2, 102, 10)

# params is shared with the other cells: remember the configured sample count
# so it can be restored once the sweep is done
original_mc_samples = params.mc_samples

mean_mc_rts, mean_our_rts = [], []
var_diffs_all = []
try:
    for n_samples in sample_counts:

        print(str(n_samples) + ' samples...')
        params.mc_samples = n_samples
        # a fresh graph is built for every sample count
        uncertainty_graph = monodepth_uncertainty(params)

        rts_mc, rts_our = [], []
        var_diffs = []
        for i in tqdm(range(len(files_list))):
            image_path = kitti_root + files_list[i][0]

            ### Forward Prop MC
            mean_mc, var_mc, rt_mc = uncertainty_graph.forward_mc(image_path, pp=False)
            rts_mc.append(rt_mc)

            ### Forward Prop OUR APPROXIMATION
            mean_our, var_our, rt_our = uncertainty_graph.forward_our(image_path, pp=False)
            rts_our.append(rt_our)

            # mean absolute difference between the two variance maps of this image
            var_diffs.append(np.abs(var_our - var_mc).mean())

        # averages over the whole test set for this sample count
        mean_mc_rts.append(np.mean(rts_mc))
        mean_our_rts.append(np.mean(rts_our))
        var_diffs_all.append(np.mean(var_diffs))
finally:
    params.mc_samples = original_mc_samples

In [None]:
# Store the sweep results; an existing result file is never overwritten.
sweep_path = 'results/quantitative/rts&abs_diffs.p'

if not os.path.isfile(sweep_path):
    # make sure the target directory exists before writing
    sweep_dir = os.path.dirname(sweep_path)
    if not os.path.isdir(sweep_dir):
        os.makedirs(sweep_dir)

    # the context manager closes the file handle once the dump is written
    with open(sweep_path, 'wb') as sweep_file:
        pickle.dump({
            'samples': np.arange(2, 102, 10),
            'mean_rt_mc': mean_mc_rts,
            'mean_rt_our': mean_our_rts,
            'mean_abs_diff_vars': var_diffs_all
        }, sweep_file)