In [None]:
import skimage.transform

In [None]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from easydict import EasyDict as edict

from main_monodepth_pytorch import Model
%reload_ext autoreload
%autoreload 2

## Train

Check if CUDA is available

In [None]:
# True when PyTorch can use a CUDA device; the training configuration further down requests 'cuda:0'.
torch.cuda.is_available()

In [None]:
# Number of CUDA devices visible to PyTorch.
torch.cuda.device_count()

In [None]:
# Release unused cached GPU memory held by PyTorch's caching allocator before building the model.
torch.cuda.empty_cache()

In [None]:
# Training configuration handed to Model (defined in main_monodepth_pytorch; how each
# key is consumed is not visible in this notebook).
# NOTE(review): every path is an absolute Z:/ location specific to one machine —
# consider deriving them from a single configurable root directory.
# NOTE(review): 'data_dir' and 'val_data_dir' are the same directory, so validation
# presumably runs on the training data — confirm this is intended.
dict_parameters = edict({'data_dir':'Z:/DL_Project/data',
                         'val_data_dir':'Z:/DL_Project/data',
                         'model_path':'Z:/DL_Project/monodepth_resnet18_002_cpt.pth', 
                         'output_directory':'Z:/DL_Project/output_18_002',
                         'input_height':256,
                         'input_width':512,
                         'model':'resnet18_md',
                         'pretrained':True,
                         'mode':'train',
                         'epochs':50,
                         'learning_rate':1e-4,
                         'batch_size': 8,
                         'adjust_lr':True,
                         'device':'cuda:0',
                         'do_augmentation':True,
                         'augment_parameters':[0.8, 1.2, 0.5, 2.0, 0.8, 1.2],
                         'print_images':False,
                         'print_weights':False,
                         'input_channels': 3,
                         'num_workers': 8,
                         'use_multiple_gpu': False})

In [None]:
# Build the training model from dict_parameters, then load an existing checkpoint into it.
# NOTE(review): dict_parameters selects 'resnet18_md' (and a resnet18 'model_path'), but the
# checkpoint loaded here is named 'monodepth_resnet50_002_last.pth' — confirm the weights
# actually match the configured architecture.
model = Model(dict_parameters)
model.load('Z:/DL_Project/monodepth_resnet50_002_last.pth')

In [None]:
# Start training with the settings in dict_parameters (Model.train lives in
# main_monodepth_pytorch; its exact behaviour is not visible from this notebook).
model.train()

## Test the model

In [None]:
# Test-time configuration handed to Model with mode='test'.
# NOTE(review): 'model' is 'resnet50_md' while 'model_path' points at a checkpoint named
# 'monodepth_resnet18_001_cpt.pth' — confirm the architecture and the weights agree.
# NOTE(review): paths are absolute Z:/ locations specific to one machine.
dict_parameters_test = edict({'data_dir':'Z:/DL_Project/xxx',
                              'model_path':'Z:/DL_Project/monodepth_resnet18_001_cpt.pth',
                              'output_directory':'Z:/DL_Project/xxx/',
                              'input_height':256,
                              'input_width':512,
                              'model':'resnet50_md',
                              'pretrained':False,
                              'mode':'test',
                              'device':'cuda:0',
                              'input_channels':3,
                              'num_workers':8,
                              'use_multiple_gpu':False})
# Separate Model instance for inference, kept distinct from the training `model`.
model_test = Model(dict_parameters_test)

In [None]:
# Run inference with the test configuration (Model.test lives in main_monodepth_pytorch;
# presumably it writes the disparity .npy files loaded in the next cell — confirm).
model_test.test()

In [None]:
# Load the saved disparity predictions from disk.
# NOTE(review): this hard-coded path duplicates dict_parameters_test.output_directory.
disp = np.load('Z:/DL_Project/xxx/disparities_pp.npy')  # Or disparities.npy for output without post-processing
# Trailing expression: display the array shape as the cell output.
disp.shape

In [None]:
# Resize the first disparity map to 375 rows x 1242 columns and preview it
# with the 'plasma' colormap.
disp_to_img = skimage.transform.resize(disp[0].squeeze(), [375, 1242], mode='constant')
plt.imshow(disp_to_img, cmap='plasma')

Save a color image

In [None]:
# Save the current disparity image as a colour PNG named after the checkpoint file.
# basename/splitext strip the directory and extension without assuming '/' separators
# or a 4-character extension (the previous `.split('/')[-1][:-4]` did both).
checkpoint_stem = os.path.splitext(os.path.basename(dict_parameters_test.model_path))[0]
plt.imsave(os.path.join(dict_parameters_test.output_directory,
                        checkpoint_stem + '_test_output.png'), disp_to_img, cmap='plasma')

Save all test images

In [None]:
# Write every predicted disparity map to the output directory as pred_<index>.png.
# `disp_to_img` is rebound on each pass, so after the loop it holds the last image.
for i, disp_map in enumerate(disp):
    disp_to_img = skimage.transform.resize(disp_map.squeeze(), [375, 1242], mode='constant')
    plt.imsave(
        os.path.join(dict_parameters_test.output_directory, 'pred_{}.png'.format(i)),
        disp_to_img,
        cmap='plasma',
    )

Save a grayscale image

In [None]:
# Save the current disparity image as a grayscale PNG named after the checkpoint file.
# NOTE: `disp_to_img` is whatever was assigned last; if the "save all test images" loop
# has been run, that is the final test image rather than disp[0].
# basename/splitext strip the directory and extension without assuming '/' separators
# or a 4-character extension (the previous `.split('/')[-1][:-4]` did both).
checkpoint_stem = os.path.splitext(os.path.basename(dict_parameters_test.model_path))[0]
plt.imsave(os.path.join(dict_parameters_test.output_directory,
                        checkpoint_stem + '_gray.png'), disp_to_img, cmap='gray')

## Evaluation

In [None]:
import cv2

In [None]:
# Lookup from ground-truth image width (pixels) to the focal value used by
# convert_disps_to_depths_kitti when turning disparities into depths.
width_to_focal = {
    1242: 721.5377,
    1241: 718.856,
    1224: 707.0493,
    1238: 718.3351,
}
def load_gt_disp_kitti(path, num_samples=97, start_index=5):
    """Load a run of consecutive ground-truth PNG maps as float32 arrays.

    Files are read from ``path + <10-digit zero-padded frame index> + ".png"``
    (so ``path`` must already end with a separator), with OpenCV's "unchanged"
    flag (-1) so the stored bit depth is preserved. Each image is converted to
    float32 and divided by 256.

    Args:
        path: Directory prefix (including the trailing separator) of the PNG files.
        num_samples: How many consecutive frames to read. Defaults to 97.
        start_index: Index of the first frame to read. Defaults to 5.

    Returns:
        A list of ``num_samples`` float32 arrays, in frame order.

    Raises:
        FileNotFoundError: If a frame cannot be read. ``cv2.imread`` signals
            failure by returning ``None`` rather than raising, which previously
            surfaced as an opaque ``AttributeError`` on ``.astype``.
    """
    gt_disparities = []
    for frame in range(start_index, start_index + num_samples):
        file_name = path + str(frame).zfill(10) + ".png"
        raw = cv2.imread(file_name, -1)
        if raw is None:
            raise FileNotFoundError("Could not read ground-truth file: " + file_name)
        gt_disparities.append(raw.astype(np.float32) / 256)
    return gt_disparities

def compute_errors(gt, pred):
    """Compute depth-estimation error metrics between two arrays.

    Args:
        gt: Array of ground-truth values (must be non-zero and positive for the
            ratio and log terms to be finite).
        pred: Array of predicted values with the same shape as ``gt``.

    Returns:
        Tuple ``(abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3)`` where
        ``a1``/``a2``/``a3`` are the fractions of elements whose
        ``max(gt / pred, pred / gt)`` is below 1.25, 1.25**2 and 1.25**3.
    """
    # Symmetric ratio: how many times off the prediction is, in either direction.
    ratio = np.maximum(gt / pred, pred / gt)
    a1, a2, a3 = ((ratio < 1.25 ** power).mean() for power in (1, 2, 3))

    diff = gt - pred
    squared_diff = diff ** 2

    rmse = np.sqrt(squared_diff.mean())

    log_diff = np.log(gt) - np.log(pred)
    rmse_log = np.sqrt((log_diff ** 2).mean())

    abs_rel = np.mean(np.abs(diff) / gt)
    sq_rel = np.mean(squared_diff / gt)

    return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3
def convert_disps_to_depths_kitti(gt_disparities, pred_disparities):
    """Convert paired ground-truth and predicted disparities into depth maps.

    For each ground-truth map, the prediction at the same index is resized to
    the ground-truth resolution (bilinear) and multiplied by the image width.
    Depth is then ``width_to_focal[width] * 0.54 / disparity``.

    Args:
        gt_disparities: Sequence of 2-D ground-truth arrays.
        pred_disparities: Indexable collection of predicted maps, aligned
            index-for-index with ``gt_disparities``.

    Returns:
        Tuple ``(gt_depths, pred_depths, pred_disparities_resized)`` of lists,
        one entry per ground-truth map.

    Raises:
        KeyError: If a ground-truth width has no entry in ``width_to_focal``.
    """
    gt_depths, pred_depths, pred_disparities_resized = [], [], []

    for idx, gt_disp in enumerate(gt_disparities):
        rows, cols = gt_disp.shape

        # Predictions are scaled by the image width after resizing
        # (presumably they are stored as a fraction of width — confirm).
        resized = cv2.resize(pred_disparities[idx], (cols, rows), interpolation=cv2.INTER_LINEAR)
        pred_disp = cols * resized
        pred_disparities_resized.append(pred_disp)

        valid = gt_disp > 0

        # 0.54 is presumably the stereo baseline in metres — confirm.
        focal_times_baseline = width_to_focal[cols] * 0.54
        # Adding (1.0 - valid) turns invalid zero disparities into 1 so the
        # division never hits zero; those pixels are masked out by callers.
        gt_depth = focal_times_baseline / (gt_disp + (1.0 - valid))
        pred_depth = focal_times_baseline / pred_disp

        gt_depths.append(gt_depth)
        pred_depths.append(pred_depth)

    return gt_depths, pred_depths, pred_disparities_resized

In [None]:
# Load the saved predictions and the matching ground truth, then convert both to depth.
# With its default range, load_gt_disp_kitti reads frames 0000000005.png .. 0000000101.png
# from this directory, and convert_disps_to_depths_kitti pairs ground-truth entry i with
# prediction i.
# NOTE(review): confirm that prediction i really corresponds to ground-truth frame i+5.
pred_disparities = np.load("Z:/DL_Project/xxx/disparities_pp.npy")
gt_disparities = load_gt_disp_kitti("Z:/DL_Project/test/2011_09_26_drive_0001_sync/proj_depth/groundtruth/image_02/")
gt_depths, pred_depths, pred_disparities_resized = convert_disps_to_depths_kitti(gt_disparities, pred_disparities)

In [None]:
# Evaluate exactly the samples that were loaded, instead of repeating a hard-coded
# count that can drift out of sync with the loader.
num_samples = len(gt_depths)

# Clamp range applied to predicted depths before the metrics are computed.
# These do not change per sample, so they are set once outside the loop.
min_depth = 1e-3
max_depth = 80

# One slot per sample for every metric; the means are reported afterwards.
rms     = np.zeros(num_samples, np.float32)
log_rms = np.zeros(num_samples, np.float32)
abs_rel = np.zeros(num_samples, np.float32)
sq_rel  = np.zeros(num_samples, np.float32)
d1_all  = np.zeros(num_samples, np.float32)
a1      = np.zeros(num_samples, np.float32)
a2      = np.zeros(num_samples, np.float32)
a3      = np.zeros(num_samples, np.float32)

for i in range(num_samples):
    gt_depth = gt_depths[i]
    pred_depth = pred_depths[i]

    # Clamp in place (this mutates the array stored in pred_depths; the
    # operation is idempotent, so re-running the cell gives the same result).
    pred_depth[pred_depth < min_depth] = min_depth
    pred_depth[pred_depth > max_depth] = max_depth

    gt_disp = gt_disparities[i]
    pred_disp = pred_disparities_resized[i]

    # Only pixels with a positive ground-truth value take part in the metrics.
    mask = gt_disp > 0

    # d1_all: percentage of valid pixels whose disparity error is both
    # >= 3 and >= 5% of the ground-truth disparity.
    disp_diff = np.abs(gt_disp[mask] - pred_disp[mask])
    bad_pixels = np.logical_and(disp_diff >= 3, (disp_diff / gt_disp[mask]) >= 0.05)
    d1_all[i] = 100.0 * bad_pixels.sum() / mask.sum()

    abs_rel[i], sq_rel[i], rms[i], log_rms[i], a1[i], a2[i], a3[i] = compute_errors(gt_depth[mask], pred_depth[mask])

In [None]:
# Print a two-line report: right-aligned metric names, then the mean of each metric
# over all samples (4 decimals for abs_rel/sq_rel, 3 decimals for the rest).
metric_report = [
    ('abs_rel', abs_rel.mean(), 4),
    ('sq_rel', sq_rel.mean(), 4),
    ('rms', rms.mean(), 3),
    ('log_rms', log_rms.mean(), 3),
    ('d1_all', d1_all.mean(), 3),
    ('a1', a1.mean(), 3),
    ('a2', a2.mean(), 3),
    ('a3', a3.mean(), 3),
]
print(", ".join("{:>10}".format(label) for label, _, _ in metric_report))
print(", ".join("{0:10.{1}f}".format(value, digits) for _, value, digits in metric_report))