In [1]:
from __future__ import absolute_import, division, print_function

import os
import cv2
import numpy as np

import torch
from torch.utils.data import DataLoader

from layers import disp_to_depth
from utils import readlines
import datasets
import networks
import time
from thop import profile, clever_format

In [2]:
def calc_param(net):
    net_params = filter(lambda p: p.requires_grad, net.parameters())
    weight_count = 0
    for param in net_params:
        weight_count += np.prod(param.size())
    return weight_count

def compute_errors(gt, pred):
    """Computation of error metrics between predicted and ground truth depths
    """
    thresh = np.maximum((gt / pred), (pred / gt))
    a1 = (thresh < 1.25     ).mean()
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()

    rmse = (gt - pred) ** 2
    rmse = np.sqrt(rmse.mean())

    rmse_log = (np.log(gt) - np.log(pred)) ** 2
    rmse_log = np.sqrt(rmse_log.mean())

    abs_rel = np.mean(np.abs(gt - pred) / gt)

    sq_rel = np.mean(((gt - pred) ** 2) / gt)

    return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3


def batch_post_process_disparity(l_disp, r_disp):
    """Apply the disparity post-processing method as introduced in Monodepthv1
    """
    _, h, w = l_disp.shape
    m_disp = 0.5 * (l_disp + r_disp)
    l, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h))
    l_mask = (1.0 - np.clip(20 * (l - 0.05), 0, 1))[None, ...]
    r_mask = l_mask[:, :, ::-1]
    return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp

In [3]:
cv2.setNumThreads(0)  # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1)

splits_dir = os.path.join(os.path.expanduser("~"), "depth", "monodepth2", "splits")
data_path = "/work/garin0115/datasets/kitti_data/"                                
# Models which were trained with stereo supervision were trained with a nominal
# baseline of 0.1 units. The KITTI rig has a baseline of 54cm. Therefore,
# to convert our stereo predictions to real-world scale we multiply our depths by 5.4.
STEREO_SCALE_FACTOR = 5.4
post_process = False
ext_disp_to_eval = None
eval_split = "eigen"
eval_stereo = False
MIN_DEPTH = 1e-3
MAX_DEPTH = 80
disable_median_scaling = False
pred_depth_scale_factor = 1

In [4]:
enc_dict = {"resnet18":networks.ResnetEncoder(18, False)}
# dec_dict = {    "resnet18":networks.DepthDecoder(enc_dict["resnet18"].num_ch_enc),
#                 "resnet18_nn3":networks.NNDecoder(
#                     enc_dict["resnet18"].num_ch_enc, kernel_size=3, dw=False),
#                 "resnet18_nn3_dw":networks.NNDecoder(
#                     enc_dict["resnet18"].num_ch_enc, kernel_size=3, dw=True),
#                 "resnet18_nn5":networks.NNDecoder(
#                     enc_dict["resnet18"].num_ch_enc, kernel_size=5, dw=False),
#                 "resnet18_nn5_dw":networks.NNDecoder(
#                     enc_dict["resnet18"].num_ch_enc, kernel_size=5, dw=True),
#                 "resnet18_nn35":networks.NNDecoder(
#                     enc_dict["resnet18"].num_ch_enc, kernel_size=35, dw=False),
#                 "resnet18_nn53":networks.NNDecoder(
#                     enc_dict["resnet18"].num_ch_enc, kernel_size=53, dw=False),
#                 "resnet18_nn3_3x3":networks.NNDecoder(
#                     enc_dict["resnet18"].num_ch_enc, kernel_size=3, dw=False, pw=3),
#                 "resnet18_nn3_upconv":networks.NNDecoder(
#                     enc_dict["resnet18"].num_ch_enc, kernel_size=3, dw=False, upconv=True)}
# dec_dict = {"resnet18_my3":networks.MYDecoder(enc_dict["resnet18"].num_ch_enc),
#             "resnet18_my3_more1":networks.MYDecoder(enc_dict["resnet18"].num_ch_enc, moreConv=1),
#             "resnet18_my3_more2":networks.MYDecoder(enc_dict["resnet18"].num_ch_enc, moreConv=2),
#             "resnet18_my3_concatDepth":networks.MYDecoder(enc_dict["resnet18"].num_ch_enc, concatDepth=True),
#             "resnet18_my3_doubleConv1":networks.MYDecoder(enc_dict["resnet18"].num_ch_enc, doubleConv=1),
#             "resnet18_my3_doubleConv3":networks.MYDecoder(enc_dict["resnet18"].num_ch_enc, doubleConv=3),
#             "resnet18_my3_doubleConv3_3x3":networks.MYDecoder(enc_dict["resnet18"].num_ch_enc, doubleConv=3, pw=False),
#             "resnet18_my3_firstConv":networks.MYDecoder(enc_dict["resnet18"].num_ch_enc, firstConv=True),
#             "resnet18_my3_skipSky":networks.MYDecoder(enc_dict["resnet18"].num_ch_enc),
#             "resnet18_my3_firstConv_skipSky":networks.MYDecoder(enc_dict["resnet18"].num_ch_enc, firstConv=True),
#             "resnet18_my3_smooth":networks.MYDecoder(enc_dict["resnet18"].num_ch_enc),
#             "resnet18_my3_firstConv_skipSky_conv11":networks.MYDecoder(enc_dict["resnet18"].num_ch_enc, firstConv=True, conv11=True)}
# dec_dict = {"resnet18":networks.DepthDecoder(enc_dict["resnet18"].num_ch_enc),
# #                 "resnet18_nn3":networks.NNDecoder(
# #                     encoder_dict["resnet18"].num_ch_enc, kernel_size=3, dw=False),
# #                 "resnet18_my3":networks.MYDecoder(
# #                     encoder_dict["resnet18"].num_ch_enc, kernel_size=3, dw=False),
# #                 "resnet18_my3_dw":networks.MYDecoder(
# #                     enc_dict["resnet18"].num_ch_enc, kernel_size=3, dw=True),
#                 "resnet18_my35_nomask":networks.MYDecoder(
#                     enc_dict["resnet18"].num_ch_enc, kernel_size=35, dw=False),
# #                 "resnet18_my3_3x3":networks.MYDecoder(
# #                     enc_dict["resnet18"].num_ch_enc, kernel_size=3, dw=False, pw=3),
#                 "resnet18_my35_more":networks.MYDecoder(
#                     enc_dict["resnet18"].num_ch_enc, kernel_size=35, dw=False, more=True)}
dec_dict = {
    "resnet18_skipFirstConv":networks.DepthDecoder(enc_dict["resnet18"].num_ch_enc, skipFirstConv=True),
    "resnet18_skipFirstConv_skipSky":networks.DepthDecoder(enc_dict["resnet18"].num_ch_enc, skipFirstConv=True),
    "resnet18_my3_skipSky":networks.MYDecoder(enc_dict["resnet18"].num_ch_enc),
    "resnet18_my3":networks.MYDecoder(enc_dict["resnet18"].num_ch_enc),
    "resnet18_skip2Conv":networks.DepthDecoder(enc_dict["resnet18"].num_ch_enc, skip2Conv=True),
    "resnet18_skipFirstConv":networks.DepthDecoder(enc_dict["resnet18"].num_ch_enc, skipFirstConv=True),
}
               
               

In [9]:
"""Evaluates a pretrained model using a specified test set
"""
results = []
inWork = ["resnet18_my3", "resnet18_my3_more1", "resnet18_my3_more2", "resnet18_my3_concatDepth", "resnet18_my3_doubleConv1", "resnet18_my3_doubleConv3"]
fineTune = ["resnet18_my3", "resnet18_my3_skipSky"]
for name in dec_dict:
    if name in inWork:
        load_weights_folder = os.path.join("/work", "garin0115", "models", name+"_256x832", "models")
    else:
        load_weights_folder = os.path.join(os.path.expanduser("~"), 
                                       "depth", 
                                       "monodepth2",
                                       "models", 
                                       name+"_256x832", 
                                       "models")
    if name in fineTune:
        load_weights_folder = os.path.join(load_weights_folder, "weights_29")
    else:
        load_weights_folder = os.path.join(load_weights_folder, "weights_19")
    
    assert os.path.isdir(load_weights_folder), \
        "Cannot find a folder at {}".format(load_weights_folder)

    print("-> Loading weights from {}".format(load_weights_folder))

    filenames = readlines(os.path.join(splits_dir, eval_split, "test_files.txt"))
    encoder_path = os.path.join(load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)

    dataset = datasets.KITTIRAWDataset(data_path, filenames,
                                       encoder_dict['height'], encoder_dict['width'],
                                       [0], 4, is_train=False)
    dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=16,
                            pin_memory=True, drop_last=False)

    encoder = enc_dict["resnet18"]
    depth_decoder = dec_dict[name]

    enc_param_count = calc_param(encoder)
    dec_param_count = calc_param(depth_decoder)
    print("[info] Model {}".format(name))
    print("[info] Encoder parameter count:", enc_param_count)
    print("[info] Decoder parameter count:", dec_param_count)
    print("[info] Total parameter count:", enc_param_count + dec_param_count)

    model_dict = encoder.state_dict()
    encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    pred_disps = []

    print("-> Computing predictions with size {}x{}".format(
        encoder_dict['width'], encoder_dict['height']))
    
    time_min = 10000
    with torch.no_grad():
        
        for data in dataloader:
            start_time = time.time()
            input_color = data[("color", 0, 0)].cuda()

            if post_process:
                # Post-processed results require each image to have two forward passes
                input_color = torch.cat((input_color, torch.flip(input_color, [3])), 0)
            start_time = time.time()
            features = encoder(input_color)
            output = depth_decoder(features)
            total_time = time.time() - start_time
            pred_disp, _ = disp_to_depth(output[("disp", 0)], MIN_DEPTH, MAX_DEPTH)
            pred_disp = pred_disp.cpu()[:, 0].numpy()
    #                 pred_disp = pred_disp[:, 0].numpy()

            if post_process:
                N = pred_disp.shape[0] // 2
                pred_disp = batch_post_process_disparity(pred_disp[:N], pred_disp[N:, :, ::-1])

            pred_disps.append(pred_disp)
            
            if total_time < time_min:
                time_min = total_time
                
    pred_disps = np.concatenate(pred_disps)


    gt_path = os.path.join(splits_dir, eval_split, "gt_depths.npz")
    gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1', allow_pickle=True)["data"]
    #     gt_depths = np.load(gt_path)["data"]

    print("-> Evaluating")

    if eval_stereo:
        print("   Stereo evaluation - "
              "disabling median scaling, scaling by {}".format(STEREO_SCALE_FACTOR))
        disable_median_scaling = True
        pred_depth_scale_factor = STEREO_SCALE_FACTOR
    else:
        print("   Mono evaluation - using median scaling")

    errors = []
    ratios = []

    for i in range(pred_disps.shape[0]):

        gt_depth = gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]

        pred_disp = pred_disps[i]
        pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))
        pred_depth = 1 / pred_disp

        if eval_split == "eigen":
            mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)

            crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height,
                             0.03594771 * gt_width,  0.96405229 * gt_width]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)

        else:
            mask = gt_depth > 0

        pred_depth = pred_depth[mask]
        gt_depth = gt_depth[mask]

        pred_depth *= pred_depth_scale_factor
        if not disable_median_scaling:
            ratio = np.median(gt_depth) / np.median(pred_depth)
            ratios.append(ratio)
            pred_depth *= ratio

        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH

        errors.append(compute_errors(gt_depth, pred_depth))

    if not disable_median_scaling:
        ratios = np.array(ratios)
        med = np.median(ratios)
        print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(med, np.std(ratios / med)))

    mean_errors = np.array(errors).mean(0)
    print("[info] {}".format(name))
    print(" FPS: ", 1/time_min)
    print("\n  " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f}  " * 7).format(*mean_errors.tolist()) + "\\\\")
    print("\n-> Done!")
    

    flops_enc, params_enc = profile(encoder, inputs=(input_color, ))
    flops_dec, params_dec = profile(depth_decoder, inputs=(features, ))
    a, b, c, d, e, f = clever_format([params_enc+params_dec, params_enc, params_dec, flops_enc+flops_dec, flops_enc, flops_dec], "%.3f")
    
    result = []
    result.append(name)
    result.append(encoder_dict['height'])
    result.append(encoder_dict['width'])
    for i in mean_errors:
        result.append(i)
    result.append(1/time_min)
    for i in [a, b, c, d, e, f]:
        result.append(i)
    results.append(result)


-> Loading weights from /home/garin0115/depth/monodepth2/models/resnet18_skipFirstConv_256x832/models/weights_19
[info] Model resnet18_skipFirstConv
[info] Encoder parameter count: 11689512
[info] Decoder parameter count: 3742548
[info] Total parameter count: 15432060
-> Computing predictions with size 832x256
-> Evaluating
   Mono evaluation - using median scaling
 Scaling ratios | med: 3001.452 | std: 0.083
[info] resnet18_skipFirstConv
 FPS:  282.5969545883304

   abs_rel |   sq_rel |     rmse | rmse_log |       a1 |       a2 |       a3 | 
&   0.115  &   0.896  &   4.846  &   0.194  &   0.878  &   0.960  &   0.981  \\

-> Done!
-> Loading weights from /home/garin0115/depth/monodepth2/models/resnet18_skipFirstConv_skipSky_256x832/models/weights_19
[info] Model resnet18_skipFirstConv_skipSky
[info] Encoder parameter count: 11689512
[info] Decoder parameter count: 3742548
[info] Total parameter count: 15432060
-> Computing predictions with size 832x256
-> Evaluating
   Mono evaluation 

In [None]:
import csv

# 開啟輸出的 CSV 檔案
with open('result.csv', 'w', newline='') as csvfile:
    # 建立 CSV 檔寫入器
    writer = csv.writer(csvfile)

    # 寫入一列資料
    writer.writerow(['Model', 'Height', 'Width', "abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3", 
                  'FPS', 'Parameters', 'params_enc', 'params_dec', 'FLOPs', 'fl_enc', 'fl_dec'])

    # 寫入另外幾列資料
    for res in results:
        writer.writerow(res)