## Import Libraries and Setup Argoverse Stereo Data Loader 

In [1]:
%matplotlib notebook

import copy
import json
import shutil
import os
import time
import math
import random
from PIL import Image
from pathlib import Path
from multiprocessing import Pool

import cv2
import matplotlib.pyplot as plt
import numpy as np
import open3d as o3d
import plotly.graph_objects as go
from PSMNet.models import *
import pandas as pd

from argoverse.data_loading.stereo_dataloader import ArgoverseStereoDataLoader
from argoverse.evaluation.stereo.eval import StereoEvaluator
from argoverse.utils.calibration import get_calibration_config
from argoverse.utils.camera_stats import RECTIFIED_STEREO_CAMERA_LIST

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F

STEREO_FRONT_LEFT_RECT = RECTIFIED_STEREO_CAMERA_LIST[0]
STEREO_FRONT_RIGHT_RECT = RECTIFIED_STEREO_CAMERA_LIST[1]


# Path to the dataset (please change accordingly).
data_dir = "./argoverse_stereo_v1.1/"

# Choosing the data split: train, val, or test (note that we do not provide ground truth for the test set).
split_name = "val"

# Collect every log id of the chosen split (one sub-directory per log, for example
# 273c1883-673a-36bf-b124-88311b1a80be). The directory is derived from data_dir and
# split_name so that changing either of them keeps the log list consistent, stray
# non-directory entries are skipped, and sorting makes the processing order deterministic.
split_image_dir = Path(data_dir) / "rectified_stereo_images_v1.1" / split_name
log_ids = sorted(entry.name for entry in split_image_dir.iterdir() if entry.is_dir())

# Creating the Argoverse Stereo data loader.
stereo_data_loader = ArgoverseStereoDataLoader(data_dir, split_name)

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


## Define Function to Load Model

In [2]:
# Function that loads pre-trained model
def configure_PSMNet_model(loadmodel,
                           model_struct,
                           maxdisp=192,
                           no_cuda=False,
                           seed=1):
    """Build a PSMNet model and optionally load pre-trained weights into it.

    Args:
        loadmodel: Path of a checkpoint readable by ``torch.load`` that holds a
            ``'state_dict'`` entry, or ``None`` to keep the freshly initialised weights.
        model_struct: Architecture name, either ``'stackhourglass'`` or ``'basic'``.
        maxdisp: Maximum disparity handed to the model constructor.
        no_cuda: When true the model stays on the CPU even if CUDA is available.
        seed: Seed applied to the torch random number generators.

    Returns:
        Tuple ``(model, args_cuda)`` where ``args_cuda`` tells whether the model
        lives on the GPU (and therefore expects CUDA inputs).

    Raises:
        ValueError: If ``model_struct`` is not a supported architecture name.
    """
    # Configure CUDA and torch
    args_cuda = not no_cuda and torch.cuda.is_available()
    torch.manual_seed(seed)
    if args_cuda:
        torch.cuda.manual_seed(seed)

    # Pick model structure based on input; fail loudly instead of continuing
    # with an undefined model.
    if model_struct == 'stackhourglass':
        model = stackhourglass(maxdisp)
    elif model_struct == 'basic':
        model = basic(maxdisp)
    else:
        raise ValueError(
            "unknown model_struct {!r}; expected 'stackhourglass' or 'basic'".format(model_struct))

    # Only move the model to the GPU when CUDA was requested and is available.
    if args_cuda:
        model = nn.DataParallel(model, device_ids=[0])
        model.cuda()

    # Load model parameter values
    if loadmodel is not None:
        print('load PSMNet')
        # On the CPU path the tensors must be remapped, otherwise a checkpoint
        # saved from a GPU cannot be deserialised.
        state_dict = torch.load(loadmodel, map_location=None if args_cuda else 'cpu')
        weights = state_dict['state_dict']
        if not args_cuda:
            # The GPU path loads the checkpoint into a DataParallel wrapper, whose
            # keys carry a 'module.' prefix; strip it for the unwrapped CPU model.
            prefix = 'module.'
            weights = {
                (key[len(prefix):] if key.startswith(prefix) else key): value
                for key, value in weights.items()
            }
        model.load_state_dict(weights)

    print('Number of model parameters: {}'.format(sum(p.data.nelement() for p in model.parameters())))
    return model, args_cuda

## Define Function to Produce Disparity Map

In [5]:
# Produce a disparity map from stereo image pair
def test(model,args_cuda,imgL,imgR):
    model.eval()

    # Convert images to cuda
    if args_cuda:
        imgL = imgL.cuda()
        imgR = imgR.cuda()     

    # Produce disparity map from images
    with torch.no_grad():
        disp = model(imgL,imgR)

    # COnvert disparity model to numpy array
    disp = torch.squeeze(disp)
    pred_disp = disp.data.cpu().numpy()

    return pred_disp

# Produce disparity map from input model and stereo image pairs
def testImagePSMNet(model, args_cuda, log_id, leftimg, rightimg):
    """Predict a disparity map for one stereo pair and save it as a 16-bit PNG.

    The images are downscaled to a quarter of their size, normalised, padded so
    that both spatial dimensions are multiples of 16, and passed through
    ``test``. The prediction is cropped back, scaled by 4 (to undo the
    downscaling) and by 256 (fixed-point encoding), resized to the original
    resolution and written to
    ``./results/PSMNet_results/<log_id>/disparity_<timestamp>.png``.

    Args:
        model: Network forwarded to ``test``.
        args_cuda: Whether ``test`` should move the inputs to the GPU.
        log_id: Log identifier used as the name of the output sub-directory.
        leftimg: Path of the left image; the text after the last ``_`` in its
            stem must be an integer timestamp.
        rightimg: Path of the right image.

    Returns:
        None. The result is only written to disk.
    """
    # Load stereo image pair
    imgL_o = Image.open(leftimg).convert('RGB')
    imgR_o = Image.open(rightimg).convert('RGB')

    # Resize image to fit in GPU memory (PIL's Image.size is (width, height))
    full_width, full_height = imgL_o.size
    small_size = (int(full_width * 0.25), int(full_height * 0.25))
    imgL_o = imgL_o.resize(small_size)
    imgR_o = imgR_o.resize(small_size)

    # Adjust images to be consistent with training images
    normal_mean_var = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]}
    infer_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(**normal_mean_var)])
    imgL = infer_transform(imgL_o)
    imgR = infer_transform(imgR_o)

    # Pad height (top) and width (right) up to the next multiple of 16;
    # (-n) % 16 is 0 when n is already a multiple of 16.
    top_pad = (-imgL.shape[1]) % 16
    right_pad = (-imgL.shape[2]) % 16
    imgL = F.pad(imgL, (0, right_pad, top_pad, 0)).unsqueeze(0)
    imgR = F.pad(imgR, (0, right_pad, top_pad, 0)).unsqueeze(0)

    # Produce disparity map for stereo pair
    pred_disp = test(model, args_cuda, imgL, imgR)

    # Remove padding from disparity map; an explicit end index also covers right_pad == 0
    img = pred_disp[top_pad:, :pred_disp.shape[1] - right_pad]

    # Format disparity map and resize to full scale. Saturate before the cast:
    # values above the uint16 range would otherwise overflow and corrupt the
    # stored disparities.
    uint16_max = np.iinfo(np.uint16).max
    img = np.clip(img * 4 * 256, 0, uint16_max).astype('uint16')
    img = Image.fromarray(img)
    img = img.resize((full_width, full_height))

    # Save disparity map results
    timestamp = int(Path(leftimg).stem.split("_")[-1])
    save_dir_disp = f"./results/PSMNet_results/{log_id}/"
    Path(save_dir_disp).mkdir(parents=True, exist_ok=True)
    filename = f"{save_dir_disp}/disparity_{timestamp}.png"
    img.save(filename)

## Load PSMNet and Test Images

In [6]:
# Load PSMNet pretrained model
print("Loading pre-trained PSMNet Model")
modelPSMNet, args_cuda = configure_PSMNet_model('./PSMNet/pretrained_model_KITTI2015.tar',
                                                'stackhourglass', 192)

# Put each stereo pair through inference pipeline
t0 = time.time()
imageCount = 0
print('Creating Disparity Maps for Test Image set')

# Loop through each log
for log_id in log_ids:

    # Loading the left rectified stereo image paths for the chosen log.
    left_stereo_img_fpaths = stereo_data_loader.get_ordered_log_stereo_image_fpaths(
        log_id=log_id,
        camera_name=STEREO_FRONT_LEFT_RECT)

    # Loading the right rectified stereo image paths for the chosen log.
    right_stereo_img_fpaths = stereo_data_loader.get_ordered_log_stereo_image_fpaths(
        log_id=log_id,
        camera_name=STEREO_FRONT_RIGHT_RECT)

    # Loop through each image in specific log; left and right images are paired by index
    for idx, left_fpath in enumerate(left_stereo_img_fpaths):
        testImagePSMNet(modelPSMNet, args_cuda, log_id, left_fpath, right_stereo_img_fpaths[idx])

    # Report status and latency. The elapsed time is measured once so both numbers
    # agree, and the average is guarded against the case where no image has been
    # processed yet (for example an empty first log).
    imageCount = imageCount + len(left_stereo_img_fpaths)
    elapsed = time.time() - t0
    time_per_image = elapsed / imageCount if imageCount else float('nan')
    print('Elapsed Time:', elapsed, 'Time per Image:', time_per_image)

Loading pre-trained PSMNet Model
load PSMNet
Number of model parameters: 5224768
Creating Disparity Maps for Test Image set




Elapsed Time: 93.56798481941223 Time per Image: 1.299555351336797
Elapsed Time: 193.993421792984 Time per Image: 1.3287220719742447
Elapsed Time: 351.7331702709198 Time per Image: 1.3902496876923933
Elapsed Time: 460.9530074596405 Time per Image: 1.409642226834545
Elapsed Time: 564.4595625400543 Time per Image: 1.4290115525450888
Elapsed Time: 675.8954269886017 Time per Image: 1.4442209998766582
Elapsed Time: 787.4165678024292 Time per Image: 1.452798096456211
Elapsed Time: 899.8618860244751 Time per Image: 1.4608147508138185
Elapsed Time: 1118.9135887622833 Time per Image: 1.4741944525395771
Elapsed Time: 1228.6609108448029 Time per Image: 1.476755903317378
Elapsed Time: 1391.4110708236694 Time per Image: 1.4849637900752473
Elapsed Time: 1502.791784286499 Time per Image: 1.487912658181521
Elapsed Time: 1725.4919848442078 Time per Image: 1.4926401257102464
Elapsed Time: 1836.3080804347992 Time per Image: 1.4929333993089877
Elapsed Time: 1948.0599575042725 Time per Image: 1.495057527230

## Get Performance Results from Disparity Map Results 

In [8]:
# Start the timer and the accumulators used while evaluating every log.
t0 = time.time()
imageCount = 0
data = pd.DataFrame()

for log_id in log_ids:
    # Directory holding the predicted disparity maps of this log.
    save_dir_disp = f'./results/PSMNet_results/{log_id}/'
    pred_dir = Path(save_dir_disp)

    # Directory holding the matching ground-truth disparity maps.
    gt_dir = Path(f"{data_dir}/disparity_maps_v1.1/{split_name}/{log_id}")

    # Directory receiving the disparity error images (created on demand).
    save_figures_dir = Path('./results/PSMNet_error_results/')
    save_figures_dir.mkdir(parents=True, exist_ok=True)

    # Build the evaluator for this log.
    evaluator = StereoEvaluator(
        pred_dir,
        gt_dir,
        save_figures_dir,
        save_disparity_error_image=True,
        num_procs=-1,
    )

    # Evaluate; `data` is handed back in so it is carried from one log to the next.
    metrics, data, errors = evaluator.evaluate(data)
    print('Elapsed Time:', time.time() - t0)

# Pretty-print the quantitative results as indented JSON.
print('Results for Full Images')
print('==============================')
print(json.dumps(metrics, sort_keys=False, indent=4))

Elapsed Time: 15.410041093826294
Elapsed Time: 31.57341456413269
Elapsed Time: 53.35528588294983
Elapsed Time: 70.06567811965942
Elapsed Time: 85.68302917480469
Elapsed Time: 101.5634183883667
Elapsed Time: 118.16903924942017
Elapsed Time: 134.73144793510437
Elapsed Time: 164.47940135002136
Elapsed Time: 180.49587106704712
Elapsed Time: 202.35144019126892
Elapsed Time: 219.06235003471375
Elapsed Time: 251.18495297431946
Elapsed Time: 269.6106233596802
Elapsed Time: 288.81002497673035
Elapsed Time: 324.3024377822876
Elapsed Time: 341.3530025482178
Results for Full Images
{
    "all:10": 9.796292466655133,
    "fg:10": 11.024064570773753,
    "bg:10": 9.477696348582793,
    "all*:10": 9.796291328198063,
    "fg*:10": 11.02405921379918,
    "bg*:10": 9.477696348582793,
    "all:5": 46.348040605867205,
    "fg:5": 43.19114041703676,
    "bg:5": 47.16722858214497,
    "all*:5": 46.348040502453884,
    "fg*:5": 43.19113948120579,
    "bg*:5": 47.16722858214497,
    "all:3": 50.66421284782071