In [90]:
import torch
from imageio import imread, imsave
from skimage.transform import resize
from skimage.util import img_as_float
import numpy as np
from path import Path
import argparse
from tqdm import tqdm
import pdb
from models import DispNetS
from utils import tensor2array
from models import PoseExpNet
import custom_transforms
from utils import tensor2array, save_checkpoint, save_path_formatter, log_output_tensorboard
import pdb
from loss_functions import photometric_reconstruction_loss, explainability_loss, smooth_loss, blurry_loss
from loss_functions import compute_depth_errors, compute_pose_errors
from inverse_warp import *
from logger import TermLogger, AverageMeter
from tensorboardX import SummaryWriter
from datasets.sequence_folders import SequenceFolder
from torchvision import transforms
from torchvision.utils import save_image
import imageio

In [2]:
# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

disp_net = DispNetS().to(device)
pose_exp_net = PoseExpNet(nb_ref_imgs=2, output_exp=False).to(device)

# map_location remaps the stored tensors onto `device`, so a checkpoint written
# from a GPU still loads when this notebook runs on a CPU-only machine.
weights = torch.load('checkpoints/kitti_raw_prepared,epoch_size3000,b64,m0.2/09-19-18:48/dispnet_model_best.pth.tar',
                     map_location=device)
disp_net.load_state_dict(weights['state_dict'])

weights = torch.load('checkpoints/kitti_raw_prepared,epoch_size3000,b64,m0.2/09-19-18:48/exp_pose_model_best.pth.tar',
                     map_location=device)
# strict=False: keys that do not match between checkpoint and model are ignored.
pose_exp_net.load_state_dict(weights['state_dict'], strict=False)

# Use at most the three GPUs this notebook was written for, but never request
# device ids that do not exist on the current machine.
gpu_ids = list(range(min(3, torch.cuda.device_count())))
disp_net = torch.nn.DataParallel(disp_net, device_ids=gpu_ids)
pose_exp_net = torch.nn.DataParallel(pose_exp_net, device_ids=gpu_ids)

In [3]:
# Per-channel normalisation with mean 0.5 and std 0.5
# (presumably (x - mean) / std, i.e. [0, 1] -> [-1, 1] — confirm in custom_transforms).
normalize = custom_transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3)

# Augmentation pipeline applied to every training sample, in this order.
train_transform = custom_transforms.Compose(
    [
        custom_transforms.RandomHorizontalFlip(),
        custom_transforms.RandomScaleCrop(),
        custom_transforms.ArrayToTensor(),
        normalize,
    ]
)

# Snippets of three consecutive frames taken from the prepared KITTI training split.
train_set = SequenceFolder('KITTI/kitti_raw_prepared/',
                           transform=train_transform,
                           seed=0,
                           train=True,
                           sequence_length=3)

# One sample per batch, shuffled, so each iteration yields a single snippet.
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=1,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)

In [4]:
# Inverse of a (x - 0.5) / 0.5 normalisation: torchvision's Normalize computes
# (x - mean) / std, so mean=-1 and std=1/0.5 give (x + 1) / 2.
inv_normalize = transforms.Normalize(mean=[-1] * 3, std=[1 / 0.5] * 3)

In [93]:
# Kept because other cells iterate over `results`; this loop itself appends nothing.
results = []

# save_image / imsave do not create missing folders, so make sure it exists first.
Path('blurry').makedirs_p()

# Nothing here is back-propagated, so skip building autograd graphs.
# NOTE(review): the networks are indexed as a sequence of scales below, which
# presumably relies on them still being in training mode — confirm before adding .eval().
with torch.no_grad():
    for i, (tgt_img, ref_imgs, intrinsics, intrinsics_inv) in enumerate(train_loader):
        tgt_img = tgt_img.to(device)
        ref_imgs = [img.to(device) for img in ref_imgs]
        intrinsics = intrinsics.to(device)

        # Depth is the reciprocal of the predicted disparity, for the target frame
        # and for the first output of the network on every reference frame.
        disparities = disp_net(tgt_img)
        depth = [1/disp for disp in disparities]
        depth_refs = [1/disp_net(ref_img)[0] for ref_img in ref_imgs]

        # Pose between target and references, then the synthesised blurry image.
        explainability_mask, pose = pose_exp_net(tgt_img, ref_imgs=ref_imgs)
        blurry_img, valid_points = blurry_image(ref_imgs, depth_refs, pose, intrinsics, 'euler', 'border')

        # Run both networks again on the blurry image and warp it with the result.
        disparities_blurry = disp_net(blurry_img)
        depth_blurry = [1/disp for disp in disparities_blurry]
        explainability_mask_blurry, pose_blurry = pose_exp_net(target_image=blurry_img, blurry=True)
        warp, _ = inverse_warp(blurry_img, depth_blurry[0][:,0], pose_blurry[:,0], intrinsics)

        # Undo the input normalisation before writing images to disk.
        blurry_img_ = inv_normalize(blurry_img)
        target_img_ = inv_normalize(tgt_img)

        save_image(blurry_img_, 'blurry/' + str(i) + '.png')
        save_image(target_img_, 'blurry/' + str(i) + '_original.png')
        save_image(inv_normalize(warp), 'blurry/' + str(i) + '_warp.png')

        # The colour-mapped pictures get their own names so that `depth` stays the
        # list of depth tensors that later cells index as depth[0][:,0].
        disp_img = (255*tensor2array(disparities[0], max_value=None, colormap='bone')).astype(np.uint8)
        imageio.imsave('blurry/' + str(i) + '_disp.png', np.transpose(disp_img, (1,2,0)))
        depth_img = (255*tensor2array(depth[0], max_value=None, colormap='rainbow')).astype(np.uint8)
        imageio.imsave('blurry/' + str(i) + '_depth.png', np.transpose(depth_img, (1,2,0)))

        for j in range(len(ref_imgs)):
            save_image(inv_normalize(ref_imgs[j]), 'blurry/' + str(i) + '_ref_' + str(j) + '.png')

        # Stop once the sample with index 21 has been written.
        if i > 20:
            break

In [72]:
# Back-project the first-scale target depth into camera coordinates.  [B,3,H,W]
cam_coords = pixel2cam(depth[0][:, 0], torch.inverse(intrinsics))
# Pose vector predicted for the blurry image, converted to a matrix.  [B,3,4]
pose_mat = pose_vec2mat(pose_blurry[:, 0], 'euler')
# Projection matrix from the tgt camera frame to the source pixel frame.  [B,3,4]
proj_cam_to_src_pixel = torch.matmul(intrinsics, pose_mat)

In [81]:
# Split the projection into its left 3x3 block and its last column.
# NOTE(review): `tr` is not used in this cell — confirm whether the translation
# is meant to be dropped in this experiment.
rot, tr = proj_cam_to_src_pixel[..., :3], proj_cam_to_src_pixel[..., -1:]
src_pixel_coords = cam2pixel(cam_coords, rot.inverse())  # [B,H,W,2]

# grid_sample is reached through `torch` directly: the notebook never imports
# `F` itself, so the bare name only resolved if a star import happened to leak it.
projected_img = torch.nn.functional.grid_sample(
    blurry_img, src_pixel_coords, padding_mode='border', align_corners=True)


In [82]:
# Undo the input normalisation and write the resampled image to disk for inspection.
save_image(inv_normalize(projected_img),'test.png')

In [70]:
src_pixel_coords

tensor([[[[-1.0007, -1.0007],
          [-1.0010, -1.0010],
          [-1.0012, -1.0013],
          ...,
          [-0.9986, -1.0011],
          [-0.9988, -1.0009],
          [-0.9991, -1.0007]],

         [[-1.0008, -1.0008],
          [-1.0011, -1.0011],
          [-1.0013, -1.0013],
          ...,
          [-0.9986, -1.0011],
          [-0.9988, -1.0009],
          [-0.9991, -1.0007]],

         [[-1.0008, -1.0008],
          [-1.0011, -1.0011],
          [-1.0013, -1.0013],
          ...,
          [-0.9986, -1.0011],
          [-0.9988, -1.0009],
          [-0.9990, -1.0007]],

         ...,

         [[-1.0004, -0.9995],
          [-1.0005, -0.9995],
          [-1.0005, -0.9995],
          ...,
          [-0.9994, -0.9994],
          [-0.9993, -0.9994],
          [-0.9993, -0.9994]],

         [[-1.0004, -0.9995],
          [-1.0005, -0.9995],
          [-1.0005, -0.9995],
          ...,
          [-0.9993, -0.9994],
          [-0.9992, -0.9993],
          [-0.9993, -0.9994]],



In [36]:
src_pixel_coords

tensor([[[[-0.9663, -1.0306],
          [-0.9670, -1.0588],
          [-0.9667, -1.0817],
          ...,
          [ 1.0139, -1.0786],
          [ 1.0163, -1.0649],
          [ 1.0168, -1.0405]],

         [[-0.9699, -1.0341],
          [-0.9696, -1.0569],
          [-0.9669, -1.0680],
          ...,
          [ 1.0142, -1.0656],
          [ 1.0166, -1.0514],
          [ 1.0160, -1.0211]],

         [[-0.9681, -1.0097],
          [-0.9691, -1.0394],
          [-0.9670, -1.0529],
          ...,
          [ 1.0141, -1.0501],
          [ 1.0165, -1.0361],
          [ 1.0173, -1.0134]],

         ...,

         [[-0.9410,  0.9328],
          [-0.9375,  0.9291],
          [-0.9345,  0.9238],
          ...,
          [ 0.9763,  0.9467],
          [ 0.9832,  0.9390],
          [ 0.9885,  0.9371]],

         [[-0.9401,  0.9502],
          [-0.9380,  0.9420],
          [-0.9343,  0.9394],
          ...,
          [ 0.9770,  0.9591],
          [ 0.9862,  0.9428],
          [ 0.9889,  0.9501]],



In [37]:
cam_coords.shape

torch.Size([1, 3, 128, 416])

In [13]:
# NOTE(review): `a` is not defined in any visible cell. The recorded error shows it
# held 3x4 matrices, and only batches of square matrices can be inverted.
a.inverse()

RuntimeError: linalg.inv: A must be batches of square matrices, but they are 3 by 4 matrices

In [26]:
-1*pose_blurry[:,0]

tensor([[-0.0054, -0.0087, -0.0062, -0.0454, -0.0115, -0.0043]],
       device='cuda:0', grad_fn=<MulBackward0>)

In [27]:
pose_blurry[:,0]

tensor([[0.0054, 0.0087, 0.0062, 0.0454, 0.0115, 0.0043]], device='cuda:0',
       grad_fn=<SelectBackward0>)

In [61]:
# NOTE(review): `result` is only bound by a `for result in results` loop in another
# cell, so this depends on leftover kernel state — confirm the tuple layout.
depth_blurry = result[-1][0]
pose_blurry = result[3]

In [62]:
result[2][0].shape

torch.Size([1, 128, 416])

In [65]:
# Warp the image stored at result[4] with the current depth_blurry / pose_blurry.
# `b` receives the second return value and is not used in the visible cells.
a,b = inverse_warp(result[4],depth_blurry[:,0],pose_blurry[:,0],intrinsics)

In [66]:
# Undo the input normalisation so the warped image can be saved as a picture.
a_ = inv_normalize(a)

In [67]:
# Writes to 'test.png', the same file name the projected-image cell uses.
save_image(a_,'test.png')

In [64]:
# Redundant with the notebook's import cell; kept so this cell also runs on its own.
from torchvision.utils import save_image

# Write the blurry, original and warped image of every collected result to
# blurry/<index>*.png. Only the first element along the leading axis is saved.
# NOTE(review): the tuple layout (0: target, 2: blurry, -1: warp) is taken from
# the indexing below — confirm against whatever code fills `results`.
for i, result in enumerate(results):
    blurry_img_ = result[2][0]
    target_img_ = result[0][0]
    warp = result[-1][0]

    save_image(blurry_img_, 'blurry/' + str(i) + '.png')
    save_image(target_img_, 'blurry/' + str(i) + '_original.png')
    save_image(warp, 'blurry/' + str(i) + '_warp.png')

In [29]:
blurry_img_

tensor([[[-0.4844, -0.4670, -0.4471,  ...,  0.1483,  0.1522,  0.1328],
         [-0.4740, -0.4789, -0.4819,  ...,  0.1733,  0.1839,  0.1601],
         [-0.4793, -0.4649, -0.4704,  ...,  0.1532,  0.1717,  0.1827],
         ...,
         [-0.4134, -0.4127, -0.4106,  ..., -0.2205, -0.1676, -0.1525],
         [-0.4071, -0.4041, -0.4017,  ..., -0.2750, -0.2662, -0.2363],
         [-0.4007, -0.3985, -0.3934,  ..., -0.2883, -0.3140, -0.2975]],

        [[-0.4530, -0.4369, -0.4231,  ...,  0.4229,  0.4296,  0.4538],
         [-0.4444, -0.4526, -0.4570,  ...,  0.4195,  0.4300,  0.4675],
         [-0.4479, -0.4336, -0.4395,  ...,  0.3739,  0.3968,  0.4583],
         ...,
         [-0.4029, -0.4000, -0.3976,  ..., -0.2303, -0.1841, -0.1724],
         [-0.3956, -0.3923, -0.3881,  ..., -0.3058, -0.2971, -0.2573],
         [-0.3892, -0.3868, -0.3782,  ..., -0.3241, -0.3381, -0.3111]],

        [[-0.4452, -0.4304, -0.4227,  ...,  0.4710,  0.4709,  0.4713],
         [-0.4384, -0.4499, -0.4556,  ...,  0

In [None]:

# Command-line interface of the inference script; defaults are shown in --help.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    description='Inference script for DispNet learned with \
                                 Structure from Motion Learner inference on KITTI and CityScapes Dataset',
)

# (flag, add_argument keyword arguments), listed in the order they are registered.
_OPTION_SPECS = (
    ("--output-disp", dict(action='store_true', help="save disparity img")),
    ("--output-depth", dict(action='store_true', help="save depth img")),
    ("--pretrained", dict(required=True, type=str, help="pretrained DispNet path")),
    ("--img-height", dict(default=128, type=int, help="Image height")),
    ("--img-width", dict(default=416, type=int, help="Image width")),
    ("--no-resize", dict(action='store_true', help="no resizing is done")),
    ("--dataset-list", dict(default=None, type=str, help="Dataset list file")),
    ("--dataset-dir", dict(default='.', type=str, help="Dataset directory")),
    ("--output-dir", dict(default='output', type=str, help="Output directory")),
    ("--img-exts", dict(default=['png', 'jpg', 'bmp'], nargs='*', type=str,
                        help="images extensions to glob")),
)
for _flag, _options in _OPTION_SPECS:
    parser.add_argument(_flag, **_options)
# Pick the inference device: CUDA when available, otherwise the CPU.
# A plain assignment replaces the former bare `except:` that dropped into pdb,
# which would hang any non-interactive run.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
@torch.no_grad()
def main():
    """Run DispNetS on a set of images and save disparity and/or depth pictures.

    Options come from the module-level ``parser``. Images are taken from
    ``--dataset-list`` (one path per line, relative to ``--dataset-dir``) or,
    when no list is given, from a recursive search of ``--dataset-dir`` for the
    extensions in ``--img-exts``. Outputs go to ``--output-dir`` as
    ``<name>_disp<ext>`` and ``<name>_depth<ext>``.

    Returns None. Prints a message and returns early when neither
    ``--output-disp`` nor ``--output-depth`` was requested.
    """
    args = parser.parse_args()
    if not (args.output_disp or args.output_depth):
        print('You must at least output one value !')
        return

    disp_net = DispNetS().to(device)
    # map_location lets a checkpoint saved from a GPU load on a CPU-only host.
    weights = torch.load(args.pretrained, map_location=device)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    dataset_dir = Path(args.dataset_dir)
    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()

    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            # Blank lines are skipped; they would otherwise resolve to dataset_dir itself.
            test_files = [dataset_dir/line for line in f.read().splitlines() if line.strip()]
    else:
        test_files = [found
                      for ext in args.img_exts
                      for found in dataset_dir.walkfiles('*.{}'.format(ext))]

    print('{} files to test'.format(len(test_files)))

    for file in tqdm(test_files):

        img = img_as_float(imread(file))

        # Grayscale images have no channel axis and RGBA images carry an alpha
        # channel; bring both to three channels.
        # NOTE(review): presumably DispNetS expects 3-channel input — confirm.
        if img.ndim == 2:
            img = np.stack([img] * 3, axis=-1)
        elif img.shape[2] > 3:
            img = img[..., :3]

        h, w = img.shape[:2]
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            img = resize(img, (args.img_height, args.img_width))
        # HWC -> CHW, add a batch axis, then apply the (x - 0.5) / 0.5 normalisation.
        img = np.transpose(img, (2, 0, 1))

        tensor_img = torch.from_numpy(img.astype(np.float32)).unsqueeze(0)
        tensor_img = ((tensor_img - 0.5)/0.5).to(device)

        # First (and only) element of the batch.
        output = disp_net(tensor_img)[0]

        # Flatten the path relative to the dataset directory into one file name.
        file_path, file_ext = file.relpath(args.dataset_dir).splitext()
        file_name = '-'.join(file_path.splitall()[1:])

        if args.output_disp:
            disp = (255*tensor2array(output, max_value=None, colormap='bone')).astype(np.uint8)
            imsave(output_dir/'{}_disp{}'.format(file_name, file_ext), np.transpose(disp, (1,2,0)))
        if args.output_depth:
            depth = 1/output
            depth = (255*tensor2array(depth, max_value=None, colormap='rainbow')).astype(np.uint8)
            imsave(output_dir/'{}_depth{}'.format(file_name, file_ext), np.transpose(depth, (1,2,0)))


# Entry point when the file is executed as a script.
# NOTE(review): inside a notebook kernel __name__ is also '__main__', so running this
# cell calls main() and parses the kernel's own command line — confirm it is meant to run here.
if __name__ == '__main__':
    main()
