In [None]:
class DepthMotionLightningModel(LightningModule):
    """Lightning module that pairs a depth network with an object-motion network.

    The part of the class visible here builds ``DispNetS`` and
    ``MotionVectorNet``, scores their outputs with ``DMPLoss`` and implements
    the validation hooks plus the model-specific command-line options.

    NOTE(review): ``forward`` and ``train_dataloader`` are called below but are
    not defined in the visible part of this class -- confirm they exist.
    """

    def __init__(self, hparams):
        """Build the networks, the loss and the per-epoch step count.

        Args:
            hparams: namespace-like object. The visible code reads
                ``intrinsics``, ``accumulate_grad_batches`` and ``batch_size``
                from it.
        """
        super().__init__()
        # Weights handed to DMPLoss, keyed by loss-term name.
        self.default_loss_weights = {
            'rgb_consistency': 1.0,
            'ssim': 3.0,
            'depth_consistency': 0.05,
            'depth_smoothing': 0.05,
            'rotation_cycle_consistency': 1e-3,
            'translation_cycle_consistency': 5e-2,
            'depth_variance': 0.0,
            'motion_smoothing': 1.0,
            'motion_drift': 0.2,
        }
        self.hparams = hparams
        self.motion_field_burning_steps = 20000
        self.depth_net = DispNetS()

        # With --intrinsics a 3x3 matrix is read from ./intrinsics.txt
        # (comma-separated values); otherwise None is passed to the motion net.
        intrinsics_mat = None
        if self.hparams.intrinsics:
            intrinsics_mat = np.loadtxt('./intrinsics.txt', delimiter=',')
            intrinsics_mat = intrinsics_mat.reshape(3, 3)
        self.object_motion_net = MotionVectorNet(
            auto_mask=True,
            intrinsics=self.hparams.intrinsics,
            intrinsics_mat=intrinsics_mat,
        )
        self.loss_func = DMPLoss(self.default_loss_weights)
        self.delete_file = True

        # Training batches divided by the gradient-accumulation factor
        # (presumably optimizer steps per epoch -- confirm where it is used).
        train_batches = len(self.train_dataloader())
        self.base_step = train_batches // self.hparams.accumulate_grad_batches
        # torch.autograd.set_detect_anomaly(True)

    def validation_step(self, batch, batch_idx):
        """Run one validation batch and return its loss.

        Returns:
            OrderedDict with the single key ``'val_loss'``.
        """
        endpoints = self.forward(batch, batch_idx, train=False)
        loss_val = self.loss_func(endpoints)
        # Under dp / ddp2 the loss gets a leading dimension (presumably so the
        # per-GPU values can be gathered); validation_epoch_end averages it.
        if self.trainer.use_dp or self.trainer.use_ddp2:
            loss_val = loss_val.unsqueeze(0)
        outputs = OrderedDict({
            'val_loss': loss_val,
        })
        return outputs

    def validation_epoch_end(self, outputs):
        """Average ``'val_loss'`` over all validation step outputs.

        Args:
            outputs: the dicts returned by ``validation_step``.

        Returns:
            dict with ``'progress_bar'`` and ``'log'`` (both the averaged
            metrics dict) and ``'val_loss'`` (the averaged loss).
        """
        tqdm_dict = {}
        for metric_name in ["val_loss"]:
            metric_total = 0
            for output in outputs:
                metric_value = output[metric_name]
                # reduce manually when using dp
                if self.trainer.use_dp or self.trainer.use_ddp2:
                    metric_value = torch.mean(metric_value)
                metric_total += metric_value
            tqdm_dict[metric_name] = metric_total / len(outputs)

        result = {
            'progress_bar': tqdm_dict,
            'log': tqdm_dict,
            'val_loss': tqdm_dict["val_loss"],
        }
        return result

    def val_dataloader(self):
        """Build the validation ``DataLoader`` (unshuffled, 8 workers).

        NOTE(review): ``DepthMotionDataset`` and ``rsize_factor`` are not
        defined or imported in the visible code -- confirm where they come from.
        """
        val_transform = transforms.Compose([
            transforms.Resize(size=rsize_factor),
            transforms.ToTensor(),
        ])
        val_dataset = DepthMotionDataset(
            mode='valid',
            transform=val_transform,
            root_dir='./',
        )
        val_loader = torch.utils.data.DataLoader(
            dataset=val_dataset,
            batch_size=self.hparams.batch_size,
            shuffle=False,
            num_workers=8,
            drop_last=False,
            sampler=None,
            pin_memory=False,
        )
        print("Total valid example : {}".format(len(val_loader.dataset)))
        return val_loader

    @staticmethod
    def add_model_specific_args(parent_parser):
        """Return a parser that extends ``parent_parser`` with model options.

        Args:
            parent_parser: ``argparse.ArgumentParser`` whose arguments are
                inherited through ``parents=[...]``.

        Returns:
            A new ``argparse.ArgumentParser`` with the training options added.
        """
        parser = argparse.ArgumentParser(parents=[parent_parser])

        parser.add_argument('--epochs', default=90, type=int, metavar='N',
                            help='number of total epochs to run')
        parser.add_argument('--seed', type=int, default=42,
                            help='seed for initializing training. ')
        # The help text used to advertise "default: 256" although the actual
        # default is 8.
        parser.add_argument('-b', '--batch-size', default=8, type=int,
                            metavar='N',
                            help='mini-batch size (default: 8), this is the total batch size of all GPUs on the current node when using Data Parallel or Distributed Data Parallel')
        parser.add_argument('--lr', '--learning-rate', default=1e-4, type=float,
                            metavar='LR', help='initial learning rate', dest='lr')
        parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                            help='momentum')
        parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                            metavar='W', help='weight decay (default: 1e-4)',
                            dest='weight_decay')
        parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                            help='use pre-trained model')
        parser.add_argument('--intrinsics', dest='intrinsics', action='store_true',
                            help='use specified intrinsics')
        return parser

def get_args(argv=None):
    """Parse the program options (trainer options plus model options).

    Args:
        argv: optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, exactly as before. Passing an
            explicit list such as ``[]`` allows the function to be used where
            ``sys.argv`` does not belong to this script, e.g. in a notebook.

    Returns:
        ``argparse.Namespace`` holding the options declared below and those
        added by ``DepthMotionLightningModel.add_model_specific_args``.
    """
    # add_help=False: the child parser built from this one supplies -h/--help.
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser.add_argument('--gpus', type=int, default=0,
                               help='how many gpus')
    parent_parser.add_argument('--distributed-backend', type=str, default='dp',
                               choices=('dp', 'ddp', 'ddp2'),
                               help='supports three options dp, ddp, ddp2')
    parent_parser.add_argument('--use-16bit', dest='use_16bit', action='store_true',
                               help='if true uses 16 bit precision')
    parent_parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                               help='evaluate model on validation set')
    parent_parser.add_argument('-cf', '--clear-folder', dest='clear_folder', action='store_true',
                               help='clear the folder')
    parent_parser.add_argument('-agb', '--accumulate-grad-batches',
                               dest='accumulate_grad_batches', type=int,
                               default=4)

    parser = DepthMotionLightningModel.add_model_specific_args(parent_parser)
    return parser.parse_args(argv)

In [None]:
import os
import random
import argparse
import numpy as np
import pandas as pd
import json
from collections import OrderedDict
from PIL import Image
import imageio

import torch
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torchvision import transforms, utils
from torch.utils.data import Dataset, DataLoader

import intrinsics_utils
from loss_fn import DMPLoss
from depth_prediction_net import DispNetS
from object_motion_net import MotionVectorNet

In [None]:
def infer_ego_motion(rot, trans):
    """
        Combine the forward and backward motion predictions into one estimate.
        Args:
            rot : pair (frame 1->2, frame 2->1); each element is converted with
                  matrix_from_angles, so presumably rotation angles.
            trans : pair (frame 1->2, frame 2->1) of translations.
        Returns :
            avg_rot : mean of the 1->2 rotation matrix and the inverse of the 2->1 one.
            avg_trans : mean of the 1->2 translation and the negated 2->1
                        translation rotated by the 1->2 rotation matrix.
    """
    forward_angles, backward_angles = rot
    forward_rot = matrix_from_angles(forward_angles)
    backward_rot = matrix_from_angles(backward_angles)
    forward_trans, backward_trans = trans

    # Inverting the backward rotation gives a second estimate of the forward one.
    backward_rot_inv = torch.linalg.inv(backward_rot)
    avg_rot = 0.5 * (backward_rot_inv + forward_rot)

    # Rotate the backward translation as a column vector, then drop the
    # trailing axis again before averaging with the forward translation.
    backward_column = torch.unsqueeze(backward_trans, -1)
    rotated_backward = torch.squeeze(
        torch.matmul(forward_rot, backward_column), dim=-1)
    avg_trans = 0.5 * (-rotated_backward + forward_trans)

    return avg_rot, avg_trans

# Pre-processing for both input frames: resize to 128x416, convert to tensor.
transform = transforms.Compose([
    transforms.Resize(size=(128, 416)),
    transforms.ToTensor(),
])
trajectory, positions = [], []
position = np.zeros(3)
orientation = np.eye(3)

# Model Architecture
# NOTE(review): `args` is not assigned in the visible code -- presumably
# `args = get_args()` runs elsewhere; confirm.
if args.gpus != -1:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
else:
    device = "cpu"

depth_net = DispNetS()
# NOTE(review): `intrinsics_mat` is read here before this script assigns it
# (it is only rebound by the motion-net call further down) -- confirm it is
# defined beforehand.
object_motion_net = MotionVectorNet(auto_mask=True, intrinsics=args.intrinsics, intrinsics_mat=intrinsics_mat)
# Load Model
# NOTE(review): checkpoint loading is commented out, so the networks run with
# whatever weights their constructors produce.
# model = torch.load(args.model_path, map_location=device)["state_dict"]
# depth_model = { k.replace("depth_net.", "") : v for k, v in model.items() if "depth" in k}
# depth_net.load_state_dict(depth_model)
# object_model = { k.replace("object_motion_net.", "") : v for k, v in model.items() if "object" in k}
# object_motion_net.load_state_dict(object_model)
depth_net.eval()
object_motion_net.eval()

# Inference only: no autograd graph is needed for the forward passes.
with torch.no_grad():
    # NOTE(review): `sample_a` / `sample_b` (the two input frames) are not
    # defined in the visible code -- confirm where they are loaded.
    sample_a = transform(sample_a)
    sample_b = transform(sample_b)

    endpoints = {}
    rgb_seq_images = [sample_a.unsqueeze(0), sample_b.unsqueeze(0)]
    rgb_images = torch.cat((rgb_seq_images[0], rgb_seq_images[1]), dim=0)

    # Both frames go through the depth net as one batch, then are split back.
    depth_images = depth_net(rgb_images)
    depth_seq_images = torch.split(depth_images, depth_images.shape[0] // 2, dim=0)

    endpoints['predicted_depth'] = depth_seq_images
    endpoints['rgb'] = rgb_seq_images
    # Per frame: RGB and predicted depth concatenated along dim 1.
    motion_features = [torch.cat((endpoints['rgb'][0], endpoints['predicted_depth'][0]), dim=1),
                       torch.cat((endpoints['rgb'][1], endpoints['predicted_depth'][1]), dim=1)]
    # The first half of the batch pairs (frame a, frame b); the second half
    # pairs them in the reverse order.
    motion_features_stack = torch.cat(motion_features, dim=0)
    flipped_motion_features_stack = torch.cat(motion_features[::-1], dim=0)
    pairs = torch.cat([motion_features_stack, flipped_motion_features_stack], dim=1)

    rot, trans, residual_translation, intrinsics_mat = object_motion_net(pairs)
    endpoints['residual_translation'] = torch.split(residual_translation, residual_translation.shape[0] // 2, dim=0)
    endpoints['background_translation'] = torch.split(trans, trans.shape[0] // 2, dim=0)
    endpoints['rotation'] = torch.split(rot, rot.shape[0] // 2, dim=0)
    # Average the intrinsics returned for the two halves of the batch.
    intrinsics_mat = 0.5 * sum(torch.split(intrinsics_mat, intrinsics_mat.shape[0] // 2, dim=0))
    endpoints['intrinsics_mat'] = [intrinsics_mat] * 2
    endpoints['intrinsics_mat_inv'] = [intrinsics_utils.invert_intrinsics_matrix(intrinsics_mat)] * 2

    rot, trans = infer_ego_motion(endpoints['rotation'], endpoints['background_translation'])
    # NOTE(review): `angles_from_matrix` is not defined or imported in the
    # visible code, and `rot_angles` is not used below.
    rot_angles = angles_from_matrix(rot).detach().cpu().numpy()

rot, trans = rot.detach().cpu().numpy(), trans.detach().cpu().numpy()
orientation = np.dot(orientation, rot[0])
trajectory.append(np.concatenate((np.concatenate((orientation, trans.T), axis=1), [[0, 0, 0, 1]]), axis=0))
position += np.dot(orientation, trans[0])
# `position` is updated in place, so store a snapshot rather than a reference
# that any later update would silently overwrite.
positions.append(position.copy())
trajectory = np.vstack(trajectory) # Trajectories - 4x4 Pose matrix will be stored in [(N-1)*4,4] vector in trajectory.txt
positions = np.array(positions) # Positions - 1x3 will be stored as [(N-1),3] vector in positions.txt
np.savetxt('./trajectory.txt', trajectory)
np.savetxt('./positions.txt', positions)