In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
import numpy as np
    
class FeatureNet(nn.Sequential):
    """Convolutional feature extractor: 4 input channels -> 196 feature maps.

    The stack is three stages of three unpadded 3x3 convolutions (each stage
    closed by batch norm + ReLU, the second and third stage opening with a
    stride-2 convolution), followed by a 1x1 convolution to 196 channels with
    its own batch norm + ReLU. The layers live in the ``model`` child module.

    NOTE(review): the class derives from ``nn.Sequential`` but overrides
    ``forward`` and keeps its layers in ``self.model``; ``nn.Module`` would be
    the conventional base. Kept as-is so ``isinstance`` checks do not change.
    """

    @staticmethod
    def _conv_stage(c_in, c_mid, c_out, stride):
        """Return [conv3x3(stride), conv3x3, conv3x3, batch norm, ReLU] as a list."""
        return [
            nn.Conv2d(c_in, c_mid, kernel_size=3, stride=stride),
            nn.Conv2d(c_mid, c_out, kernel_size=3),
            nn.Conv2d(c_out, c_out, kernel_size=3),
            nn.BatchNorm2d(c_out),
            nn.ReLU(),
        ]

    def __init__(self):
        super().__init__()
        layers = []
        layers += self._conv_stage(4, 32, 32, stride=1)
        layers += self._conv_stage(32, 64, 128, stride=2)
        layers += self._conv_stage(128, 256, 256, stride=2)
        # 1x1 projection head: one output map per keypoint channel.
        layers += [
            nn.Conv2d(256, 196, kernel_size=1),
            nn.BatchNorm2d(196),
            nn.ReLU(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, data):
        """Run ``data`` (N, 4, H, W) through the convolution stack."""
        features = self.model(data)
        return features
    
class PoseRegression(nn.Sequential):
    """Fully connected regressor from a 588-value vector to 21 outputs in [-1, 1].

    Four linear layers (588 -> 294 -> 147 -> 42 -> 21), each followed by batch
    norm. ReLU follows the second and third batch norm only (there is none
    after the first), and a final ``Tanh`` bounds the output. The layers live
    in the ``model`` child module.

    NOTE(review): derives from ``nn.Sequential`` while overriding ``forward``;
    kept unchanged so ``isinstance`` checks behave as before.
    """

    def __init__(self):
        super().__init__()
        layers = [
            nn.Linear(in_features=588, out_features=294),
            nn.BatchNorm1d(294),
            nn.Linear(in_features=294, out_features=147),
            nn.BatchNorm1d(147),
            nn.ReLU(),
            nn.Linear(in_features=147, out_features=42),
            nn.BatchNorm1d(42),
            nn.ReLU(),
            nn.Linear(in_features=42, out_features=21),
            nn.BatchNorm1d(21),
            nn.Tanh(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, data):
        """Map ``data`` of shape (N, 588) to predictions of shape (N, 21)."""
        prediction = self.model(data)
        return prediction
    
    

class Transformation(nn.Module):
    """Rigid transform ``p -> R p + t`` (``forward``) and its inverse (``backward``).

    Args:
        rotation: 2-D square tensor ``R``. Its transpose is used as the inverse
            rotation, so it is assumed to be orthonormal.
        translation: tensor ``t`` that broadcasts against ``R p``, e.g. a
            column vector of shape (d, 1).

    ``rotation``, ``translation`` and the derived inverse terms ``br`` / ``bt``
    are registered as non-persistent buffers: they follow ``.to(device)`` /
    ``.float()`` with the module but do not appear in ``state_dict()``.
    """

    def __init__(self, rotation, translation):
        super().__init__()
        inverse_rotation = torch.transpose(rotation, 1, 0)
        # Inverse of p' = R p + t is p = R^T p' - R^T t, hence the minus sign.
        inverse_translation = -torch.matmul(inverse_rotation, translation)

        self.register_buffer('rotation', rotation, persistent=False)
        self.register_buffer('translation', translation, persistent=False)
        self.register_buffer('br', inverse_rotation, persistent=False)
        self.register_buffer('bt', inverse_translation, persistent=False)

    def forward(self, data):
        """Apply ``R @ data + t``; points are the columns of ``data``.

        ``torch.matmul`` broadcasts, so leading batch dimensions are allowed.
        """
        reprojected_pt = torch.matmul(self.rotation, data) + self.translation
        return reprojected_pt

    def backward(self, data):
        """Apply the inverse transform ``R^T @ data - R^T t``.

        This is a plain method and is unrelated to autograd's backward pass.
        """
        reprojected_pt = torch.matmul(self.br, data) + self.bt
        return reprojected_pt
        
class ExtendedSpatialSoftargMax(nn.Module):
    """Spatial soft-argmax that lifts each feature-map keypoint with a depth lookup.

    For every (sample, channel) feature map, a softmax over all spatial
    positions gives the expected normalized coordinates ``(x, y)`` in
    [-1, 1]. The depth map is then sampled at the matching pixel and the
    keypoint is returned as ``(x * z, y * z, z)``.

    Args:
        height, width: sizes used to build the coordinate grid;
            ``height * width`` must equal the number of spatial positions of
            one feature map.
        channel: number of feature channels (keypoints per sample).
        temperature: if truthy, a learnable softmax temperature initialised to
            this value; otherwise a fixed temperature of 1.
        data_format: ``'NCHW'`` (default) or ``'NHWC'`` layout of ``feature``.

    NOTE(review): ``np.meshgrid`` is called with the ``height`` samples first,
    so after flattening ``pos_x`` cycles through its ``height`` values on the
    fastest-varying axis. For non-square maps this only lines up with a
    row-major flatten of (N, C, H, W) when ``height`` is the number of
    columns -- verify against callers before changing it.
    """

    def __init__(self, height, width, channel, temperature=None, data_format='NCHW'):
        super(ExtendedSpatialSoftargMax, self).__init__()
        self.data_format = data_format
        self.height = height
        self.width = width
        self.channel = channel

        if temperature:
            self.temperature = Parameter(torch.ones(1)*temperature)
        else:
            self.temperature = 1.

        pos_x, pos_y = np.meshgrid(
                np.linspace(-1., 1., self.height),
                np.linspace(-1., 1., self.width)
                )
        pos_x = torch.from_numpy(pos_x.reshape(self.height*self.width)).float()
        pos_y = torch.from_numpy(pos_y.reshape(self.height*self.width)).float()
        # Buffers so the grids move with the module across devices/dtypes.
        self.register_buffer('pos_x', pos_x)
        self.register_buffer('pos_y', pos_y)

    @staticmethod
    def _to_pixel_index(coord, size):
        """Map normalized coordinates in [-1, 1] to integer indices in [0, size - 1]."""
        index = torch.round((coord + 1.) * 0.5 * (size - 1)).long()
        # The clamp only guards against floating-point overshoot at the borders.
        return index.clamp(0, size - 1)

    def forward(self, feature, depth):
        """Return keypoints of shape (N, channel * 3).

        Args:
            feature: (N, channel, H, W) tensor, or (N, H, W, channel) when
                ``data_format == 'NHWC'``.
            depth: 4-D tensor whose first dimension matches N and whose
                channel 0 is sampled.

        Returns:
            Tensor laid out per sample as
            ``x_0*z_0, y_0*z_0, z_0, x_1*z_1, y_1*z_1, z_1, ...``.

        NOTE(review): as in the original code, ``x`` indexes ``depth`` dim 2
        and ``y`` indexes dim 3; confirm this matches the depth tensor's axis
        order.
        """
        if self.data_format == 'NHWC':
            # (N, H, W, C) -> (N, C, H, W); the result is non-contiguous, hence
            # reshape (not view) below.
            feature = feature.permute(0, 3, 1, 2)
        feature = feature.reshape(-1, self.height*self.width)

        softmax_attention = F.softmax(feature/self.temperature, dim=-1)
        expected_x = torch.sum(self.pos_x*softmax_attention, dim=1, keepdim=True)
        expected_y = torch.sum(self.pos_y*softmax_attention, dim=1, keepdim=True)

        # Rows are ordered sample-major: row r belongs to sample r // channel.
        num_samples = expected_x.shape[0] // self.channel
        ix = self._to_pixel_index(expected_x, depth.shape[2]).view(num_samples, self.channel)
        iy = self._to_pixel_index(expected_y, depth.shape[3]).view(num_samples, self.channel)

        # Pair every keypoint with the depth map of its own sample only.
        sample = torch.arange(num_samples, device=depth.device).unsqueeze(1)
        z = depth[sample, 0, ix, iy].reshape(-1, 1)

        result = torch.cat([expected_x * z, expected_y * z, z], 1)
        feature_keypoints = result.view(-1, self.channel*3)

        return feature_keypoints


In [2]:
class Keyframe(nn.Module):
    """Pipeline: features -> depth-lifted keypoints -> rigid transform -> pose regression.

    Args:
        rotation: rotation matrix handed to ``Transformation``.
        translation: translation vector handed to ``Transformation``.

    The soft-argmax sizes are hard-coded for the 21 x 31 feature maps that
    ``FeatureNet`` produces from a 120 x 160 input. They are passed as
    (31, 21) because ``ExtendedSpatialSoftargMax`` builds its grid with the
    first size on the fastest-varying axis.
    """

    def __init__(self, rotation, translation):
        super(Keyframe, self).__init__()
        self.feature = FeatureNet()
        self.extended_spatial_max = ExtendedSpatialSoftargMax(31,21,196)
        self.transform = Transformation(rotation, translation)
        self.pose_regress = PoseRegression()
        self.rotation = rotation
        self.translation = translation

    def forward(self, data, depth):
        """Predict the pose vector for a batch.

        Args:
            data: (N, 4, H, W) input for ``FeatureNet``.
            depth: depth tensor forwarded to ``ExtendedSpatialSoftargMax``.

        Returns:
            Output of ``PoseRegression``, shape (N, 21).
        """
        output = self.feature(data)
        output = self.extended_spatial_max(output, depth)

        # The keypoints arrive flattened as (N, 196 * 3) with x, y, z
        # interleaved per keypoint. Transformation multiplies a 3x3 rotation
        # from the left, so present them as (N, 3, 196) column points, then
        # flatten back to the same interleaved layout.
        batch = output.shape[0]
        points = output.reshape(batch, -1, 3).transpose(1, 2)
        points = self.transform(points)
        output = points.transpose(1, 2).reshape(batch, -1)

        output = self.pose_regress(output)
        return output

In [6]:
import math

def euler2rotm(theta):
    """Build a 3x3 rotation matrix from three Euler angles (radians).

    ``theta[0]``, ``theta[1]`` and ``theta[2]`` are rotations about the x, y
    and z axes; the result is ``R_z @ R_y @ R_x`` as a float NumPy array.
    """
    ang_x, ang_y, ang_z = theta[0], theta[1], theta[2]
    cx, sx = math.cos(ang_x), math.sin(ang_x)
    cy, sy = math.cos(ang_y), math.sin(ang_y)
    cz, sz = math.cos(ang_z), math.sin(ang_z)

    rot_x = np.array([[1, 0,  0],
                      [0, cx, -sx],
                      [0, sx, cx]])
    rot_y = np.array([[cy,  0, sy],
                      [0,   1, 0],
                      [-sy, 0, cy]])
    rot_z = np.array([[cz, -sz, 0],
                      [sz, cz,  0],
                      [0,  0,   1]])

    # Same association as before: R_z applied to (R_y applied to R_x).
    return rot_z.dot(rot_y.dot(rot_x))

def point_in_camera(point, rotation, translation):
    """Return ``rotation @ point + translation`` for tensor inputs."""
    rotated = torch.matmul(rotation, point)
    return rotated + translation

In [7]:
# Fixed rigid transform handed to the model (presumably the camera
# extrinsics -- TODO confirm): Euler angles about x, y, z and a (3, 1) offset.
rotation = torch.Tensor(euler2rotm([np.pi / 4, np.pi, -np.pi / 2]))
translation = torch.Tensor([[1.0], [0.0], [0.75]])

k = Keyframe(rotation=rotation, translation=translation)

In [39]:
# coding=utf-8
# Copyright 2021 The Yan Li, UTK, Knoxville, TN.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
import cv2 # for resize image
from os import listdir
from os.path import join
import pickle
from torch.utils.data import Dataset
import random as r
import numpy as np


class dataset(Dataset):
    """Dataset over per-episode pickle files in ``color``/``depth``/``info``/``action`` folders.

    Each item picks one random frame of the episode at ``idx`` and returns
    the normalised RGB-D input, the full-resolution depth frame and the
    ground-truth vector assembled by ``resolveInfo``.

    Args:
        data_dir: directory containing the four sub-folders.
        transform: accepted for API compatibility; currently unused.
        target_transform: accepted for API compatibility; currently unused.

    NOTE(review): the recorded run of this notebook failed inside
    ``cv2.resize`` with "src is not a numpy array" -- confirm what type the
    pickled colour/depth objects actually hold.
    """

    def __init__(self, data_dir, transform=None, target_transform=None):
        self.data_dir = data_dir
        # Per-channel normalisation constants (presumably the ImageNet
        # statistics -- TODO confirm).
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]
        self.image_folder = join(data_dir, 'color')
        self.depth_folder = join(data_dir, 'depth')
        self.info_folder = join(data_dir, 'info')
        self.action_folder = join(data_dir, 'action')
        # listdir() returns entries in arbitrary order. The four listings are
        # paired by position, so sort them to make index i refer to the same
        # episode everywhere (assumes matching file names across folders).
        self.image_files = sorted(listdir(self.image_folder))
        self.depth_files = sorted(listdir(self.depth_folder))
        self.info_files = sorted(listdir(self.info_folder))
        self.action_files = sorted(listdir(self.action_folder))

    def __len__(self):
        """Number of episode files in the ``color`` folder."""
        return len(self.image_files)

    def _select_frame(self, num_frames):
        """Pick a random frame index in ``[1, num_frames - 1]``.

        ``random.randint`` includes both endpoints, so the upper bound must be
        ``num_frames - 1`` to stay inside the array.

        NOTE(review): frame 0 is never selected, as in the original code --
        confirm that skipping the first frame is intended.

        Raises:
            IndexError: if the episode has fewer than two frames.
        """
        if num_frames < 2:
            raise IndexError(
                'episode has %d frame(s); need at least 2 to select a frame >= 1'
                % num_frames)
        return r.randint(1, num_frames - 1)

    def __getitem__(self, idx):
        """Return ``(data, depth, gt)`` for a random frame of episode ``idx``.

        Returns:
            data: float tensor of shape (1, 4, 120, 160) -- normalised RGB
                plus resized depth.
            depth: the selected depth frame at its stored resolution.
            gt: ground-truth vector from ``resolveInfo``.
        """
        # image
        frames = self.loadfile(join(self.image_folder, self.image_files[idx]))
        selected_frame = self._select_frame(frames.shape[0])
        img = frames[selected_frame,0]
        # cv2 takes dsize as (width, height): the result is 120 rows x 160 cols.
        img = cv2.resize(img, dsize=(160, 120), interpolation=cv2.INTER_CUBIC)
        img = img / 255.0
        img = img - self.mean
        img = img / self.std

        # depth
        frames = self.loadfile(join(self.depth_folder, self.depth_files[idx]))
        depth = frames[selected_frame,0]
        td = cv2.resize(depth, dsize=(160, 120), interpolation=cv2.INTER_CUBIC)
        depth = torch.Tensor(frames[selected_frame,0])
        color = torch.Tensor(img)
        td = torch.Tensor(td)
        td = torch.reshape(td, (120,160,1))
        # Stack depth as a 4th channel, then go channels-first with a batch dim.
        data = torch.cat((color, td), 2)
        data = data.permute(2, 0 ,1)
        data = torch.reshape(data, (1, 4, 120, 160))

        # info
        info = self.loadfile(join(self.info_folder, self.info_files[idx]))
        #action
        action = self.loadfile(join(self.action_folder, self.action_files[idx]))
        gt = self.resolveInfo(info, action, selected_frame)

        return data, depth, gt

    def loadfile(self, filename):
        """Unpickle and return the object stored in ``filename``."""
        # SECURITY: pickle.load can execute arbitrary code; only use this on
        # trusted dataset files.
        with open(filename, 'rb') as f:
            return pickle.load(f)

    def resolveInfo(self, info, action, selected_frame):
        """Concatenate the entries for ``selected_frame`` into one 1-D float tensor.

        The vector is built, in order, from ``info[frame][5][0:2]``,
        ``info[frame][6][0:2]`` and ``action[frame]['pose'][0:2]``
        (presumably position + orientation of two objects and of the action
        pose -- TODO confirm against the data format).
        """
        frame_info = info[selected_frame]
        frame_pose = action[selected_frame]['pose']

        x = np.concatenate((np.array(frame_info[5][0]),
                            np.array(frame_info[5][1])), axis=0)
        y = np.concatenate((np.array(frame_info[6][0]),
                            np.array(frame_info[6][1])), axis=0)
        z = np.concatenate((np.array(frame_pose[0]),
                            np.array(frame_pose[1])), axis=0)

        gt = np.concatenate((x,y,z),axis=0)
        gt = torch.Tensor(gt)
        return gt
        
#mean = [0.485, 0.456, 0.406] 
#std = [0.229, 0.224, 0.225]
#
#data_path = '../datasets/key_frame_identifier/block-insertion-test/'
#img_folder = data_path + 'color/'
#depth_folder = data_path + 'depth/'
#pcl_file = '000000-1'

In [40]:
# Root folder of the pickled episodes (relative to the notebook's working directory).
data_dir = "datasets/key_frame_identifier/block-insertion-test"
data = dataset(data_dir=data_dir)

In [41]:
# Smoke test: report the dataset size, then load the sample at index 999 and
# show its ground-truth vector. In the recorded run this cell printed 1000 and
# then failed inside cv2.resize (see the traceback below).
print(len(data))
img, depth, gt = data[999]
print(gt)

1000


error: OpenCV(4.5.2) :-1: error: (-5:Bad argument) in function 'resize'
> Overload resolution failed:
>  - src is not a numpy array, neither a scalar
>  - Expected Ptr<cv::UMat> for argument 'src'


In [38]:
# Shape check only: stacking three (7, 1) tensors along dim 0 gives (21, 1).
# The tensors are uninitialised; their values are never inspected.
x, y, z = (torch.Tensor(7, 1) for _ in range(3))

o = torch.cat([x, y, z], dim=0)
print(o.shape)

torch.Size([21, 1])
