In [2]:
import tensorflow as tf
import numpy as np
import os
import glob
import pykitti
from util import *
from colmap_model import *

In [3]:
def _bytes_feature(value):
  """Wrap one bytes value in a tf.train.Feature backed by a BytesList."""
  payload = tf.train.BytesList(value=[value])
  return tf.train.Feature(bytes_list=payload)

def _float_feature(value):
  """Returns a float_list from a float / double or an iterable of them.

  Args:
    value: a single Python/NumPy scalar, or an iterable of floats (for
      example a flattened pose matrix).

  Returns:
    A tf.train.Feature whose float_list holds the given value(s).
  """
  # FloatList requires an iterable; wrap a lone scalar so the single-value
  # use promised by the docstring works instead of raising TypeError.
  if np.isscalar(value):
    value = [value]
  return tf.train.Feature(float_list=tf.train.FloatList(value=value))

def _int64_feature(value):
  """Wrap one bool / enum / int / uint in a tf.train.Feature backed by an Int64List."""
  payload = tf.train.Int64List(value=[value])
  return tf.train.Feature(int64_list=payload)


In [4]:
# Root of the local dataset checkout. Set the LFNET_DATA_DIR environment
# variable to run on another machine; the default keeps the original location.
data_dir = os.environ.get("LFNET_DATA_DIR", "/Users/dewalgupta/Documents/ucsd/lab/data/")
depth_dir = os.path.join(data_dir, 'KITTI/raw/2011_09_30/2011_09_30_drive_0016_sync/post_processed_depth')
img_dir = os.path.join(data_dir, 'KITTI/raw/2011_09_30/2011_09_30_drive_0016_sync/image_02/data')
category = 'kitti_04'

# Number of frames separating the two members of each (frame_1, frame_2) pair.
FRAME_GAP = 5

# Build the frame pairs.
# Frame names come from the depth directory listing only.
# NOTE(review): assumes an RGB image with the same filename exists for every
# depth frame - TODO confirm.
frames = sorted(f for f in os.listdir(depth_dir) if f.endswith("png"))

# Paths are stored relative to the dataset root and encoded to bytes so they
# can be written as bytes features.
rgb_frames = [(category + "/dense/images/" + f).encode('UTF-8') for f in frames]
depth_frames = [(category + "/depth/" + f).encode('UTF-8') for f in frames]

# Element i of the *_1 arrays is paired with element i of the *_2 arrays,
# which is the frame FRAME_GAP positions later in the sequence.
rgb_frame_1 = np.array(rgb_frames[:-FRAME_GAP])
rgb_frame_2 = np.array(rgb_frames[FRAME_GAP:])
depth_frame_1 = np.array(depth_frames[:-FRAME_GAP])
depth_frame_2 = np.array(depth_frames[FRAME_GAP:])

In [6]:
# Disabled alternative: load the raw KITTI drive through pykitti.
# kitti_dir = os.path.join(data_dir, "KITTI/raw")
# date = '2011_09_30'
# drive = '0016'

# data = pykitti.raw(kitti_dir, date, drive)

# Load the three binary files of the COLMAP sparse model.
colmap_dir = os.path.join(data_dir, 'colmap/04_color/dense/sparse')

points3d_path = os.path.join(colmap_dir, 'points3D.bin')
images_path = os.path.join(colmap_dir, 'images.bin')
cameras_path = os.path.join(colmap_dir, 'cameras.bin')

points3d = read_points3d_binary(points3d_path)
images = read_images_binary(images_path)
camera = read_cameras_binary(cameras_path)

In [7]:
# Build the poses: collect the `.pose` of every entry in the COLMAP image set.
# NOTE(review): the order follows the iteration order of `images`; confirm it
# matches the sorted frame order used for the filename lists.
poses = []
for image_key in images:
    poses.append(images[image_key].pose)

# Disabled alternative: derive the poses from the KITTI OXTS readings.
# K = data.calib.P_rect_00
# K_ci = data.calib.T_cam0_imu @ data.calib.R_rect_00
# p0 = data.oxts[0].T_w_imu

# for i in range(len(frames)):
#     p1 = data.oxts[i].T_w_imu
#     rect_pose_f1 = K_ci @ np.linalg.inv(p0) @ p1 @ np.linalg.inv(K_ci)
#     T_cw_f1 = np.linalg.inv(rect_pose_f1)
#     poses += [T_cw_f1]

In [9]:
# print(poses[0])
# print(K)

[[  9.99926390e-01  -1.00932212e-02  -6.73365835e-03   1.33008738e-01]
 [  1.00494935e-02   9.99928399e-01  -6.49643065e-03   1.17375706e-01]
 [  6.79874613e-03   6.42828259e-03   9.99956226e-01  -6.50992203e+00]
 [  0.00000000e+00   0.00000000e+00   0.00000000e+00   1.00000000e+00]]


In [10]:
# Poses must line up one-to-one with the frame lists. Fail early with a clear
# message rather than a shape error raised inside from_tensor_slices.
if len(poses) != len(rgb_frames):
    raise ValueError(
        "Pose/frame count mismatch: %d poses vs %d frames" % (len(poses), len(rgb_frames)))

# Pair each pose with the pose five entries later, using the same offset as
# the frame filename pairs.
pose_f1 = np.array(poses[:-5])
pose_f2 = np.array(poses[5:])

# One dataset element = (rgb_1, rgb_2, depth_1, depth_2, pose_1, pose_2).
features_dataset = tf.data.Dataset.from_tensor_slices(
    (rgb_frame_1, rgb_frame_2, depth_frame_1, depth_frame_2, pose_f1, pose_f2))

In [11]:
# Test the dataset - make sure it looks right

# Deliberately not named `iter`: that would shadow the built-in for every
# cell executed afterwards in this kernel.
preview_iterator = features_dataset.make_one_shot_iterator()
first_element = preview_iterator.get_next()

# Evaluate and print only the first element of the dataset.
with tf.Session() as sess:
    print(sess.run(first_element))

(b'kitti_04/dense/images/0000000000.png', b'kitti_04/dense/images/0000000005.png', b'kitti_04/depth/0000000000.png', b'kitti_04/depth/0000000005.png', array([[  9.99926390e-01,  -1.00932212e-02,  -6.73365835e-03,
          1.33008738e-01],
       [  1.00494935e-02,   9.99928399e-01,  -6.49643065e-03,
          1.17375706e-01],
       [  6.79874613e-03,   6.42828259e-03,   9.99956226e-01,
         -6.50992203e+00],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          1.00000000e+00]]), array([[  9.99865641e-01,  -1.54760150e-02,  -5.40296983e-03,
          1.19036117e-01],
       [  1.54497728e-02,   9.99868808e-01,  -4.86540866e-03,
          1.02305944e-01],
       [  5.47755814e-03,   4.78128030e-03,   9.99973568e-01,
         -6.26558787e+00],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          1.00000000e+00]]))


In [12]:
def serialize_example(rgb_frame_1, rgb_frame_2, depth_frame_1, depth_frame_2, pose1, pose2):
    """
    Build a tf.Example for one frame pair and return it serialized.

    The four frame arguments are stored as bytes features; each pose is
    flattened to one dimension and stored as a float feature.
    """
    # Flatten the poses up front so every field below is a plain helper call.
    flat_pose1 = np.reshape(pose1, -1)
    flat_pose2 = np.reshape(pose2, -1)

    # Feature name -> tf.Example-compatible feature.
    fields = dict(
        rgb_fname_1=_bytes_feature(rgb_frame_1),
        rgb_fname_2=_bytes_feature(rgb_frame_2),
        depth_fname_1=_bytes_feature(depth_frame_1),
        depth_fname_2=_bytes_feature(depth_frame_2),
        pose1=_float_feature(flat_pose1),
        pose2=_float_feature(flat_pose2),
    )

    record = tf.train.Example(features=tf.train.Features(feature=fields))
    return record.SerializeToString()


def dataset_generator(ds, sess):
    """
    Yield the elements of `ds` one at a time, evaluated in `sess`.

    Iteration ends quietly when the one-shot iterator is exhausted.
    """
    fetch_next = ds.make_one_shot_iterator().get_next()

    while True:
        try:
            row = sess.run(fetch_next)
        except tf.errors.OutOfRangeError:
            # The dataset has no more elements.
            return
        yield row

    
def create_records(ds, record_path):
    """
    Serialize every element of `ds` and write it to a TFRecord file at `record_path`.

    Only the first six fields of each element are used, in the order expected
    by serialize_example.
    """
    with tf.Session() as sess:
        with tf.python_io.TFRecordWriter(record_path) as writer:
            for row in dataset_generator(ds, sess):
                serialized = serialize_example(*row[:6])
                writer.write(serialized)

In [13]:
# Write every frame/pose pair of the dataset into one TFRecord file.
tfrecord_name = "kitti-lf-net.tfrecord"
create_records(ds=features_dataset, record_path=tfrecord_name)

# Read the tfrecord and make sure it is what we expect

In [14]:
# Re-open the file that was just written as a dataset of serialized records.
dataset = tf.data.TFRecordDataset(filenames=tfrecord_name)

In [15]:
def parser(serialized):
    """
    Decode one serialized tf.Example into its six fields.

    Returns a tuple (rgb_fname_1, rgb_fname_2, depth_fname_1, depth_fname_2,
    pose1, pose2); the filenames are scalar strings and each pose is a
    length-16 float32 vector.
    """
    filename_keys = ('rgb_fname_1', 'rgb_fname_2', 'depth_fname_1', 'depth_fname_2')
    pose_keys = ('pose1', 'pose2')

    with tf.name_scope('parse_example'):
        spec = {}
        for key in filename_keys:
            spec[key] = tf.FixedLenFeature([], tf.string)
        for key in pose_keys:
            spec[key] = tf.FixedLenFeature([16], tf.float32)
        example = tf.parse_single_example(serialized, features=spec)

    return tuple(example[key] for key in filename_keys + pose_keys)

In [16]:
# Decode each serialized record, then take a handle on the next parsed row.
parsed_dataset = dataset.map(map_func=parser)
iterator = parsed_dataset.make_one_shot_iterator()
data = iterator.get_next()

In [17]:
# Read the first parsed record back from the TFRecord file and print it.
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    rec = sess.run([data])
    print(rec)

[(b'kitti_04/dense/images/0000000000.png', b'kitti_04/dense/images/0000000005.png', b'kitti_04/depth/0000000000.png', b'kitti_04/depth/0000000005.png', array([  9.99926388e-01,  -1.00932214e-02,  -6.73365826e-03,
         1.33008733e-01,   1.00494931e-02,   9.99928415e-01,
        -6.49643084e-03,   1.17375709e-01,   6.79874606e-03,
         6.42828271e-03,   9.99956250e-01,  -6.50992203e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         1.00000000e+00], dtype=float32), array([  9.99865651e-01,  -1.54760154e-02,  -5.40296966e-03,
         1.19036116e-01,   1.54497726e-02,   9.99868810e-01,
        -4.86540888e-03,   1.02305941e-01,   5.47755836e-03,
         4.78128018e-03,   9.99973595e-01,  -6.26558781e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         1.00000000e+00], dtype=float32))]
