In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import functools

import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
import json
import os
import quaternion
import h5py
import models

#import capsulelayers as capslayers
#import capsulenet as capsnet

In [2]:
# Build the training labels (class one-hot, pose, bounding box) from the JSON annotations
training_dataset_folder_path = "C:/Users/BORA/Desktop/lm_test_no_occlusion" #change this
training_split = "train"
output_classes = 3 # Can vary depending on the amount of objects

# Label accumulators; each gets one entry per annotated image.
object_ids = []
poses      = []
bboxes     = []

split_dir = os.path.join(training_dataset_folder_path, training_split)
for scene in os.listdir(split_dir):
    scene_dir          = os.path.join(split_dir, scene)
    scene_gt_path      = os.path.join(scene_dir, "scene_gt.json")
    scene_gt_info_path = os.path.join(scene_dir, "scene_gt_info.json")

    # Ground-truth poses and object ids.
    with open(scene_gt_path, 'r') as scene_gt_file:
        scene_gt_json = json.load(scene_gt_file)

    for img_gt_key, annotations in scene_gt_json.items():
        # Only the first annotation of every image is used.
        first_gt    = annotations[0]
        translation = first_gt['cam_t_m2c']
        # Flat 9-element rotation -> 3x3 matrix -> quaternion -> 4-element float array.
        rotation    = quaternion.as_float_array(
            quaternion.from_rotation_matrix(np.reshape(first_gt['cam_R_m2c'], (3, 3))))
        obj_id      = first_gt['obj_id']

        # One-hot encoding: slot (obj_id - 1) is 1.0, every other slot is 0.0.
        label_id = np.asarray([1.0 if index == obj_id - 1 else 0.0
                               for index in range(output_classes)])
        object_ids.append(label_id)

        # Translation (3 values) followed by the quaternion (4 values).
        label_pose = np.asarray([translation[0], translation[1], translation[2],
                                 rotation[0], rotation[1], rotation[2], rotation[3]])
        poses.append(label_pose)

    # Ground-truth bounding boxes.
    with open(scene_gt_info_path, 'r') as scene_gt_info_file:
        scene_gt_info_json = json.load(scene_gt_info_file)

    for img_gt_key, annotations in scene_gt_info_json.items():
        # Again only the first annotation of the image is used.
        bbox = annotations[0]['bbox_obj']
        labels_bboxes = np.asarray([bbox[0], bbox[1], bbox[2], bbox[3]])
        bboxes.append(labels_bboxes)


In [3]:
# Sanity check: show the first entry of each label list (class, pose, bbox).
for label_list in (object_ids, poses, bboxes):
    print(label_list[0])

[1. 0. 0.]
[ 0.00000000e+00  0.00000000e+00  9.93988500e+02  2.44412794e-01
 -1.13855420e-02  9.68554121e-01  4.51184035e-02]
[ 255  153  129 -187]


In [4]:
# Make a dataset from the training images

training_images = []
# Scenes are visited in the same os.listdir order that the label-loading cell uses,
# so the scene order of images and labels stays in step.
for scene in os.listdir(os.path.join(training_dataset_folder_path, training_split)):
    rgb_images_path = os.path.join(training_dataset_folder_path, training_split, scene, "rgb")

    # os.listdir returns entries in arbitrary order; sort the file names so that the
    # image order is deterministic.
    # NOTE(review): this assumes the JSON annotation keys are listed in ascending image
    # order and that the file names sort in that same order (e.g. zero-padded ids) - confirm.
    for img_name in sorted(os.listdir(rgb_images_path)):
        # Open the image file
        img = tf.io.read_file(os.path.join(rgb_images_path, img_name))
        # convert the compressed string to a 3D uint8 tensor
        img = tf.image.decode_png(img, channels=3)
        # Use `convert_image_dtype` to convert to floats in the [0,1] range.
        img = tf.image.convert_image_dtype(img, tf.float32)
        # resize the image to the desired size.
        img = tf.image.resize(img, [224, 224])
        # add to the image dataset
        training_images.append(img.numpy())

# Every image needs exactly one label of each kind; fail early with a readable message
# instead of a shape error from inside from_tensor_slices.
if not (len(training_images) == len(object_ids) == len(poses) == len(bboxes)):
    raise ValueError(
        "Image/label count mismatch: %d images, %d class labels, %d poses, %d bboxes"
        % (len(training_images), len(object_ids), len(poses), len(bboxes)))

# Inputs and labels are keyed by name: 'input' for the images and
# 'class_output' / 'pose_output' / 'bbox_output' for the three label sets.
train_dataset = tf.data.Dataset.from_tensor_slices(({'input': training_images}, {'class_output': object_ids, 'pose_output': poses, 'bbox_output': bboxes}))
train_dataset = train_dataset.batch(1)
print(training_images[0].shape)


(224, 224, 3)


In [5]:
# Display the batched dataset to inspect the shapes and dtypes of its inputs and labels.
train_dataset

<BatchDataset shapes: ({input: (None, 224, 224, 3)}, {class_output: (None, 3), pose_output: (None, 7), bbox_output: (None, 4)}), types: ({input: tf.float32}, {class_output: tf.float64, pose_output: tf.float64, bbox_output: tf.int32})>

In [12]:
#Training
from datetime import datetime

# NOTE(review): MobilePoseNet is neither defined nor imported in the cells visible here
# (only `import models` is); make sure it is in scope on a fresh kernel before running.
model = MobilePoseNet()
model.summary()

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
tf.keras.utils.plot_model(model, 'model.png', show_shapes=True, show_layer_names=True)


checkpoint_path = "mobile_pose{epoch:04d}.h5"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the full model (save_weights_only=False) every 5 epochs
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=False,
                                                 verbose=1, period=5)

# One TensorBoard run directory per launch; os.path.join keeps the path portable
# instead of hard-coding Windows backslashes.
logdir = os.path.join("logs", "scalars", datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

# One early-stopping callback per output head; training stops as soon as any of them
# sees no improvement for 10 epochs. All three monitor training metrics (no validation data
# is passed to fit).
early_stopping_callback_class = tf.keras.callbacks.EarlyStopping(monitor='class_output_categorical_accuracy', patience=10, verbose=1, mode='max')
early_stopping_callback_pose  = tf.keras.callbacks.EarlyStopping(monitor='pose_output_mean_squared_error', patience=10, verbose=1, mode='min')
early_stopping_callback_bbox  = tf.keras.callbacks.EarlyStopping(monitor='bbox_output_mean_squared_error', patience=10, verbose=1, mode='min')

# Losses are keyed by output name (like the metrics and the dataset labels) so that each
# head gets the intended loss regardless of the order in which the model lists its outputs.
model.compile(optimizer=optimizer,
              loss={'class_output': tf.keras.losses.CategoricalCrossentropy(),
                    'pose_output': tf.keras.losses.MeanSquaredError(),
                    'bbox_output': tf.keras.losses.MeanSquaredError()},
              metrics={'class_output': [tf.keras.metrics.CategoricalAccuracy()],
                       'pose_output': [tf.keras.metrics.MeanSquaredError()],
                       'bbox_output': [tf.keras.metrics.MeanSquaredError()]})

# use_multiprocessing is only meaningful for generator / keras.utils.Sequence inputs,
# so it is not passed for this tf.data.Dataset.
history = model.fit(train_dataset, epochs=50,
                    callbacks=[cp_callback, tensorboard_callback, early_stopping_callback_bbox,
                               early_stopping_callback_class, early_stopping_callback_pose])

________________________________________________________________________________________________
conv2d_47 (Conv2D)              (None, 23, 23, 32)   25120       dropout_15[0][0]                 
__________________________________________________________________________________________________
conv2d_48 (Conv2D)              (None, 10, 10, 16)   12816       conv2d_47[0][0]                  
__________________________________________________________________________________________________
conv2d_49 (Conv2D)              (None, 8, 8, 8)      1160        conv2d_48[0][0]                  
__________________________________________________________________________________________________
conv2d_50 (Conv2D)              (None, 6, 6, 16)     1168        conv2d_49[0][0]                  
__________________________________________________________________________________________________
dropout_16 (Dropout)            (None, 6, 6, 16)     0           conv2d_50[0][0]                  
____________