# Visualizing Samples and Labels of the SPEED Dataset

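This notebook helps you inspect the SPEED dataset: it shows samples from the dataset with the corresponding ground-truth labels visualized as projected axes. Later cells ("Approach 2") build and train a VGG19-based belief-map network and recover poses from its output with PnP.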

In [None]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0' # 1: RTX, 0: Titan

from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Activation, AveragePooling2D, concatenate, Conv2D, MaxPooling2D, Conv2DTranspose, Dense, BatchNormalization, Dropout, LeakyReLU
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import backend as K
import tensorflow
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.preprocessing import image
import tensorflow as tf
%load_ext autoreload
%autoreload 2
import numpy as np
import json
import os
from matplotlib import pyplot as plt
from tensorflow.keras.preprocessing import image as keras_image
#%matplotlib notebook
from utils import *
from tensorflow.keras.utils import Sequence

In [None]:
# only for rtx
# Build a session config with soft device placement and on-demand GPU memory growth,
# then register a session using it with the Keras backend.
# NOTE(review): tf.ConfigProto / tf.Session are TensorFlow 1.x APIs — confirm the pinned TF version.
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
K.set_session(tf.Session(config=config))

In [None]:
# Root folder of the SPEED dataset, relative to the notebook's working directory.
dataset_root_dir = './speed'
# SatellitePoseEstimationDataset is not imported by name in this notebook;
# presumably it comes from `from utils import *` — verify.
dataset = SatellitePoseEstimationDataset(root_dir=dataset_root_dir)

In [None]:
# NOTE(review): rows/cols are not read anywhere in this cell; the grid-plot cell
# further down assigns them again before using them.
rows = 4
cols = 2
#%matplotlib notebook
# Render matplotlib figures inline in the cell output.
%matplotlib inline
def drawBlob(img, pos, size=3, color=[255, 0, 0]):
    """Paint a filled square marker onto ``img`` in place.

    The square spans rows ``pos[1] - size`` .. ``pos[1] + size - 1`` and columns
    ``pos[0] - size`` .. ``pos[0] + size - 1``. The span is clipped to the image,
    so a marker near (or on) the border is drawn partially instead of wrapping
    around to the opposite side or raising ``IndexError``.

    Args:
        img: Image indexed as ``img[row][col]``; modified in place.
        pos: ``(x, y)`` integer pixel position of the marker centre.
        size: Half side length of the square in pixels.
        color: Value written to every covered pixel. It is only read, never mutated.
    """
    height = len(img)
    if height == 0:
        return
    width = len(img[0])

    # Clip both ranges: a negative index would silently address pixels on the far
    # side of the image, and an index >= the image size would raise.
    for y in range(max(0, pos[1] - size), min(height, pos[1] + size)):
        for x in range(max(0, pos[0] - size), min(width, pos[0] + size)):
            img[y][x] = color

# Sketch of a visibility test (not implemented in this cell):
# 1) Determine the 8 corner points
# 2) Determine the 8 corresponding faces
# 3) Determine the 8 vectors between the camera projection and the 3D point
# 4) Check whether any of the 8 vectors intersects any of the 8 faces. If so: discard the point!

# Draw the projected model points of the first sample onto its image.
# The loop variables (img, q, r, xa, ya, visible, ...) stay defined after the cell runs.
for i in range(0, 1):
    img = np.array(dataset.get_image(i))
    q, r = dataset.get_pose(i)
    xa, ya, visible = projectModel(q, r)
    for x, y, v in zip(xa, ya, visible):
        # Only mark points flagged visible that fall inside [0, Camera.nu] x [0, Camera.nv].
        if v and x >= 0.0 and y >= 0.0 and x <= Camera.nu and y <= Camera.nv:
            drawBlob(img, (int(x), int(y)))
    
    plt.figure(figsize=(10, 10))
    plt.imshow(img)
    plt.show()


In [None]:
# Component-wise mean of the pose labels over the first `num_poses` samples, plus the
# largest z translation seen (never below the initial 0).
# NOTE(review): a component-wise mean of quaternions is only a rough summary, since
# q and -q describe the same rotation.
num_poses = 12000
avg_q = np.zeros(4, dtype=np.float64)
avg_r = np.zeros(3, dtype=np.float64)
max_z = 0
for i in range(num_poses):
    q, r = dataset.get_pose(i)
    avg_q += q
    avg_r += r
    max_z = max(max_z, r[2])
avg_q /= num_poses
avg_r /= num_poses
print(list(avg_q), list(avg_r), max_z)

In [None]:
# Project the 3D point (1.0, 1.0, 1.0) onto the image plane using rotation_vector,
# translation_vector and Camera.K. The projection is used below as the end point of a
# line that starts at the first image point.
# NOTE(review): this cell uses names no earlier cell defines — `cv2` is imported and
# `image_points` is assigned only further down, and `rotation_vector` /
# `translation_vector` appear only as locals inside points_to_pose. Run top to bottom
# on a fresh kernel, it raises NameError.
 
 
(nose_end_point2D, jacobian) = cv2.projectPoints(np.array([(1.0, 1.0, 1.0)]), rotation_vector, translation_vector, Camera.K, None)
 
# Mark every image point with a filled circle.
for p in image_points:
    cv2.circle(img, (int(p[0]), int(p[1])), 5, (0,0,255), -1)
 
 
# Line from the first image point to the projected 3D point.
p1 = ( int(image_points[0][0]), int(image_points[0][1]))
p2 = ( int(nose_end_point2D[0][0][0]), int(nose_end_point2D[0][0][1]))
 
cv2.line(img, p1, p2, (255,0,0), 2)
 
# Display image
plt.figure(figsize=(20,20))
plt.imshow(img)
plt.show()

## Approach 2

In [None]:
#tensorflow.keras.backend.set_learning_phase(0)
# VGG19 convolutional base with ImageNet weights and no classifier head, built for
# 600x960 RGB inputs (height x width).
pretrained_model = tensorflow.keras.applications.vgg19.VGG19(weights="imagenet", include_top=False,
                                                          input_shape=(600, 960, 3))

def create_stage(inp, filters):
    """Build one refinement stage on top of ``inp``.

    The stage is five 7x7 convolutions followed by one 1x1 convolution, each with
    64 filters, batch normalisation and ReLU, and finally a plain 1x1 convolution
    (no normalisation or activation) that produces ``filters`` output channels.

    Args:
        inp: Input tensor of the stage.
        filters: Number of channels of the stage output.

    Returns:
        The output tensor of the final 1x1 convolution.
    """
    def conv_bn_relu(tensor, kernel_size):
        # 64-filter "same" convolution -> batch norm -> ReLU
        tensor = Conv2D(filters=64, kernel_size=kernel_size, strides=1, padding="same")(tensor)
        tensor = BatchNormalization()(tensor)
        return Activation("relu")(tensor)

    features = inp
    for kernel_size in (7, 7, 7, 7, 7, 1):
        features = conv_bn_relu(features, kernel_size)

    return Conv2D(filters=filters, kernel_size=1, strides=1, padding="same")(features)

# Number of output channels of every stage (one belief map per channel).
numBeliefMap = 8
    
# Adding new trainable hidden and output layers to the model
#tensorflow.keras.backend.set_learning_phase(1)

inp = pretrained_model.input
# Average-pooled copy of the raw input (stride 8); it is concatenated into every later stage.
inp_avg = AveragePooling2D(pool_size=9, strides=8, padding="same")(inp)

# Feature tap of the VGG19 base.
# NOTE(review): layers[13] is presumably block4_conv2 (1/8 resolution, matching the
# stride-8 pooling above) — confirm against pretrained_model.summary().
c = pretrained_model.layers[13].output
c = Conv2D(filters=128, kernel_size=3, strides=1, padding="same")(c)
c = BatchNormalization()(c)
c = Activation("relu")(c)

# Shared feature tensor that every stage receives.
pre = Conv2D(filters=128, kernel_size=3, strides=1, padding="same")(c)
pre = BatchNormalization()(pre)
pre = Activation("relu")(pre)

# First stage: two 1x1 convolutions directly on the shared features.
s1 = Conv2D(filters=128, kernel_size=1, strides=1, padding="same")(pre)
stages = [Conv2D(filters=numBeliefMap, kernel_size=1, strides=1, padding="same")(s1)]

# Five further stages, each fed with the shared features, the previous stage's output
# and the pooled input, concatenated along the channel axis.
for i in range(5):
    stages.append(create_stage(concatenate([pre, stages[-1], inp_avg], axis=3), numBeliefMap))

# The model exposes all six stage outputs; "mse" is applied to each of them.
model = tensorflow.keras.models.Model(inputs=inp, outputs=stages)
model.compile(loss="mse", optimizer=Adam(lr=1e-3))

model.summary()


In [None]:
# Write a top-to-bottom diagram of the model graph to model.png (layer names shown,
# tensor shapes omitted).
tf.keras.utils.plot_model(
    model,
    to_file='model.png',
    show_shapes=False,
    show_layer_names=True,
    rankdir='TB'
)

In [None]:
from imgaug import augmenters as iaa

class KerasDataGenerator2(Sequence):

    """ DataGenerator for Keras to be used with fit_generator (https://keras.io/models/sequential/#fit_generator)

    Every batch is a pair ``(imgs, targets)``. ``imgs`` holds the augmented and
    preprocessed images. ``targets`` is a list with ``stages`` entries, each an equal
    copy of the belief-map array (one channel per projected model vertex, values
    scaled by 255), so the same target can be applied to every model output.
    """

    def __init__(self,
                 preprocessor,
                 label_list,
                 speed_root,
                 label_size,
                 batch_size=32,
                 dim=(224, 224),
                 shuffle=True,
                 output_scale=8,
                 n_output_vertices=8,
                 stages=6):

        """ Store the configuration and create the first index order.

        Args:
            preprocessor: Callable applied to each augmented image array.
            label_list: List of dicts with the keys 'filename', 'q_vbs2tango'
                and 'r_Vo2To_vbs_true'.
            speed_root: Dataset root; images are read from <speed_root>/images/train.
            label_size: Sigma of the Gaussian blob drawn for every vertex.
            batch_size: Number of samples per batch.
            dim: (height, width) the images are resized to.
            shuffle: Whether to reshuffle the sample order after every epoch.
            output_scale: Factor by which the belief maps are smaller than ``dim``.
            n_output_vertices: Number of belief-map channels (projected vertices used).
            stages: Number of target copies returned per batch.
        """

        # loading dataset
        self.image_root = os.path.join(speed_root, 'images', 'train')

        # Initialization
        self.preprocessor = preprocessor
        self.dim = dim
        self.batch_size = batch_size
        # filename -> pose label (quaternion 'q', translation 'r')
        self.labels = {label['filename']: {'q': label['q_vbs2tango'], 'r': label['r_Vo2To_vbs_true']}
                       for label in label_list}
        self.list_IDs = [label['filename'] for label in label_list]
        self.shuffle = shuffle
        self.label_size = label_size
        self.indexes = None
        self.output_scale = output_scale
        self.n_output_vertices = n_output_vertices
        self.stages = stages
        self.on_epoch_end()

    def __len__(self):

        """ Denotes the number of batches per epoch. """

        # Floor division: a trailing partial batch is dropped.
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):

        """ Generate one batch of data """

        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        return self.__data_generation(list_IDs_temp)

    def on_epoch_end(self):

        """ Updates indexes after each epoch """

        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def drawBlob(self, img, pos, sigma=3):
        """ Write an unnormalised Gaussian (peak value 1) around ``pos`` into ``img``.

        ``pos`` is (row, col). The blob covers a window of 3*sigma pixels to each
        side; if that window does not fit completely inside ``img`` nothing is drawn.
        """
        # https://github.com/NVlabs/Deep_Object_Pose/blob/master/src/training/train.py#L851
        w = int(sigma*3)
        if pos[0]-w>=0 and pos[0]+w<img.shape[0] and pos[1]-w>=0 and pos[1]+w<img.shape[1]:
            for i in range(int(pos[0])-w, int(pos[0])+w):
                for j in range(int(pos[1])-w, int(pos[1])+w):
                    img[i,j] = np.exp(-(((i - pos[0])**2 + (j - pos[1])**2)/(2*(sigma**2))))

    def __data_generation(self, list_IDs_temp):

        """ Generates data containing batch_size samples """

        # Initialization
        imgs = np.empty((self.batch_size, *self.dim, 3))
        # np.float64 replaces the np.float alias, which NumPy deprecated in 1.20 and
        # removed in 1.24; the resulting dtype is the same.
        masks = np.zeros((self.batch_size, int(self.dim[0] / self.output_scale), int(self.dim[1] / self.output_scale), self.n_output_vertices), dtype=np.float64)

        # Augmentation pipeline, rebuilt and frozen once per batch.
        seq = iaa.SomeOf((0, 3),
            [
                iaa.OneOf([
                    iaa.GaussianBlur((0, 3.0)), # blur images with a sigma between 0 and 3.0
                    iaa.AverageBlur(k=(2, 7)), # blur image using local means with kernel sizes between 2 and 7
                    #iaa.MedianBlur(k=(3, 11)), # blur image using local medians with kernel sizes between 2 and 7
                ]),
                iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)), # sharpen images
                # search either for all edges or for directed edges,
                # blend the result with the original image using a blobby mask
                iaa.SimplexNoiseAlpha(iaa.OneOf([
                    iaa.EdgeDetect(alpha=(0.5, 1.0)),
                    iaa.DirectedEdgeDetect(alpha=(0.5, 1.0), direction=(0.0, 1.0)),
                ])),
                iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255)), # add gaussian noise to images
                iaa.OneOf([
                    iaa.Dropout((0.01, 0.1)), # randomly remove up to 10% of the pixels
                    iaa.CoarseDropout((0.03, 0.15), size_percent=(0.02, 0.05)),
                ]),
                iaa.Add((-10, 10)), # change brightness of images (by -10 to 10 of original value)
                iaa.OneOf([
                    iaa.Multiply((0.5, 1.5)),
                    iaa.FrequencyNoiseAlpha(
                        exponent=(-4, 0),
                        first=iaa.Multiply((0.5, 1.5)),
                        second=iaa.ContrastNormalization((0.5, 2.0))
                    )
                ]),
                iaa.ContrastNormalization((0.5, 2.0)), # improve or worsen the contrast
            ],
            random_order=True
        ).to_deterministic()

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            img_path = os.path.join(self.image_root, ID)
            img = keras_image.load_img(img_path, target_size=self.dim) #, color_mode = "grayscale")
            img = keras_image.img_to_array(img)
            img_aug = seq.augment_image(img)
            imgs[i] = self.preprocessor(img_aug)

            q, r = self.labels[ID]['q'], self.labels[ID]['r']
            xa, ya, visibles = projectModel(q, r)
            for j, (x, y, visible) in enumerate(zip(xa, ya, visibles)):
                if j >= self.n_output_vertices:
                    break

                # Dividing by (image size * output_scale) and multiplying by dim below
                # maps the projected pixel straight into belief-map coordinates.
                x /= (Camera.nu * self.output_scale)
                y /= (Camera.nv * self.output_scale)
                if visible and x >= 0.0 and y >= 0.0 and x <= 1.0 and y <= 1.0:
                    x_s, y_s = int(x * self.dim[1]), int(y * self.dim[0])
                    # drawBlob expects (row, col) and skips blobs that would leave the map.
                    self.drawBlob(masks[i][...,j], (y_s, x_s), self.label_size)

                #masks[i][0][0] = 0
        return imgs, [masks*255 for i in range(self.stages)]

In [None]:
import random
# Generator configuration shared by the training and the validation split
params = dict(
    dim=(600, 960),
    batch_size=6,
    label_size=1.5,
    shuffle=True,
    output_scale=8,
    n_output_vertices=8,
    stages=6,
)

# Read the training labels and shuffle them with a fixed seed so the split is repeatable
with open(os.path.join(dataset_root_dir, 'train' + '.json'), 'r') as f:
    label_list = json.load(f)
random.Random(4).shuffle(label_list)

# First 80 % of the shuffled labels for training, the rest for validation
split_at = int(len(label_list)*.8)
train_labels, validation_labels = label_list[:split_at], label_list[split_at:]

# One generator per split, both using the VGG19 preprocessing function
training_generator = KerasDataGenerator2(preprocess_input, train_labels, dataset_root_dir, **params)
validation_generator = KerasDataGenerator2(preprocess_input, validation_labels, dataset_root_dir, **params)


In [None]:
# Preview the generator output: for every sample show the first image channel next to
# one belief-map channel (channel 1).
for imgs, masks in training_generator:
    print(imgs.shape, len(masks), masks[0].shape)
    for img, mask in zip(imgs, masks[0]):
        # Value ranges of the preprocessed image and of its target maps
        print(img.min(), img.max(), np.asarray(mask).min(), np.asarray(mask).max())
        plt.figure(figsize=(20, 20))

        plt.subplot(121)
        plt.imshow(img[...,0], cmap='gray')

        plt.subplot(122)
        # np.float64 replaces the np.float alias, which NumPy deprecated in 1.20 and
        # removed in 1.24; the resulting dtype is the same.
        m = np.zeros(mask.shape[:2], dtype=np.float64)
        #for i in range(8):
        #    m += mask[...,i]
        m += mask[...,1]
        plt.imshow(m, cmap='gray')
        plt.show()
        #break
    #break

In [None]:
# Show the first entry of the (shuffled) label list to inspect the label schema.
print(label_list[0])

In [None]:
# Load a previously saved model from m7.h5. This rebinds `model`, replacing the
# network assembled earlier in the notebook.
model = load_model("m7.h5")

In [None]:
# Write only the weights of the current model to m7_weights.h5.
model.save_weights("m7_weights.h5")

In [None]:
# Load the weights stored in m7_weights.h5 into the current model.
model.load_weights("m7_weights.h5")

In [None]:
# Train with checkpointing and early stopping, both driven by the validation loss.
current_model = "m8.h5"
# Keep only the best model (lowest val_loss) in `current_model`.
checkpoint = ModelCheckpoint(current_model,save_best_only=True, verbose=1, monitor="val_loss")
# NOTE(review): `reduce` is created but not passed to `callbacks` below (it is commented
# out there), so the learning rate is never reduced.
reduce = ReduceLROnPlateau(factor=0.5, patience=5, monitor='val_loss')
# Stop once val_loss has not improved for 10 epochs; `epochs` is only an upper bound.
earlyStopping = EarlyStopping(patience=10, verbose=1,monitor="val_loss")
history = model.fit_generator(
    generator=training_generator,
    validation_data=validation_generator,
    use_multiprocessing=False, # Only works if training data is loaded into RAM from HDF
    workers=8,
    callbacks=[earlyStopping, checkpoint],# reduce],
    epochs=10000
)

In [None]:
# Save the model as it is after training (not necessarily the best checkpoint).
# NOTE(review): the training cell checkpoints to "m8.h5" while this writes
# "m7_final.h5" — confirm the intended file naming.
model.save("m7_final.h5")

In [None]:
#!/usr/bin/env python

# Build 2D/3D correspondences from the ground truth of one sample, for use as PnP input.
index = 0

# NOTE(review): cv2 is first imported here, although an earlier cell already calls it.
import cv2
import numpy as np
 
# Read Image
img = np.array(dataset.get_image(index))
q, r = dataset.get_pose(index)
xa, ya, visible = projectModel(q, r)
# NOTE(review): `size` is not read by any cell visible in this notebook.
size = img.shape

# 3D model points and their projected 2D positions, stacked as (x, y) rows.
model_points_all, _ = getSatelliteModel()
image_points_all = np.stack((xa, ya), axis=1)

# All points are used; filtering by `visible` is disabled.
model_points = model_points_all#[visible]
image_points = image_points_all#[visible]
#np.random.shuffle(image_points)
print(visible)
print(model_points)
print(image_points)

In [None]:
from pyrr import Quaternion

def points_to_pose(points_3d, points_2d, camera_matrix=Camera.K):
    """Estimate a pose from 2D-3D correspondences with RANSAC PnP.

    Args:
        points_3d: Model points handed to ``cv2.solvePnPRansac``.
        points_2d: Matching image points, one per entry of ``points_3d``.
        camera_matrix: 3x3 intrinsic matrix; no distortion coefficients are used.

    Returns:
        ``(quaternion, location, projected_points)`` when the solver succeeds:
        the rotation as a quaternion reordered to (w, x, y, z), the translation as
        ``[x, y, z]``, and ``points_3d`` re-projected with the solver's raw
        rotation/translation. If the solved z is negative, location and rotation
        are flipped before being returned (the re-projection is not recomputed).
        ``([], [], [])`` when the solver reports failure.
    """
    def rvec_to_quaternion_xyzw(rvec):
        '''Convert rvec (which is log quaternion) to a pyrr quaternion (x, y, z, w order)'''
        theta = np.sqrt(rvec[0] * rvec[0] + rvec[1] * rvec[1] + rvec[2] * rvec[2])  # in radians
        if not np.any(theta):
            # Zero rotation: the axis is arbitrary, and dividing by theta would give NaN.
            raxis = [1.0, 0.0, 0.0]
        else:
            raxis = [rvec[0] / theta, rvec[1] / theta, rvec[2] / theta]

        # pyrr's Quaternion (order is XYZW), https://pyrr.readthedocs.io/en/latest/oo_api_quaternion.html
        return Quaternion.from_axis_rotation(raxis, theta)

    # The fourth return value of solvePnPRansac is the set of inlier indices.
    (success, rotation_vector, translation_vector, inliers) = cv2.solvePnPRansac(
        points_3d, points_2d, camera_matrix, None, iterationsCount=200, reprojectionError=38)
    #(success, rotation_vector, translation_vector) = cv2.solvePnP(points_3d, points_2d, camera_matrix, None)
    if not success:
        return [], [], []

    location = list(translation_vector[...,0])
    quaternion = rvec_to_quaternion_xyzw(rotation_vector)

    projected_points, _ = cv2.projectPoints(points_3d, rotation_vector, translation_vector, camera_matrix, None)
    projected_points = np.squeeze(projected_points)

    # If the location.Z is negative or object is behind the camera then flip both location and rotation
    x, y, z = location
    if z < 0:
        print("neg")
        # Get the opposite location
        location = [-x, -y, -z]

        # Change the rotation by 180 degree. Both operands are still in pyrr's
        # x, y, z, w order here; multiplying an xyzw quaternion with one that was
        # already reordered to wxyz would mix up the components.
        rotate_angle = np.pi
        rotate_quaternion = Quaternion.from_axis_rotation(location, rotate_angle)
        quaternion = rotate_quaternion.cross(quaternion)

    # change order to wxyz only once, after all quaternion arithmetic is done
    return np.roll(quaternion, 1), location, projected_points
# Recover the pose of the sample from its own ground-truth projections and print it
# next to the ground-truth pose (q, r) for comparison.
quat, loc, points = points_to_pose(model_points, image_points)
print(q, r)
print(quat, loc)
print(points)

In [None]:
from scipy.spatial.transform import Rotation as R

# Compare the estimated rotation (`quat`, returned by points_to_pose) with the ground
# truth (`q`) as zyx Euler angles in degrees.
# SciPy's Rotation.from_quat expects scalar-last (x, y, z, w) quaternions, while
# points_to_pose returns (w, x, y, z); both are therefore rolled to scalar-last first.
# NOTE(review): `q` is assumed to be scalar-first as well — confirm against
# dataset.get_pose.
print(R.from_quat(np.roll(np.ravel(quat), -1)).as_euler('zyx', degrees=True))
print(R.from_quat(np.roll(np.ravel(q), -1)).as_euler('zyx', degrees=True))

In [None]:
# Hand-entered 2D image points (x, y), one row per correspondence.
points_2d = np.asarray([
 [ 752., 1008.],
 [ 608.,  304.],
 [ 880.,  960.],
 [ 688.,  368.],
 [1040.,  656.],
 [1056.,  672.],
 [ 848.,  320.],
 [ 880.,  944.],
 [1040.,  656.]], dtype=np.float32)

# Matching 3D model points. Some rows repeat the same model point with a different
# 2D position — presumably several candidate detections per vertex; verify.
points_3d = np.asarray([
 [ 0.375, -0.4,    0.32 ],
 [-0.375, -0.4,    0.32 ],
 [-0.375,  0.3,    0.   ],
 [-0.375,  0.3,    0.   ],
 [-0.375,  0.3,    0.   ],
 [ 0.375,  0.3,    0.   ],
 [ 0.375, -0.3,    0.   ],
 [-0.375, -0.3,    0.   ],
 [-0.375, -0.3,    0.   ]], dtype=np.float32)
print(points_2d)
print(points_3d)
# Solve the pose from these correspondences with the default camera matrix.
quat_res, trans_res, points = points_to_pose(points_3d, points_2d)
print(quat_res, trans_res)
print(points)

In [None]:
# Grid of dataset samples with their ground-truth visualisation.
rows = 4
cols = 2

# squeeze=False keeps `axes` two-dimensional even when rows or cols is 1.
fig, axes = plt.subplots(rows, cols, figsize=(12, 12), squeeze=False)
for i in range(rows):
    for j in range(cols):
        # Row-major index: i * cols + j walks through samples 0 .. rows*cols-1
        # without gaps or repeats for any grid shape.
        dataset.visualize(i * cols + j, ax=axes[i][j])
        axes[i][j].axis('off')
fig.tight_layout()

In [None]:
from matplotlib.patches import Rectangle

def extract_maxima(belief, area=5, max_peaks=3, min_value=30.0, min_ratio=0.5):
    """Find up to ``max_peaks`` local maxima of a 2-D belief map, strongest first.

    After a peak is accepted, the window ``[p - area, p + area)`` around it (clipped
    to the map) is zeroed on a working copy, and the search continues. The search
    stops at the first candidate that is below ``min_value`` or below ``min_ratio``
    times the previously accepted peak.

    Args:
        belief: 2-D array; it is not modified.
        area: Half size of the suppression window around an accepted peak.
        max_peaks: Maximum number of peaks to return.
        min_value: Absolute threshold a peak has to reach.
        min_ratio: Minimum ratio between a peak and the one accepted before it.

    Returns:
        Integer array with one ``(row, col)`` row per accepted peak; an empty
        array if no peak qualifies.
    """
    remaining = belief.copy()
    peaks = []
    if remaining.size == 0:
        # argmax is undefined on an empty map
        return np.asarray(peaks)

    previous_value = None
    for _ in range(max_peaks):
        row, col = np.unravel_index(remaining.argmax(), remaining.shape)
        value = remaining[row, col]

        dropped_too_far = previous_value is not None and value < previous_value * min_ratio
        if dropped_too_far or value < min_value:
            break
        previous_value = value

        # Suppress the neighbourhood so the next argmax finds a different peak.
        remaining[max(0, row - area):min(remaining.shape[0], row + area),
                  max(0, col - area):min(remaining.shape[1], col + area)] = 0
        peaks.append((row, col))
    return np.asarray(peaks)

def predict_pose(model, img, mask=None, debug=True):
    """Predict belief maps for one image and solve the pose from their maxima.

    For every model point the matching channel of the last stage output is searched
    for maxima. Each maximum becomes a 2D-3D correspondence, and all correspondences
    are passed to ``points_to_pose``.

    The 2D points are expressed in belief-map pixels times 16 (8 back to the network
    input, times 2 on top of that). The principal point of the camera matrix is set to
    the centre of that same frame: ``cx`` from the width, ``cy`` from the height.
    NOTE(review): this assumes the focal lengths in Camera.K are expressed in pixels
    of that upscaled frame — confirm against the Camera definition.

    Args:
        model: Model whose ``predict`` returns a list of stage outputs.
        img: One preprocessed image of shape (height, width, channels).
        mask: Optional ground-truth belief maps, shown next to the prediction.
        debug: If true, plot the belief maps, the maxima and the re-projection.

    Returns:
        ``(quaternion, translation)`` as returned by ``points_to_pose``, or two
        all-zero arrays (length 4 and 3) if fewer than 4 correspondences were found.
    """
    # Belief-map pixel -> PnP pixel coordinates
    upscale = 8 * 2
    model_points_all, _ = getSatelliteModel()

    pred = model.predict(np.asarray([img]))
    belief_maps = pred[-1][0]  # last stage, only batch element

    # Principal point = centre of the frame the 2D points live in.
    # camera_matrix[0][2] is cx (along the width), camera_matrix[1][2] is cy (along the height).
    belief_h, belief_w = belief_maps.shape[:2]
    camera_matrix_modified = Camera.K.copy()
    camera_matrix_modified[0][2] = belief_w * upscale / 2
    camera_matrix_modified[1][2] = belief_h * upscale / 2

    if debug:
        plt.figure(figsize=(20, 40))

    points_3d = []
    points_2d = []
    for i, p3d in enumerate(model_points_all):
        p = belief_maps[...,i]
        if debug:
            # One row per model point: input image | predicted map | ground-truth map
            plt.subplot(8,3,i * 3 + 1)
            plt.imshow((img + 128).astype(np.uint8), cmap='gray')
            ax = plt.subplot(8,3,i * 3 + 2)
            ax.imshow(p)
        maxima = extract_maxima(p)
        if debug and mask is not None:
            axmask = plt.subplot(8,3,i * 3 + 3)
            axmask.imshow(mask[...,i])
        for m in maxima:
            # Every maximum of this channel is a candidate for the same model point;
            # RANSAC in points_to_pose sorts out the wrong ones.
            points_3d.append(p3d)
            points_2d.append([m[1] * upscale, m[0] * upscale])  # (x, y) = (col, row)
            if debug:
                ax.add_patch(Rectangle((m[1], m[0]),1,1,linewidth=5,edgecolor='r',facecolor='none'))
                if mask is not None:
                    axmask.add_patch(Rectangle((m[1], m[0]),1,1,linewidth=5,edgecolor='r',facecolor='none'))
    points_3d = np.array(points_3d, dtype=np.float32)
    points_2d = np.array(points_2d, dtype=np.float32)
    if len(points_2d) < 4:
        # Too few correspondences to attempt PnP
        return np.array([0]*4), np.array([0]*3)

    quat_res, trans_res, points = points_to_pose(points_3d, points_2d, camera_matrix_modified)
    if debug:
        plt.show()
        print("orig", points_2d)
        print("model", points_3d)
        print("proj", points)

        aximgproj = plt.subplot(111)
        aximgproj.imshow((img + 128).astype(np.uint8), cmap='gray')
        for p in points:
            # Re-projected points are in the upscaled frame; halve them for the input image.
            aximgproj.add_patch(Rectangle((p[0]/2, p[1]/2),3,3,linewidth=2,edgecolor='r',facecolor='none'))
        plt.show()
    return quat_res, trans_res


In [None]:
# Run the pose prediction (with debug plots) on every validation sample.
for imgs, masks in validation_generator:
    for img, mask in zip(imgs, masks[0]):
        try:
            quat_res, trans_res = predict_pose(model, img, mask)
        except cv2.error as e:
            # Report the failure instead of dropping the sample without a trace.
            print("pose estimation failed, skipping sample:", e)
            continue
        print("res", quat_res, trans_res)
        print("="*30)
        #break
    #break

In [None]:
from submission import SubmissionWriter

def evaluate(model, dataset, append_submission, dataset_root):

    """ Running evaluation on test set, appending results to a submission.

    Every image listed in <dataset_root>/<dataset>.json is loaded at 600x960,
    preprocessed and passed to ``predict_pose``. If no usable pose comes back, a zero
    quaternion and a fixed translation are submitted instead, and the case is counted:

        err1: the solver returned an empty result          -> translation [0, 0, 3]
        err4: the translation is all zero                  -> translation [0, 0, 10]
        err2: the predicted z translation is larger than 60 -> translation [0, 0, 40]
        err3: OpenCV raised an error                       -> translation [0, 0, 10]

    Args:
        model: Model handed to ``predict_pose``.
        dataset: Name of the split ('test', 'real_test', ...); selects both the JSON
            list and the image sub-folder.
        append_submission: Callable ``(filename, quaternion, translation)``.
        dataset_root: Root folder of the dataset.
    """

    with open(os.path.join(dataset_root, dataset + '.json'), 'r') as f:
        image_list = json.load(f)

    print('Running evaluation on {} set...'.format(dataset))

    err1 = 0
    err2 = 0
    err3 = 0
    err4 = 0
    for i, img in enumerate(image_list):
        print("index", i)
        img_path = os.path.join(dataset_root, 'images', dataset, img['filename'])

        img_raw = keras_image.load_img(img_path, target_size=(600, 960)) #, color_mode = "grayscale")
        img_raw = keras_image.img_to_array(img_raw)
        img_proc = preprocess_input(img_raw)

        try:
            quat_res, trans_res = predict_pose(model, img_proc, debug=False)
            print(quat_res, trans_res)
            if len(quat_res) == 0 or len(trans_res) == 0:
                append_submission(img['filename'], [0]*4, [0,0,3])
                err1 += 1
            elif (np.array(trans_res) == 0).all():
                append_submission(img['filename'], [0]*4, [0,0,10])
                err4 += 1
            elif trans_res[2] > 60:
                append_submission(img['filename'], [0]*4, [0,0,40])
                err2 += 1
            else:
                append_submission(img['filename'], quat_res, trans_res)
        except cv2.error:
            append_submission(img['filename'], [0]*4, [0,0,10])
            err3 += 1
    # All four fallback counters are reported, including err4.
    print("Err amount", err1, err2, err3, err4)
            
# Evaluate both test splits into one submission and export it.
submission = SubmissionWriter()
evaluate(model, 'test', submission.append_test, dataset_root_dir)
evaluate(model, 'real_test', submission.append_real_test, dataset_root_dir)
submission.export(suffix='keras_example')
