# Visualizing Samples and Labels of the SPEED Dataset

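This notebook helps you inspect the SPEED dataset. It shows samples from the dataset with the corresponding ground-truth labels visualized as projected axes. Later cells also train a small segmentation network on the images and recover a pose from projected model points with `cv2.solvePnP`.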

In [None]:
%load_ext autoreload
%autoreload 2
import numpy as np
import json
import os
from matplotlib import pyplot as plt
os.environ["CUDA_VISIBLE_DEVICES"]="0"
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing import image as keras_image
#%matplotlib notebook
from utils import *
from keras_utils import *

In [None]:
# Root directory of the SPEED dataset; a relative path, so it is resolved
# against the notebook's working directory.
dataset_root_dir = './speed'
# Dataset wrapper used by later cells through get_image(), get_pose() and
# visualize(). Presumably provided by one of the star imports above
# (utils / keras_utils) -- verify.
dataset = SatellitePoseEstimationDataset(root_dir=dataset_root_dir)

In [None]:
# Grid dimensions. They are not referenced in this cell; the sample-grid cell
# further down assigns the same values again before using them.
rows = 4
cols = 2
#%matplotlib notebook
%matplotlib inline
def drawBlob(img, pos, size=3, color=[255, 0, 0]):
    """Paint a square marker into ``img`` in place.

    The marker covers rows ``pos[1] - size`` .. ``pos[1] + size - 1`` and
    columns ``pos[0] - size`` .. ``pos[0] + size - 1`` (half-open ranges, as
    before). The part of the square that falls outside the image is skipped,
    so a marker near a border neither wraps around to the opposite edge
    (negative indices) nor raises ``IndexError``.

    Args:
        img: Image indexed as ``img[y][x]`` (e.g. an ``H x W x C`` NumPy
            array). Modified in place.
        pos: ``(x, y)`` integer pixel coordinates of the marker centre.
        size: Half edge length of the square in pixels.
        color: Value assigned to every covered pixel. The default list is
            only read, never mutated.
    """
    height = len(img)
    if height == 0:
        # Nothing to draw on.
        return
    width = len(img[0])

    # Clip the square to the valid index range of the image.
    x_start = max(pos[0] - size, 0)
    x_stop = min(pos[0] + size, width)
    y_start = max(pos[1] - size, 0)
    y_stop = min(pos[1] + size, height)

    for y in range(y_start, y_stop):
        for x in range(x_start, x_stop):
            img[y][x] = color

# 1) Determine the 8 corner points
# 2) Determine the 8 corresponding faces
# 3) Determine the 8 vectors between the camera projection and each 3D point
# 4) Check whether any of the 8 vectors intersects any of the 8 faces. If so: discard the point!

# Overlay the projected model points on the first dataset sample.
for i in range(0, 1):
    img = np.array(dataset.get_image(i))
    q, r = dataset.get_pose(i)
    xa, ya, visible = projectModel(q, r)
    for x, y, v in zip(xa, ya, visible):
        # Only draw points flagged visible that land inside the image.
        # The upper bounds are exclusive: assuming Camera.nu / Camera.nv are
        # the image width / height in pixels (TODO confirm), a coordinate
        # equal to them is already one past the last valid column / row.
        if v and 0.0 <= x < Camera.nu and 0.0 <= y < Camera.nv:
            drawBlob(img, (int(x), int(y)))

    plt.figure(figsize=(10, 10))
    plt.imshow(img)
    plt.show()


In [None]:
# Keyword arguments shared by the training and validation generators
params = {
    'dim': (480, 640),
    'batch_size': 8,
    'label_size': 3,
    'shuffle': True,
}

# Read the label list and split it 80 % / 20 % in file order
labels_path = os.path.join(dataset_root_dir, 'train' + '.json')
with open(labels_path, 'r') as f:
    label_list = json.load(f)

split_index = int(len(label_list)*.8)
train_labels = label_list[:split_index]
validation_labels = label_list[split_index:]

# One generator per split, both configured identically
training_generator = KerasDataGenerator(train_labels, dataset_root_dir, **params)
validation_generator = KerasDataGenerator(validation_labels, dataset_root_dir, **params)


In [None]:
# Pull the first batch from the training generator, report the batch shapes
# and show the first image next to channel 0 of its mask.
for imgs, masks in training_generator:
    print(imgs.shape, masks.shape)
    for img, mask in zip(imgs, masks):
        fig, (ax_image, ax_mask) = plt.subplots(1, 2, figsize=(20, 20))
        ax_image.imshow(img.astype(np.uint8)[..., 0], cmap='gray')
        ax_mask.imshow(mask[..., 0], cmap='gray')
        plt.show()
        break  # only the first sample of the batch
    break  # only the first batch

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, concatenate, Conv2D, MaxPooling2D, Conv2DTranspose, Dense, BatchNormalization, Dropout, LeakyReLU
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import backend as K

In [None]:
def jaccard_distance_loss(y_true, y_pred, smooth=100):
    """
    Smoothed Jaccard distance, reduced over the last axis.

    Jaccard = |X & Y| / (|X| + |Y| - |X & Y|)
            = sum(|A*B|) / (sum(|A|) + sum(|B|) - sum(|A*B|))

    `smooth` is added to both numerator and denominator, and the result is
    shifted and scaled as (1 - jaccard) * smooth so that a perfect overlap
    yields a loss of 0. The Jaccard distance is useful for unbalanced
    datasets; the smoothing is meant to avoid exploding or vanishing
    gradients.

    Ref: https://en.wikipedia.org/wiki/Jaccard_index

    @url: https://gist.github.com/wassname/f1452b748efcbeb4cb9b1d059dce6f96
    @author: wassname
    """
    overlap = K.sum(K.abs(y_true * y_pred), axis=-1)
    total = K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1)
    union = total - overlap
    jaccard = (overlap + smooth) / (union + smooth)
    return (1 - jaccard) * smooth

def soft_dice_loss(y_true, y_pred, epsilon=1e-6):
    '''
    Soft Dice loss for any batch size, class count and number of spatial
    dimensions. Expects the `channels_last` layout.

    # Arguments
        y_true: b x X x Y( x Z...) x c one-hot encoded ground truth
        y_pred: b x X x Y( x Z...) x c network output, must sum to 1 over the c channel (such as after softmax)
        epsilon: added to the denominator to avoid division by zero

    # Returns
        1 minus the Dice score averaged over batch and classes.

    # References
        V-Net: Fully Convolutional Neural Networks for Volumetric Medical Image Segmentation
        https://arxiv.org/abs/1606.04797
        More details on Dice loss formulation
        https://mediatum.ub.tum.de/doc/1395260/1395260.pdf (page 72)

        Adapted from https://github.com/Lasagne/Recipes/issues/99#issuecomment-347775022
    '''
    # Reduce over every axis except the first (batch) and the last (class).
    rank = len(y_pred.shape)
    spatial_axes = tuple(range(1, rank - 1))

    overlap = K.sum(y_pred * y_true, spatial_axes)
    numerator = 2. * overlap
    squares = K.square(y_pred) + K.square(y_true)
    denominator = K.sum(squares, spatial_axes)

    dice = numerator / (denominator + epsilon)
    # Mean over batch and classes.
    return 1 - K.mean(dice)

def focal_loss(target, output, gamma=2):
    """Focal loss summed over the last axis.

    `output` is first rescaled so it sums to 1 along the last axis, then
    clipped to [eps, 1 - eps] before the logarithm is taken. Each term is
    weighted by (1 - p) ** gamma.

    Source: https://github.com/keras-team/keras/issues/6261#issuecomment-358826560
    """
    normalized = output / K.sum(output, axis=-1, keepdims=True)
    eps = K.epsilon()
    clipped = K.clip(normalized, eps, 1. - eps)
    modulating = K.pow(1. - clipped, gamma)
    return -K.sum(modulating * target * K.log(clipped), axis=-1)

def conv_norm(inp, filters, conv=Conv2D, kernel_size=3):
    """Apply `conv` with 'same' padding, then BatchNormalization, then LeakyReLU(0.1).

    `conv` is the layer class to instantiate (Conv2D by default); it is
    called with `filters`, `kernel_size` and `padding='same'`.
    """
    conv_layer = conv(filters=filters, kernel_size=kernel_size, padding='same')
    normalized = BatchNormalization()(conv_layer(inp))
    return LeakyReLU(0.1)(normalized)

def encode(inp, filters):
    """Encoder stage: three conv_norm blocks followed by 2x2 max pooling.

    Returns a pair (features, pooled): the output of the last conv_norm
    block and that output after pooling.
    """
    features = inp
    for _ in range(3):
        features = conv_norm(features, filters)
    pooled = MaxPooling2D(pool_size=2)(features)
    return features, pooled

def decode(inp, shortcut, filters):
    """Decoder stage: upsample `inp`, merge it with `shortcut`, then refine.

    `inp` goes through a stride-2 transposed convolution, is concatenated
    with `shortcut` along axis 3, batch-normalized and passed through two
    conv_norm blocks.
    """
    upsampled = Conv2DTranspose(filters, 2, strides=2, padding='same')(inp)
    merged = concatenate([upsampled, shortcut], axis=3)
    x = BatchNormalization()(merged)
    for _ in range(2):
        x = conv_norm(x, filters)
    return x

# Encoder/decoder network with skip connections, assembled layer by layer.
# Filter counts per encoder stage; the decoder walks the same list in reverse.
filters_encode_decode = [16,32,32,64,128]
# Filter counts of the conv_norm blocks between encoder and decoder.
filters_middle = [256, 256, 256]

# `layers` collects every intermediate tensor:
#   [input, c1, p1, c2, p2, ..., cN, pN, middle..., decoder...]
# where cK is the pre-pooling output of encoder stage K and pK its pooled output.
layers = [Input(params['dim'] + (1,))]
for filters in filters_encode_decode:
    c, p = encode(layers[-1], filters)
    layers.append(c)
    layers.append(p)

for filters in filters_middle:
    layers.append(conv_norm(layers[-1], filters))

for i, filters in enumerate(reversed(filters_encode_decode)):
    # Encoder stage K stored its pre-pooling output at index 2*K - 1, so
    # decoder step i takes its shortcut from stage (N - i).
    layers.append(decode(layers[-1], layers[((len(filters_encode_decode) - i) * 2) - 1], filters))

# 1x1 convolution producing 8 sigmoid-activated output channels.
layers.append(Conv2D(8, (1, 1), activation='sigmoid')(layers[-1]))

model = Model(inputs=[layers[0]], outputs=[layers[-1]])
# The learning rate was written as 1e03 (= 1000.0), which makes training
# diverge immediately; 1e-3 is the intended value.
model.compile(optimizer = RMSprop(lr=1e-3), loss = focal_loss, metrics=['accuracy'])
model.summary()


In [None]:
# File the best model (by validation loss) is written to
current_model = "m1.h5"

# All three callbacks watch the validation loss
earlyStopping = EarlyStopping(
    monitor="val_loss",
    patience=20,
    verbose=1,
)
checkpoint = ModelCheckpoint(
    current_model,
    monitor="val_loss",
    save_best_only=True,
    verbose=1,
)
reduce = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=5,
)

# The epoch count is effectively "until early stopping triggers"
history = model.fit_generator(
    generator=training_generator,
    validation_data=validation_generator,
    epochs=10000,
    callbacks=[earlyStopping, checkpoint, reduce],
    workers=8,
    use_multiprocessing=True, # Only works if training data is loaded into RAM from HDF
)

In [None]:
# Run the model on every sample of the first training batch and show each
# input image next to channel 2 of the corresponding prediction.
for imgs, masks in training_generator:
    for img, mask in zip(imgs, masks):
        # predict() expects a batch, so wrap the single image
        pred = model.predict(np.asarray([img]))

        fig, (ax_input, ax_pred) = plt.subplots(1, 2, figsize=(20, 20))
        ax_input.imshow(img.astype(np.uint8)[..., 0], cmap='gray')
        ax_pred.imshow(pred[0][..., 2])
        plt.show()

    break  # only the first batch

In [None]:
# Prepare 2D-3D point correspondences for the pose estimation in the next cell.

# Dataset sample to work on
index = 0

import cv2
import numpy as np
 
# Read the image and its ground-truth pose, then project the model points
img = np.array(dataset.get_image(index))
q, r = dataset.get_pose(index)
# NOTE(review): the earlier visualization cell calls projectModel(q, r) with
# the same three-way unpacking; confirm that `project` exists in the helper
# modules and returns (x, y, visible) as well.
xa, ya, visible = project(q, r)
# Not used further in this cell
size = img.shape

# First return value of getSatelliteModel(): the model points; the second is ignored here
model_points_all, _ = getSatelliteModel()
# Pair up the projected coordinates, one (x, y) entry per point
image_points_all = np.stack((xa, ya), axis=1)

# Keep only the points selected by `visible`
# (presumably a boolean mask or index array -- TODO confirm)
model_points = model_points_all[visible]
image_points = image_points_all[visible]
print(visible)
print(model_points)
print(image_points)

In [None]:
from scipy.spatial.transform import Rotation as R

# Estimate the pose from the 2D-3D correspondences prepared in the previous
# cell. Passing None for the distortion coefficients means no lens distortion
# is modelled.
(success, rotation_vector, translation_vector) = cv2.solvePnP(model_points, image_points, Camera.K, None)

# Ground-truth pose first, estimated pose second, for visual comparison.
print(q, r)
# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype (float64).
rot = np.zeros((3, 3), dtype=float)
#cv2.Rodrigues(rotation_vector, rot)
# as_quat() returns the quaternion scalar-last (x, y, z, w).
# NOTE(review): confirm the component order of `q` before comparing the two printouts.
print(R.from_rotvec(rotation_vector[...,0]).as_quat(), translation_vector[...,0])
#print(R.from_dcm(rot).as_euler('zyx', degrees=True), R.from_dcm(rot).as_euler('zyx', degrees=True))

In [None]:
# Show the first rows * cols dataset samples in a grid.
rows = 4
cols = 2

fig, axes = plt.subplots(rows, cols, figsize=(12, 12))
for i in range(rows):
    for j in range(cols):
        # Row-major flat index: the stride between grid rows is the number of
        # columns. Using `rows` as the stride skipped samples (0, 1, 4, 5, ...).
        dataset.visualize(i * cols + j, ax=axes[i][j])
        axes[i][j].axis('off')
fig.tight_layout()

In [None]:
# Project the 3D point (1.0, 1.0, 1.0) into the image with the pose estimated
# by solvePnP, then draw a line from the first selected image point to it.
(nose_end_point2D, jacobian) = cv2.projectPoints(
    np.array([(1.0, 1.0, 1.0)]),
    rotation_vector,
    translation_vector,
    Camera.K,
    None,
)

# Mark every selected image point with a filled circle (drawn into `img` in place)
for p in image_points:
    center = (int(p[0]), int(p[1]))
    cv2.circle(img, center, 5, (0,0,255), -1)

# Line end points as integer pixel coordinates
p1 = tuple(int(v) for v in image_points[0][:2])
p2 = tuple(int(v) for v in nose_end_point2D[0][0][:2])

cv2.line(img, p1, p2, (255,0,0), 2)

# Display image
plt.figure(figsize=(20,20))
plt.imshow(img)
plt.show()