# Siamese-FC

In [82]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

import keras
import tensorflow as tf
from keras.layers import Input, Conv2D, BatchNormalization, MaxPool2D, Lambda, Reshape, Flatten
from keras.models import Model
from keras import backend as K
from math import sqrt

**Build Model**

In [171]:
Z_SHAPE = (127, 127, 3)
X_SHAPE = (255, 255, 3)

def conv_layer(filters, kernel_dim, stride_len):
    return [Conv2D(filters, kernel_dim, strides=stride_len,
                  padding='valid', activation='relu', kernel_initializer='glorot_normal')]

def conv_block(filters, kernel_dim, stride_len):
    batch_norm = [BatchNormalization(axis=3)]
    return conv_layer(filters, kernel_dim, stride_len) + batch_norm

def max_pool():
    return [MaxPool2D(pool_size=3, strides=2, padding='valid')]

def alex_net_layers():
    layers = []
    layers += conv_block(48, 11, 2)
    layers += max_pool()
    layers += conv_block(128, 5, 1)
    layers += max_pool()
    layers += conv_block(48, 3, 1)
    layers += conv_block(48, 3, 1)
    layers += conv_layer(32, 3, 1)
    return layers

def apply_layers(x, layers):
    out = x
    for layer in layers:
        out = layer(out)
    return out

def add_dimension(t):
    return tf.reshape(t, (1,) + t.shape)

def cross_correlation(inputs):
    x = inputs[0]
    x = tf.reshape(x, [1] + x.shape.as_list())
    z = inputs[1]
    z = tf.reshape(z, z.shape.as_list() + [1])
    return tf.nn.convolution(x, z, padding='VALID', strides=(1,1))

def x_corr_map(inputs):
    # Note that dtype MUST be specified, otherwise TF will assert that the input and output structures are the same,
    # which they most certainly are NOT.
    return K.reshape(tf.map_fn(cross_correlation, inputs, dtype=tf.float32, infer_shape=False), shape=(-1,17,17))
    
def x_corr_layer():
    return Lambda(x_corr_map, output_shape=(17, 17))

def loss_fn(y_true, y_pred):
    print('y_true shape: %s' % y_true.shape)
    print('y_pred shape: %s' % y_pred.shape)
    product = -y_true * y_pred
    probs = 1 + K.clip(K.exp(product), 0, 1e9)
    log_probs = K.log(probs)
    return K.mean(K.flatten(log_probs), axis=-1)

def loss_exp_fn(inputs):
    y_true, y_pred = inputs
    product = -y_true * y_pred
    probs = 1 + K.clip(K.exp(product), 0, 1e9)
    loss = K.log(probs)
    return K.mean(K.flatten(loss), axis=-1)

def binary_cross_entropy(inputs):
    y_true, y_pred = inputs
    return K.binary_crossentropy(y_pred, y_true)
    
def loss_exp():
    return Lambda(loss_exp_fn)

def make_model(x_shape, z_shape, w_loss=False):
    exemplar = Input(shape=z_shape)
    search = Input(shape=x_shape)
    label_input = Input(shape=(17,17))

    alex_net = alex_net_layers()

    exemplar_features = apply_layers(exemplar, alex_net)
    search_features = apply_layers(search, alex_net)
    score_map = x_corr_layer()([search_features, exemplar_features])
    
    outputs = [score_map]
    inputs = [search, exemplar]
    
    if w_loss:
        loss_layer = loss_exp()([label_input,score_map])
        outputs = outputs + [loss_layer]
        inputs = inputs + [label_input]
    model = Model(inputs=inputs, outputs=outputs)
    return model
    
model = make_model(X_SHAPE, Z_SHAPE)

**Load images as Numpy arrays**

In [84]:
from keras.preprocessing import image

x_dir = '../sample/x/'
z_dir = '../sample/z/'

def load_images(directory, dimension, n_images, suffix):
    img_array = np.empty((n_images, dimension, dimension, 3))
    for i in range(1, n_images + 1):
        img = image.load_img(directory + str(i) + suffix, target_size=(dimension, dimension))
        img_array[i - 1] = image.img_to_array(img)
    return img_array

**Make label prototype for search images**

In [85]:
from math import ceil
def inclusive_range(start, end):
    return range(start, end + 1)

def euclidean_distance(x1, y1, x2, y2):
    return sqrt((x1 - x2)**2 + (y1 - y2)**2)

# Create labels for a score map of size dim x dim, where the label of a score is positive for any cell within
# radius of the center, and negative otherwise.
def make_label(dim, radius):
    label = np.full((dim, dim), -1)
    center = int(dim / 2.0)
    start = center - ceil(radius)
    end = center + ceil(radius)
    for i in inclusive_range(start, end):
        for j in inclusive_range(start, end):
            if euclidean_distance(i, j, center, center) <= radius:
                label[i,j] = 1
    return label

# Creates a matrix of weights where the sum of the weights for each label in {-1, 1} is 0.5. Used to account for
# the fact that, given a search image we may have more negative than positive examples or vice versa.
def make_label_weights(labels):
    values, counts = np.unique(labels, return_counts=True)
    class_weights = {}
    for i in range(len(values)):
        class_weights[values[i]] = 0.5 / counts[i]
    return class_weights

**Loading and preprocessing images**

In [96]:
x_images = load_images(x_dir, 255, 100, ".x.jpg")
z_images = load_images(z_dir, 127, 100, ".z.jpg")
color_means = np.mean(x_images, axis=(0,1,2), keepdims=True)
print(color_means)
print(color_means.shape)
x_images -= color_means
z_images -= color_means
color_std_dev = np.std(x_images, axis=(0,1,2), keepdims=True)
x_images /= color_std_dev
z_images /= color_std_dev
print(np.mean(x_images, axis=(0,1,2)))
print(np.mean(z_images, axis=(0,1,2)))

[[[[ 122.63988712  120.70739254  108.4214396 ]]]]
(1, 1, 1, 3)
[ -1.01203627e-14   9.27809142e-14   2.33398234e-13]
[ 0.02205711  0.01812003  0.01516509]


In [172]:
# constants
R = 16
batch_size = 1
response_size = 17
data_size = 100
response_stride = 6.0
label = make_label(response_size, R / response_stride)
labels = np.empty((data_size,) + label.shape)
labels[:] = label
print(labels.shape)

model = make_model(X_SHAPE, Z_SHAPE, False)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
model.predict([x_images[0:2], z_images[0:2]])

(100, 17, 17)
y_true shape: (?, ?, ?)
y_pred shape: (?, 17, 17)


array([[[ 0.31943849,  0.37116632,  0.40889218,  0.41777852,  0.41824475,
          0.41410559,  0.41518939,  0.39951226,  0.38638726,  0.36784509,
          0.33717319,  0.30038488,  0.2579596 ,  0.22084203,  0.17876056,
          0.1449593 ,  0.11471835],
        [ 0.46942252,  0.53352249,  0.59080386,  0.61262941,  0.61821938,
          0.60432863,  0.59714681,  0.58119333,  0.57119733,  0.54977381,
          0.50063992,  0.44444653,  0.38483822,  0.32954618,  0.26359183,
          0.21225727,  0.16416484],
        [ 0.60206139,  0.68689287,  0.76265359,  0.78997684,  0.79351085,
          0.7685867 ,  0.75375116,  0.74326491,  0.72840017,  0.71222895,
          0.68428606,  0.62534833,  0.55979621,  0.4839204 ,  0.39999384,
          0.31312707,  0.23588951],
        [ 0.70063531,  0.7926904 ,  0.8828299 ,  0.91291738,  0.9120338 ,
          0.86538696,  0.83185041,  0.81629592,  0.81188321,  0.80048126,
          0.79600173,  0.77424455,  0.71541548,  0.63321853,  0.54190302,
    

**For some reason, loss does not change at all, currently. wtf?**

In [173]:
model.fit([x_images, z_images], labels, batch_size=8, epochs=5)

Epoch 1/5
Epoch 2/5

KeyboardInterrupt: 

In [72]:
model.predict([x_images[0:1], z_images[0:1]])

array([[[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., 

In [73]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_56 (InputLayer)            (None, 255, 255, 3)   0                                            
____________________________________________________________________________________________________
input_55 (InputLayer)            (None, 127, 127, 3)   0                                            
____________________________________________________________________________________________________
conv2d_136 (Conv2D)              multiple              17472                                        
____________________________________________________________________________________________________
batch_normalization_109 (BatchNo multiple              192                                          
___________________________________________________________________________________________

In [79]:
x_images.shape

(100, 255, 255, 3)

In [81]:
avg_colors = np.mean(z_images, axis=(0,1,2))
print(avg_colors)

[ 21.81794532  19.62064728   7.22914812]
