In [1]:
# Useful tools for one-shot learning

In [2]:
from itertools import combinations 
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, Activation, Lambda
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
print(tf.__version__)
import tensorflow_addons as tfa
print(tfa.__version__)

2.1.0
0.9.1


In [3]:
# Toy dataset: five scalar samples, one class label per sample,
# and one integer id per sample (its position in `data`).
data = [10, 20, 30, 40, 50]
labels = [0, 0, 1, 2, 2]
id_list = np.arange(0, len(data))

print(id_list)

[0 1 2 3 4]


In [4]:
# Enumerate every unordered pair of sample ids, then give each pair its own id.
id_pair_list = [pair for pair in combinations(id_list, 2)]
id_pair_id_list = np.arange(len(id_pair_list))

print('Total number of data pairs:', len(id_pair_list))
# A blank line precedes each of the two dumps.
print('', id_pair_list, '', id_pair_id_list, sep='\n')

Total number of data pairs: 10

[(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]

[0 1 2 3 4 5 6 7 8 9]


In [5]:
# Distances
def manhattan_distance(vects):
    """L1 (Manhattan) distance between two batches of vectors.

    Args:
        vects: a pair ``(x, y)`` of tensors that can be subtracted from
            each other (presumably both ``(batch, features)`` -- the
            reduction runs over axis 1).

    Returns:
        Sum of absolute element-wise differences along axis 1, with that
        axis kept as size 1.
    """
    left, right = vects
    abs_diff = K.abs(left - right)
    return K.sum(abs_diff, axis=1, keepdims=True)

def exp_manhattan_distance(vects):
    """Exponentiated negative L1 distance, ``exp(-sum(|x - y|))``.

    Note that the value is 1 when the two vectors are equal and decays
    towards 0 as they move apart, so larger means *closer*.

    Args:
        vects: a pair ``(x, y)`` of tensors that can be subtracted from
            each other (presumably both ``(batch, features)`` -- the
            reduction runs over axis 1).

    Returns:
        ``exp(-L1)`` computed along axis 1, with that axis kept as size 1.
    """
    left, right = vects
    l1 = K.sum(K.abs(left - right), axis=1, keepdims=True)
    return K.exp(-l1)

def euclidean_distance(vects):
    """L2 (Euclidean) distance between two batches of vectors.

    Args:
        vects: a pair ``(x, y)`` of tensors that can be subtracted from
            each other (presumably both ``(batch, features)`` -- the
            reduction runs over axis 1).

    Returns:
        Square root of the summed squared differences along axis 1, with
        that axis kept as size 1.
    """
    left, right = vects
    squared_norm = K.sum(K.square(left - right), axis=1, keepdims=True)
    # Clamp to epsilon before the square root so the result is never sqrt(0).
    clamped = K.maximum(squared_norm, K.epsilon())
    return K.sqrt(clamped)

def cosine_distance(vects):
    """Cosine distance, ``1 - cos(theta)``, between two batches of vectors.

    similarity = cos(theta) = sum(x * y) / (sqrt(sum(x * x)) * sqrt(sum(y * y)))
    distance   = 1 - similarity

    The result is 0 for vectors pointing the same way, 1 for orthogonal
    vectors and 2 for opposite vectors. A true distance (non-negative,
    zero for matching directions) is what a distance-based loss such as
    the contrastive loss expects: it penalises similar pairs by the
    squared distance, so returning the raw negative similarity would pull
    similar pairs towards orthogonality instead of towards each other.

    Args:
        vects: a pair ``(x, y)`` of tensors that can be multiplied
            element-wise (presumably both ``(batch, features)`` -- the
            reduction runs over the last axis).

    Returns:
        ``1 - cosine_similarity`` along the last axis, with that axis
        kept as size 1.
    """
    x, y = vects
    x = K.l2_normalize(x, axis=-1)  # x / sqrt(sum(x * x))
    y = K.l2_normalize(y, axis=-1)  # y / sqrt(sum(y * y))
    # The dot product of unit vectors is the cosine similarity.
    similarity = K.sum(x * y, axis=-1, keepdims=True)
    return 1.0 - similarity

In [6]:
# Siamese model
def create_model(data_shape, drop_rate=0.0, activation='relu', loss='mse', optimizer='adam',
                 distance_fn=None):
    """Build and compile a Siamese model around one shared encoder.

    The shared encoder is Dense(10) -> Dropout -> Activation -> Dense(4)
    -> Activation (dropout is applied before the first activation). Both
    Siamese inputs go through this same encoder, so its weights are
    shared, and the two encodings are combined by ``distance_fn`` inside
    a Lambda layer.

    Side effect: prints the summary of the shared encoder.

    Args:
        data_shape: shape of one sample, excluding the batch axis; passed
            straight to ``Input(shape=...)``.
        drop_rate: rate passed to the Dropout layer.
        activation: activation passed to both Activation layers.
        loss: loss passed to ``model.compile``.
        optimizer: optimizer passed to ``model.compile``.
        distance_fn: callable that takes the list ``[encoding_a,
            encoding_b]`` and returns the model output. ``None`` (the
            default) selects ``cosine_distance``; other options defined
            alongside it are ``manhattan_distance``,
            ``exp_manhattan_distance`` and ``euclidean_distance``.

    Returns:
        The compiled two-input Siamese ``Model``.
    """
    # Resolved at call time so the default keeps working whatever order
    # the defining cells were executed in.
    if distance_fn is None:
        distance_fn = cosine_distance

    # --- Shared encoder -------------------------------------------------
    input0 = Input(shape=data_shape, name='data')

    dense0 = Dense(10, activation=None, name='1st_dense')(input0)
    drop0 = Dropout(drop_rate, name='1st_dropout')(dense0)
    act0 = Activation(activation, name='1st_activation')(drop0)

    dense1 = Dense(4, activation=None, name='last_dense')(act0)
    act1 = Activation(activation, name='shared_output')(dense1)

    shared_model = Model(inputs=input0, outputs=act1, name='shared_model')
    shared_model.summary()

    # --- Siamese wrapper ------------------------------------------------
    input_siam_a = Input(shape=data_shape, name='input_siam_a')
    input_siam_b = Input(shape=data_shape, name='input_siam_b')

    # Calling the same Model object twice reuses its weights for both branches.
    model_a = shared_model(input_siam_a)
    model_b = shared_model(input_siam_b)

    dist_layer = Lambda(distance_fn, name='distance_layer')([model_a, model_b])

    model = Model(inputs=[input_siam_a, input_siam_b], outputs=dist_layer)
    model.compile(loss=loss, optimizer=optimizer)

    return model

# Activations (either can be passed as `activation`)
relu = 'relu'
mish = tfa.activations.mish

# Losses
cl = tfa.losses.ContrastiveLoss(margin=1)

# Optimizers (any of these can be passed as `optimizer`)
adam = 'adam'
radam = tfa.optimizers.RectifiedAdam()
ranger = tfa.optimizers.Lookahead(radam, sync_period=6, slow_step_size=0.5)

# Create model
# Each sample is a single scalar, so the per-sample shape is the one-element
# tuple (1,). Note that (1) without the trailing comma is just the int 1.
model = create_model(data_shape=(1,),
                     drop_rate=0,
                     activation=relu,
                     loss=cl,
                     optimizer=adam)
model.summary()

Model: "shared_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
data (InputLayer)            [(None, 1)]               0         
_________________________________________________________________
1st_dense (Dense)            (None, 10)                20        
_________________________________________________________________
1st_dropout (Dropout)        (None, 10)                0         
_________________________________________________________________
1st_activation (Activation)  (None, 10)                0         
_________________________________________________________________
last_dense (Dense)           (None, 4)                 44        
_________________________________________________________________
shared_output (Activation)   (None, 4)                 0         
Total params: 64
Trainable params: 64
Non-trainable params: 0
__________________________________________________________