In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

from keras.layers.embeddings import Embedding

from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, GlobalAveragePooling1D, Flatten, Lambda
from keras import backend as K

from keras import Input
from keras.models import Model

In [None]:
'''
Since we are dealing with tensors, we cannot directly use Python functions such
as sum/pow. For this, Keras provides a module called backend (imported as K)
where we can find all sorts of functions that operate on tensors
'''

def euclidean_distance(vects):
    """Return the Euclidean distance between two batches of vectors.

    Args:
        vects: a pair ``(x, y)`` of tensors; they are subtracted element-wise
            and reduced over axis 1.

    Returns:
        A tensor of distances with axis 1 kept as a size-1 dimension
        (``keepdims=True``).
    """
    left, right = vects
    squared_diff = K.square(left - right)
    squared_norm = K.sum(squared_diff, axis=1, keepdims=True)
    # Clamp to epsilon so the argument of sqrt is always strictly positive.
    clamped = K.maximum(squared_norm, K.epsilon())
    return K.sqrt(clamped)

def eucl_dist_output_shape(shapes):
    """Compute the output shape of the distance layer.

    Args:
        shapes: a pair of input shapes; only the first one is consulted.

    Returns:
        A tuple ``(batch_dim, 1)`` where ``batch_dim`` is the leading
        dimension of the first input shape.
    """
    first_shape, _ = shapes
    batch_dim = first_shape[0]
    return (batch_dim, 1)


def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

    y_pred is treated as a distance. Where y_true is 1 the squared distance
    is penalised; where y_true is 0 the squared shortfall of the distance
    below the margin (fixed at 1) is penalised. The mean over all elements
    is returned.
    '''
    margin = 1
    # Term that is active when y_true == 1: squared distance.
    similar_term = y_true * K.square(y_pred)
    # Term that is active when y_true == 0: squared hinge on (margin - distance).
    hinge = K.maximum(margin - y_pred, 0)
    dissimilar_term = (1 - y_true) * K.square(hinge)

    return K.mean(similar_term + dissimilar_term)

def accuracy(y_true, y_pred):
    '''Classification accuracy using a fixed 0.5 threshold on distances.

    A predicted distance below 0.5 is mapped to label 1, anything else to
    label 0; the result is the mean agreement with y_true.
    '''
    predicted_labels = K.cast(y_pred < 0.5, y_true.dtype)
    matches = K.equal(y_true, predicted_labels)
    return K.mean(matches)

### **Training 1 network for Different Inputs**



1.   Siamese means twin: this architecture takes in 2 inputs, passes both through the same model, and outputs an embedding for each
2.   The 2 inputs can be images, text, or any other vectors
3. After we get the embeddings, we calculate the distance between them and, using a loss function (either contrastive or triplet), try to reduce the distance between inputs of the same class and increase the distance between inputs from different classes
4. Siamese models are used when we have a large number of classes, say 100s or 1000s. The idea is to learn a similarity function between 2 inputs.



In [None]:
'''
The model we choose here can be any model, CNN/RNN/ANN or any
ML (decision trees/random forest etc.) architecture.
I have used an ANN with an embedding layer as I was working on text data.

The Embedding layer works like a word2vec layer and converts each word of our
input sentence to a vector. In the example below, I have sentences with a fixed
length of 15 (after padding). For every word in the sentence, it will
create a vector of size 32.
  Say we have 100 training examples: our input will be (100,15), i.e. 100
sentences of length 15 each. The output of the embedding layer will be
(100,15,32), i.e. 100 sentences of 15 words each, where each word is
represented by a vector of length 32
'''

# Shared encoder: token ids -> 32-d word vectors -> average over the sequence
# -> two dense layers, ending in a 16-d embedding.
vocab = 100
model = Sequential([
    Embedding(input_dim=vocab, output_dim=32, input_length=15),
    GlobalAveragePooling1D(),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
])

In [None]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 15, 32)            3200      
_________________________________________________________________
global_average_pooling1d (Gl (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 32)                1056      
_________________________________________________________________
dense_1 (Dense)              (None, 16)                528       
Total params: 4,784
Trainable params: 4,784
Non-trainable params: 0
_________________________________________________________________


In [None]:
'''
Here we define the input shape as we have defined in the model, basically we 
define 2 tensors of the input shape same as our model's input shape
'''
# `shape` must be a tuple; `(15)` is just the integer 15, so the trailing comma
# is required to describe a 1-D input of length 15 (batch size excluded).
left_input = Input(shape=(15,))
right_input = Input(shape=(15,))

In [None]:
'''
We pass both our inputs from the same model and get the output embedding. Since,
we had 100 examples, final output will be of shape (100,16) 16 is because of the
last dense layer in the model
'''
# Both inputs go through the *same* `model` instance, so the weights are shared.
left_input_embedding, right_input_embedding = (
    model(left_input),
    model(right_input),
)

In [None]:
'''
Keras provides us with a customizable layer called lambda where we can pass our
function and get the output. Here, we want to calculate the distance between
embeddings of 2 inputs. We pass the previously defined euclidean_distance
function and pass the embeddings of 2 inputs
'''

# Build the distance layer first, then apply it to the pair of embeddings.
distance_layer = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)
lambdaLayer = distance_layer([left_input_embedding, right_input_embedding])

In [None]:
'''
Model() method stitches the customized layers to the previously defined
sequential layers.
'''
# Functional model: two inputs in, one distance out.
siamese_inputs = [left_input, right_input]
siamese_model = Model(inputs=siamese_inputs, outputs=lambdaLayer)

In [None]:
siamese_model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 15)]         0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 15)]         0                                            
__________________________________________________________________________________________________
sequential (Sequential)         (None, 16)           4784        input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 1)            0           sequential[0][0]      

In [None]:
'''
Finally using a distance based loss function we compile and fit our model
'''
# NOTE(review): data_1, data_2 and y_data are not defined in the visible cells;
# they must be created before this cell for Restart & Run All to succeed.
siamese_model.compile(
    optimizer='adam',
    loss=contrastive_loss,
    metrics=[accuracy],
)
siamese_model.fit(
    x=[data_1, data_2],
    y=y_data,
    epochs=10,
    batch_size=64,
)

In [None]:
'''
The whole idea of a Siamese network is to learn a similarity function, either
using contrastive loss or triplet loss etc.
I noticed that in some applications people have added a sigmoid or softmax layer
after the lambda layer and have tried to predict the class of interest. This is
another application of the siamese model, but it takes the essence away, as we
use it when we have a lot of classes and few examples per class
'''

# Caution: this rebinds `lambdaLayer` to the output of a new Dense layer applied
# to its previous value, so re-running this cell stacks another Dense on top.
sigmoid_head = Dense(1, activation='sigmoid')
lambdaLayer = sigmoid_head(lambdaLayer)

In [None]:
# Rebuild the model so that its output is whatever `lambdaLayer` is bound to now.
siamese_inputs = [left_input, right_input]
siamese_model = Model(inputs=siamese_inputs, outputs=lambdaLayer)

In [None]:
siamese_model.summary()

Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 15)]         0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 15)]         0                                            
__________________________________________________________________________________________________
sequential (Sequential)         (None, 16)           4784        input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 1)            0           sequential[0][0]      

In [None]:
'''
Since we have an output layer with sigmoid activation, we can have the loss as 
binary crossentropy. Similarly, if we want to use softmax layer, we can use
categorical crossentropy.
'''

# The custom `accuracy` metric treats y_pred as a *distance* (y_pred < 0.5 -> 1).
# With a sigmoid output trained by binary cross-entropy, y_pred is the
# probability of label 1, so that metric would report inverted results.
# The built-in 'accuracy' applies the correct thresholding for this loss.
# NOTE(review): data_1, data_2 and y_data are not defined in the visible cells.
siamese_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
siamese_model.fit([data_1, data_2], y_data, batch_size=64, epochs=10)

### **Training Different Networks for 2 Inputs**

Below is a variation of the Siamese model. Earlier we had 2 similar inputs and we
passed them through 1 model to get embeddings. Suppose we have 2 totally different inputs, say one is an image of a person and the other is eye color or hair color etc.
  In this case we need 2 separate models to produce 2 different embeddings, and
we can again use our distance-based loss function to make the 2 embeddings as
close to/far from each other as needed, depending on the case in hand.

1.   We define model architecture 1
2.   We define model architecture 2
3. We pass the output of each of the models through a customized layer to get the distances
4. We calculate the loss using the distance based contrastive or triplet loss functions



In [None]:
# Define Model Architecture 1

def model_arc_1():
  """Build the first encoder: length-15 integer sequences -> 32-d embedding.

  Returns:
    A Keras ``Model`` mapping an input of batch shape (None, 15) through an
    Embedding layer, global average pooling and a 32-unit ReLU Dense layer.
  """
  # Named `desc_input` rather than `input` to avoid shadowing the builtin.
  desc_input = Input(batch_shape=(None, 15,))

  embedded = Embedding(input_dim=100, output_dim=32, input_length=15)(desc_input)
  pooled = GlobalAveragePooling1D()(embedded)
  encoded = Dense(32, activation='relu')(pooled)

  return Model(desc_input, encoded)


In [None]:
# Define Model Architecture 2

def model_arc_2():
  """Build the second encoder: length-3 integer sequences -> 32-d embedding.

  Returns:
    A Keras ``Model`` mapping an input of batch shape (None, 3) through an
    Embedding layer, global average pooling and a 32-unit ReLU Dense layer.
  """
  # Named `icd_input` rather than `input` to avoid shadowing the builtin.
  icd_input = Input(batch_shape=(None, 3,))

  embedded = Embedding(input_dim=100, output_dim=32, input_length=3)(icd_input)
  pooled = GlobalAveragePooling1D()(embedded)
  encoded = Dense(32, activation='relu')(pooled)

  return Model(icd_input, encoded)

In [None]:
# Create objects of both model types
# One instance of each architecture; unlike the shared-encoder setup, these two
# models do not share weights.
model1, model2 = model_arc_1(), model_arc_2()

In [None]:
#Customized layer for calculating distances
# Build the distance layer, then apply it to the outputs of the two encoders.
distance_layer = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)
lambdaLayer = distance_layer([model1.output, model2.output])

In [None]:
#Stitch the layers using Model function and define
# Two-encoder model: each encoder's input in, one distance out.
encoder_inputs = [model1.input, model2.input]
siamese_model_v2 = Model(inputs=encoder_inputs, outputs=lambdaLayer)

In [None]:
siamese_model_v2.summary()

Model: "functional_19"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_17 (InputLayer)           [(None, 15)]         0                                            
__________________________________________________________________________________________________
input_18 (InputLayer)           [(None, 3)]          0                                            
__________________________________________________________________________________________________
embedding_6 (Embedding)         (None, 15, 32)       3200        input_17[0][0]                   
__________________________________________________________________________________________________
embedding_7 (Embedding)         (None, 3, 32)        3200        input_18[0][0]                   
______________________________________________________________________________________

In [None]:
# Train the two-encoder model built in this section. The original cell compiled
# and fitted `siamese_model` (the shared-encoder model from the first section),
# leaving `siamese_model_v2` untrained.
# NOTE(review): tokenized_data_1, tokenized_data_2 and y_data are not defined in
# the visible cells; presumably sequences of length 15 and 3 respectively — confirm.
siamese_model_v2.compile(loss=contrastive_loss, optimizer='adam', metrics=[accuracy])
siamese_model_v2.fit([tokenized_data_1, tokenized_data_2], y_data, batch_size=64, epochs=1)