In [1]:
from keras import backend as K
from keras.models import Model,load_model
from keras.layers import Input, Layer,GlobalAveragePooling2D,Dense
from keras.applications.vgg16 import VGG16
from keras.regularizers import l2

import cv2
import os
import numpy as np
import numpy.random as rng
import pickle

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def W_init(shape,name=None,dtype=None):
    """Initialize weights as in paper.

    Samples values from a normal distribution with mean 0 and standard
    deviation 1e-2 and wraps them in a backend variable.

    Args:
        shape: shape of the weight tensor to create.
        name: optional name given to the backend variable.
        dtype: optional dtype of the variable. Newer Keras releases invoke
            custom initializers as ``initializer(shape, dtype=dtype)``, so
            this keyword has to be accepted; ``None`` keeps the backend
            default float type.

    Returns:
        The backend variable holding the sampled values.
    """
    values = rng.normal(loc=0,scale=1e-2,size=shape)
    return K.variable(values,dtype=dtype,name=name)
# TODO: figure out how to initialize layer biases in Keras.
def b_init(shape,name=None,dtype=None):
    """Initialize bias as in paper.

    Samples values from a normal distribution with mean 0.5 and standard
    deviation 1e-2 and wraps them in a backend variable.

    Args:
        shape: shape of the bias tensor to create.
        name: optional name given to the backend variable.
        dtype: optional dtype of the variable. Newer Keras releases invoke
            custom initializers as ``initializer(shape, dtype=dtype)``, so
            this keyword has to be accepted; ``None`` keeps the backend
            default float type.

    Returns:
        The backend variable holding the sampled values.
    """
    values=rng.normal(loc=0.5,scale=1e-2,size=shape)
    return K.variable(values,dtype=dtype,name=name)

In [3]:
# Size of every image fed to the network (105 x 105 pixels, 3 channels).
input_shape = (105, 105, 3)

# ImageNet-pretrained VGG16 convolutional base without its classifier head.
# Passing input_shape builds it for the image size actually used below
# instead of leaving the spatial dimensions undefined.
vgg16_model = VGG16(weights = 'imagenet', include_top = False, input_shape = input_shape)

# Embedding head: global average pooling followed by a 1024-unit sigmoid layer
# that uses the custom weight and bias initializers.
x = vgg16_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(1024,activation="sigmoid",kernel_regularizer=l2(1e-3),kernel_initializer=W_init,bias_initializer=b_init)(x)

# Keras 2 names these arguments `inputs` / `outputs`; the singular spellings
# are deprecated and only trigger a warning.
convnet=Model(inputs = vgg16_model.input, outputs = predictions)

  


In [4]:
# Inputs for the anchor, positive and negative images. They reuse input_shape
# so the three branches cannot drift from the size declared for the network.
in_a = Input(shape=input_shape)
in_p = Input(shape=input_shape)
in_n = Input(shape=input_shape)

# Embedding vectors for anchor, positive and negative images. The same
# convnet instance is applied to all three inputs, so its weights are shared.
emb_a = convnet(in_a)
emb_p = convnet(in_p)
emb_n = convnet(in_n)

In [5]:
class TripletLossLayer(Layer):
    """Layer that turns (anchor, positive, negative) embeddings into a triplet loss.

    The loss is registered on the layer through ``add_loss`` and is also
    returned as the layer output.
    """

    def __init__(self, alpha, **kwargs):
        """Store the margin ``alpha`` and forward remaining kwargs to ``Layer``."""
        self.alpha = alpha
        super(TripletLossLayer, self).__init__(**kwargs)

    def triplet_loss(self, inputs):
        """Compute the summed hinge triplet loss.

        Args:
            inputs: sequence of three tensors ``(anchor, positive, negative)``.

        Returns:
            ``sum(max(||a - p||^2 - ||a - n||^2 + alpha, 0))`` where the squared
            distances are summed over the last axis and the outer sum runs
            over axis 0.
        """
        a, p, n = inputs
        p_dist = K.sum(K.square(a-p), axis=-1)
        n_dist = K.sum(K.square(a-n), axis=-1)
        return K.sum(K.maximum(p_dist - n_dist + self.alpha, 0), axis=0)

    def call(self, inputs):
        """Register the triplet loss on the layer and return it as the output."""
        loss = self.triplet_loss(inputs)
        self.add_loss(loss)
        return loss

    def get_config(self):
        """Return the layer config including ``alpha``.

        Without this, the required ``alpha`` constructor argument is missing
        from the serialized config and the layer cannot be rebuilt from it.
        """
        config = super(TripletLossLayer, self).get_config()
        config['alpha'] = self.alpha
        return config
    
# Attach the triplet loss (margin alpha = 0.2) to the anchor, positive and
# negative embedding vectors.
triplet_loss_layer = TripletLossLayer(
    alpha=0.2,
    name='triplet_loss_layer',
)([emb_a, emb_p, emb_n])

# Trainable model: takes anchor / positive / negative images and outputs the loss.
triplet_net = Model(inputs=[in_a, in_p, in_n], outputs=triplet_loss_layer)

In [6]:
# Print the layer-by-layer summary of the triplet model.
triplet_net.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 105, 105, 3)  0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 105, 105, 3)  0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 105, 105, 3)  0                                            
__________________________________________________________________________________________________
model_1 (Model)                 (None, 4096)         16815936    input_2[0][0]                    
                                                                 input_3[0][0]                    
          

In [7]:
# Freeze every layer of the pre-trained VGG16 base; layers that are not part
# of vgg16_model.layers are left untouched.
# NOTE(review): Keras picks up `trainable` changes when a model is compiled,
# so this cell has to run before triplet_net.compile(...) - confirm cell order.
for layer in vgg16_model.layers:
    layer.trainable = False
    
# Print the summary again after freezing.
triplet_net.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 105, 105, 3)  0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 105, 105, 3)  0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 105, 105, 3)  0                                            
__________________________________________________________________________________________________
model_1 (Model)                 (None, 4096)         16815936    input_2[0][0]                    
                                                                 input_3[0][0]                    
          

In [8]:
# Load the pickled training and validation datasets.
# Each pickle is unpacked as a pair; the second element exposes .keys().
# NOTE(review): pickle.load can execute arbitrary code - only open trusted files.

PATH = "./Bongard/BP_2" #CHANGE THIS - path where the pickled data is stored

with open(os.path.join(PATH, "train.pickle"), "rb") as f:
    (X,c) = pickle.load(f)

with open(os.path.join(PATH, "val.pickle"), "rb") as f:
    (X_val,cval) = pickle.load(f)
    
# Show the category keys stored with each subset.
print("training alphabets")
print(c.keys())
print("validation alphabets:")
print(cval.keys())

training alphabets
dict_keys(['left', 'right'])
validation alphabets:
dict_keys(['left', 'right'])


In [9]:
class TripletBongard:
    """Load pickled Bongard image sets and sample (anchor, positive, negative) triplets.

    Every ``<subset>.pickle`` file must unpickle to a pair ``(X, c)``.
    ``X`` is indexed as ``X[class, example]`` and must expose a 4-D ``shape``
    ``(n_classes, n_examples, rows, cols)``; ``c`` is stored as-is.
    """

    def __init__(self, path, data_subsets=("train", "val")):
        """Read ``<path>/<name>.pickle`` for every name in ``data_subsets``.

        Args:
            path: directory containing the pickle files.
            data_subsets: iterable of subset names (file names without the
                ``.pickle`` suffix). An immutable default replaces the former
                mutable list default.
        """
        self.data = {}
        self.categories = {}

        for name in data_subsets:
            file_path = os.path.join(path, name + ".pickle")
            print("loading data from {}".format(file_path))
            # SECURITY: pickle.load can execute arbitrary code; only load
            # files that come from a trusted source.
            with open(file_path, "rb") as f:
                X, c = pickle.load(f)
            self.data[name] = X
            self.categories[name] = c

    @staticmethod
    def _to_unit_rgb(image):
        """Copy a 2-D grayscale image into 3 equal channels and divide by 255."""
        return np.repeat(np.asarray(image)[:, :, np.newaxis], 3, axis=2) / 255.0

    def triplet_generator(self, batch_size, s="train"):
        """Sample one batch of triplets (despite the name, this returns, not yields).

        For each triplet a class and two distinct examples of it are drawn as
        anchor and positive; the negative is a random example of a different
        class.

        The earlier code passed ``(rows, cols)`` to ``cv2.resize`` as
        ``(width, height)``: a no-op for square images and a shape mismatch
        for non-square ones. The images are now used at their stored size.

        Args:
            batch_size: number of triplets to sample.
            s: key of the subset in ``self.data`` to sample from.

        Returns:
            Tuple ``(anchors, positives, negatives, store)``. The first three
            are float arrays of shape ``(batch_size, rows, cols, 3)`` scaled
            by 1/255. ``store`` has shape ``(batch_size, 5)`` with columns
            ``[class, anchor example, positive example, negative example,
            negative class]``.

        Raises:
            ValueError: if the subset has fewer than two classes or fewer than
                two examples per class (the previous rejection loops would
                never terminate in that case).
        """
        X = self.data[s]
        n_classes, n_examples, rows, cols = X.shape

        if n_classes < 2:
            raise ValueError("need at least 2 classes to pick a negative, got {}".format(n_classes))
        if n_examples < 2:
            raise ValueError("need at least 2 examples per class to pick a positive, got {}".format(n_examples))

        anchor_bongards = np.zeros((batch_size, rows, cols, 3))
        posi_bongards = np.zeros((batch_size, rows, cols, 3))
        neg_bongards = np.zeros((batch_size, rows, cols, 3))
        store = np.zeros((batch_size, 5))

        for i in range(batch_size):
            rand_idx = rng.randint(0, n_classes)
            anchor_bongard_idx = rng.randint(0, n_examples)
            # A non-zero offset modulo the population picks uniformly among the
            # *other* indices without an open-ended retry loop.
            posi_bongard_idx = (anchor_bongard_idx + rng.randint(1, n_examples)) % n_examples
            neg_idx = (rand_idx + rng.randint(1, n_classes)) % n_classes
            neg_bongard_idx = rng.randint(0, n_examples)

            anchor_bongards[i] = self._to_unit_rgb(X[rand_idx, anchor_bongard_idx])
            posi_bongards[i] = self._to_unit_rgb(X[rand_idx, posi_bongard_idx])
            neg_bongards[i] = self._to_unit_rgb(X[neg_idx, neg_bongard_idx])

            store[i] = (rand_idx, anchor_bongard_idx, posi_bongard_idx, neg_bongard_idx, neg_idx)

        return anchor_bongards, posi_bongards, neg_bongards, store
        
                         
# Instantiate the loader; with the default subsets this reads train.pickle
# and val.pickle from PATH.
TripletBongard_loader= TripletBongard(PATH)

loading data from ./Bongard/BP_2/train.pickle
loading data from ./Bongard/BP_2/val.pickle


In [10]:
# Sample one batch of 4 triplets: anchor, positive and negative image arrays
# plus the array of indices that were drawn.
a,p,n,store= TripletBongard_loader.triplet_generator(batch_size=4)

In [11]:
def triplet_generator_2():
    """Endlessly yield one fixed triplet batch (placeholder for API demos).

    The batch is assembled from the module-level arrays ``a``, ``p`` and
    ``n``; no new data is sampled between iterations. Meant to be replaced
    by a generator that draws real image data.

    :return: generator of ``([anchor, positive, negative], None)`` pairs
    """
    while True:
        yield [a, p, n], None

In [12]:
# Create the endless batch generator.
generator2= triplet_generator_2( )

In [None]:
# loss=None: no loss function is passed at compile time; TripletLossLayer
# registers its loss through add_loss instead.
triplet_net.compile(loss=None, optimizer='adam')
# Train for 10 epochs of 500 generator batches each.
triplet_net.fit_generator(generator2, epochs=10, steps_per_epoch=500)

  """Entry point for launching an IPython kernel.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

In [None]:
##################################################

In [None]:
def img_to_encoding(image, model):
    """Compute the model output for a single grayscale image.

    The image is copied into three identical channels, divided by 255 and
    wrapped in a batch of one before it is handed to the model.

    The earlier code called ``cv2.resize`` with ``(rows, cols)`` as the
    ``(width, height)`` target: a no-op for square images, but for non-square
    ones it silently swapped the two dimensions and distorted the picture.
    The image is now passed through at its own size.

    Args:
        image: 2-D array of grayscale pixel values.
        model: object exposing ``predict_on_batch(batch)``.

    Returns:
        Whatever ``model.predict_on_batch`` returns for the one-image batch.

    Raises:
        ValueError: if ``image`` is not 2-D.
    """
    image = np.asarray(image)
    if image.ndim != 2:
        raise ValueError("expected a 2-D grayscale image, got shape {}".format(image.shape))

    # Gray -> RGB by repeating the single channel three times.
    rgb = np.repeat(image[:, :, np.newaxis], 3, axis=2)
    # Scale by 1/255 and add the leading batch axis.
    batch = (rgb / 255.0)[np.newaxis, ...]
    return model.predict_on_batch(batch)

In [None]:
# Reference embeddings: the first six examples of class 0 ("left") and of
# class 1 ("right") from X, keyed as "<side>_<example index>".
database = {
    "{}_{}".format(side, example_idx): img_to_encoding(X[class_idx][example_idx], convnet)
    for class_idx, side in enumerate(("left", "right"))
    for example_idx in range(6)
}

In [None]:
def which_is_it(image, database, model):
    """Find the database entry whose encoding is closest to the image's encoding.

    Args:
        image: image passed to ``img_to_encoding``.
        database: mapping of name -> reference encoding.
        model: model passed to ``img_to_encoding``.

    Returns:
        Tuple ``(min_dist, identity)``: the smallest Euclidean distance found
        and the name of the matching entry. On ties the first entry wins.
        If no distance compares as smaller than infinity (e.g. all NaN) the
        result is ``(inf, None)``.

    Raises:
        ValueError: if ``database`` is empty.
    """
    if not database:
        raise ValueError("database is empty; nothing to compare the image against")

    encoding = img_to_encoding(image, model)

    # Start from infinity rather than a fixed cut-off: the previous value of
    # 100 left `identity` unbound whenever every distance was >= 100.
    min_dist = float("inf")
    identity = None
    for name, db_enc in database.items():
        dist = np.linalg.norm(encoding - db_enc)
        if dist < min_dist:
            min_dist = dist
            identity = name

    return min_dist, identity

In [None]:
def test_bongard(X_val,model):
    """Score nearest-reference classification on the validation images.

    ``X_val`` is flattened to one image per row. Images in the first half
    are expected to be class 0, the rest class 1. Each image is matched
    against the module-level ``database``; a match on one of the six
    ``left_*`` names counts as class 0, anything else as class 1.

    Prints and returns the percentage of correct predictions.
    """
    n_classes_val, n_examples_val, w, h = X_val.shape
    m_val = n_classes_val * n_examples_val
    flat_images = X_val.reshape(m_val, w, h)
    left_names = ["left_0","left_1","left_2","left_3","left_4","left_5"]

    n_correct = 0
    for i, image in enumerate(flat_images):
        min_dist, identity = which_is_it(image, database, model)
        targets = 0 if i < m_val/2 else 1
        test_result = 0 if identity in left_names else 1
        n_correct += int(test_result == targets)

    percent_correct = 100.0*n_correct / m_val
    print("Got an average of {}% accuracy".format(percent_correct))

    return percent_correct
        

In [None]:
# Evaluate the embedding network on the validation images.
val_acc = test_bongard(X_val,convnet)