In [1]:
from keras import backend as K
from keras.models import Model,load_model
from keras.layers import Input, Layer,GlobalAveragePooling2D,Dense,concatenate
from keras.applications.vgg16 import VGG16
from keras.regularizers import l2
from keras.optimizers import SGD,Adam

import cv2
import os
import numpy as np
import numpy.random as rng
import pickle

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
#load datasets

PATH = "./Bongard/BP_61" #CHANGE THIS - path where the pickled data is stored

with open(os.path.join(PATH, "train.pickle"), "rb") as f:
    (X,c) = pickle.load(f)

with open(os.path.join(PATH, "val.pickle"), "rb") as f:
    (X_val,cval) = pickle.load(f)
    
print("training alphabets")
print(c.keys())
print("validation alphabets:")
print(cval.keys())

training alphabets
dict_keys(['left', 'right'])
validation alphabets:
dict_keys(['left', 'right'])


In [3]:
class TripletBongard:
    def __init__(self, path, data_subsets = ["train", "val"]):
        self.data={}
        self.categories={}

        for name in data_subsets:
            file_path= os.path.join(path, name + ".pickle")
            print("loading data from {}".format(file_path))
            with open(file_path,"rb") as f:
                (X,c) = pickle.load(f)
                self.data[name] = X
                self.categories[name] = c

    def triplet_generator(self, batch_size, s="train"):

            X=self.data[s]
            n_classes, n_examples, w, h = X.shape

            anchor_bongards=np.zeros((batch_size, w, h,3)) 
            posi_bongards=np.zeros((batch_size,  w, h,3)) 
            neg_bongards=np.zeros((batch_size,  w, h,3)) 
            store= targets=np.zeros((batch_size,5))
            for i in range(batch_size):

                rand_idx = rng.randint(0, n_classes)
                anchor_bongard_idx = rng.randint(0, n_examples)
                posi_bongard_idx=rng.randint(0, n_examples)

                while anchor_bongard_idx == posi_bongard_idx:
                    posi_bongard_idx = rng.randint(0, n_examples)


                anchor_bongard = cv2.cvtColor(cv2.resize(X[rand_idx, anchor_bongard_idx], (w, h)), cv2.COLOR_GRAY2RGB)
                posi_bongard = cv2.cvtColor(cv2.resize(X[rand_idx, posi_bongard_idx], (w, h)), cv2.COLOR_GRAY2RGB)

                neg_idx=rng.randint(0, n_classes)
                while neg_idx==rand_idx:
                    neg_idx = rng.randint(0, n_classes)

                neg_bongard_idx=rng.randint(0, n_examples)
                neg_bongard = cv2.cvtColor(cv2.resize(X[neg_idx, neg_bongard_idx], (w, h)), cv2.COLOR_GRAY2RGB)
                
 
                anchor_bongards[i,:,:,:] = anchor_bongard/255.0
                posi_bongards[i,:,:,:] = posi_bongard/255.0
                neg_bongards[i,:,:,:] = neg_bongard/255.0
               
                store[i,0]=rand_idx
                store[i,1]= anchor_bongard_idx
                store[i,2]= posi_bongard_idx
                store[i,3]= neg_bongard_idx
                store[i,4]= neg_idx
             
            return anchor_bongards, posi_bongards,neg_bongards,store
        
    
        
                         
#Instantiate the class
TripletBongard_loader= TripletBongard(PATH)

loading data from ./Bongard/BP_61/train.pickle
loading data from ./Bongard/BP_61/val.pickle


In [4]:
def W_init(shape,name=None):
    """Initialize weights as in paper"""
    values = rng.normal(loc=0,scale=1e-2,size=shape)
    return K.variable(values,name=name)
#//TODO: figure out how to initialize layer biases in keras.
def b_init(shape,name=None):
    """Initialize bias as in paper"""
    values=rng.normal(loc=0.5,scale=1e-2,size=shape)
    return K.variable(values,name=name)

In [5]:
embedding_size=4096
input_shape = (105, 105, 3)
vgg16_model = VGG16(weights = 'imagenet', include_top = False)
x = vgg16_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(embedding_size,activation="sigmoid",kernel_regularizer=l2(1e-3),kernel_initializer=W_init,bias_initializer=b_init)(x)
convnet=Model(input = vgg16_model.input, output = predictions)

  import sys


In [6]:
convnet.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
__________

In [7]:
# Input for anchor, positive and negative images
in_a = Input(shape=(105, 105, 3))
in_p = Input(shape=(105, 105, 3))
in_n = Input(shape=(105, 105, 3))

# Output for anchor, positive and negative embedding vectors
emb_a = convnet(in_a)
emb_p = convnet(in_p)
emb_n = convnet(in_n)

distance = concatenate([emb_a ,emb_p,emb_n],name='distance')

In [8]:
triplet = Model(inputs=[in_a, in_p, in_n],outputs=distance)
triplet.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 105, 105, 3)  0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 105, 105, 3)  0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 105, 105, 3)  0                                            
__________________________________________________________________________________________________
model_1 (Model)                 (None, 4096)         16815936    input_2[0][0]                    
                                                                 input_3[0][0]                    
          

In [9]:
# freeze all layers of the pre-trained model
for layer in vgg16_model.layers:
    layer.trainable = False
    
triplet.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 105, 105, 3)  0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 105, 105, 3)  0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 105, 105, 3)  0                                            
__________________________________________________________________________________________________
model_1 (Model)                 (None, 4096)         16815936    input_2[0][0]                    
                                                                 input_3[0][0]                    
          

In [10]:

def triplet_loss(y_true,y_pred):
    """
    Custom loss function. 
    Standard keras defined format
    """
    a = y_pred[:,0:embedding_size]
    p = y_pred[:,embedding_size:2*embedding_size]
    n = y_pred[:,2*embedding_size:]
    
    p_dist = K.sum(K.square(a-p), axis=-1)
    n_dist = K.sum(K.square(a-n), axis=-1)
    
    return K.sum(K.maximum(p_dist - n_dist + 0.2, 0), axis=0)   


In [11]:
optimizer = Adam(0.00006)
triplet.compile(optimizer = optimizer, loss = triplet_loss)

In [12]:
def img_to_encoding(image, model):
    w, h = image.shape
    x=cv2.cvtColor(cv2.resize(image,(w, h)), cv2.COLOR_GRAY2RGB)
    x1=x/255.0
    x1 = x1[np.newaxis, :]
    embedding = model.predict_on_batch(x1)
    return embedding   


In [13]:
database = {}
database["left_0"] = img_to_encoding(X[0][0], convnet)
database["left_1"] = img_to_encoding(X[0][1], convnet)
database["left_2"] = img_to_encoding(X[0][2], convnet)
database["left_3"] = img_to_encoding(X[0][3], convnet)
database["left_4"] = img_to_encoding(X[0][4], convnet)
database["left_5"] = img_to_encoding(X[0][5], convnet)
database["right_0"] = img_to_encoding(X[1][0], convnet)
database["right_1"] = img_to_encoding(X[1][1], convnet)
database["right_2"] = img_to_encoding(X[1][2], convnet)
database["right_3"] = img_to_encoding(X[1][3], convnet)
database["right_4"] = img_to_encoding(X[1][4], convnet)
database["right_5"] = img_to_encoding(X[1][5], convnet)

In [14]:
def which_is_it(image, database, model):
    encoding = img_to_encoding(image, model)
    min_dist = 100
    for (name, db_enc) in database.items():
        dist = np.linalg.norm(encoding - db_enc)
        if dist < min_dist:
            min_dist = dist
            identity = name
            
    return min_dist, identity

In [15]:
def test_bongard(X_val,model):
    n_classes_val, n_examples_val, w, h = X_val.shape
    m_val = n_classes_val * n_examples_val
    X_val=X_val.reshape(m_val, w, h)
    n_correct=0
    for i in range(m_val):
        min_dist,identity=which_is_it(X_val[i], database, model)
        if i < m_val/2:
            targets=0
        else:
            targets=1 
        
        if identity in ["left_0","left_1","left_2","left_3","left_4","left_5"]:
            test_result=0
        else:
            test_result=1
            
        if test_result== targets:
            n_correct+=1
         
        #print(identity)
    percent_correct = (100.0*n_correct / m_val)
    print("Got an average of {}% accuracy".format(percent_correct))
     
    return percent_correct

In [16]:
batch_size=2
n_iter=1000
y = [i for i in range(batch_size)]
qy = np.array(y)

print("training")
for i in range(1, n_iter):
    a,p,n,store= TripletBongard_loader.triplet_generator(batch_size=2)
    loss=triplet.train_on_batch([a,p,n], qy)
    print(loss)
   # if i > n_iter/2:
        #print("evaluating")
       # val_acc = test_bongard(X_val,convnet)
    

training
0.3102381
0.7225119
0.94202983
0.38728887
0.8363625
0.6407454
0.9094435
0.3811335
0.6383462
0.6261188
0.8196841
0.57960045
0.7165394
0.449789
0.4910547
0.633789
0.7823911
0.32462627
0.5830572
0.5666165
0.50430715
0.258701
0.44976115
0.47184324
0.7464703
0.72544736
0.70490223
0.1925257
0.83427465
0.390732
0.19074953
0.36410934
0.25612515
0.31240138
0.65934575
0.68835855
0.37998888
0.18670455
0.39906746
0.35700548
0.41086346
0.24340506
0.8058442
0.20111497
0.5118183
0.46333396
0.49964342
0.37342334
0.19345541
0.2986676
0.49917763
0.77486014
0.47351402
0.69020975
0.45419633
0.2434729
0.22619568
0.653301
0.5862309
0.48854318
0.5383493
0.5633489
0.4298141
0.35816866
0.6345331
0.5198761
0.4898855
0.47401285
0.30192792
0.47390166
0.7800398
0.3946751
0.286106
0.16944252
0.39832938
0.51206577
0.16822572
0.21092652
0.42432737
0.16703974
0.3988075
0.49308467
0.5475849
0.97071576
0.26760584
0.31896588
0.43106955
0.34559712
0.54466426
0.2749616
0.53877044
0.59657705
0.38124937
0.31573987
0

0.07068418
0.070607856
0.07053148
0.070455074
0.070378646
0.070302196
0.070225745
0.0701493
0.070072874
0.06999646
0.06992008
0.069843724
0.069767416
0.06969116
0.06961493
0.06953877
0.069462664
0.069386624
0.069310635
0.06923471
0.06915885
0.069083065
0.06900735
0.068931706
0.068856135
0.06878063
0.068705216
0.06862987
0.06855461
0.06847942
0.075417206
0.06834942
0.06829284
0.06823468
0.06817508
0.06811416
0.06805202
0.067988805
0.067924574
0.06785945
0.06779351
0.067726836
0.0676595
0.06759157
0.06752312
0.06745419
0.06738483
0.06731511
0.06724506
0.067174725
0.067104116
0.067033306
0.06696229
0.06689111
0.066819794
0.06674836
0.06667681
0.06660519
0.06653351
0.066461764
0.06638999
0.06631819
0.06624637
0.066174544
0.06610271
0.06603091
0.06595912
0.06588735
0.065815605
0.06851842
0.065691054
0.065636605
0.065580666
0.06552337
0.065464824
0.06540513
0.06534439
0.06528272
0.06522017
0.06515685
0.06509282
0.06502818
0.06496297
0.06489725
0.06483108
0.06476451
0.06469757
0.06463032
0.06

In [17]:
convnet.save('bottleneck_fc_model.h5')

In [18]:
val_acc = test_bongard(X_val,convnet)

Got an average of 54.2% accuracy


In [19]:
for layer in convnet.layers[15:]:
    layer.trainable = True
convnet.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
__________

In [24]:
# Input for anchor, positive and negative images
in_a_1 = Input(shape=(105, 105, 3))
in_p_1 = Input(shape=(105, 105, 3))
in_n_1 = Input(shape=(105, 105, 3))

# Output for anchor, positive and negative embedding vectors
emb_a_1 = convnet(in_a_1)
emb_p_1 = convnet(in_p_1)
emb_n_1 = convnet(in_n_1)

distance_1 = concatenate([emb_a_1 ,emb_p_1,emb_n_1],name='distance')

In [27]:
triplet_1 = Model(inputs=[in_a_1, in_p_1, in_n_1],outputs=distance_1)
triplet_1.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_11 (InputLayer)           (None, 105, 105, 3)  0                                            
__________________________________________________________________________________________________
input_12 (InputLayer)           (None, 105, 105, 3)  0                                            
__________________________________________________________________________________________________
input_13 (InputLayer)           (None, 105, 105, 3)  0                                            
__________________________________________________________________________________________________
model_1 (Model)                 (None, 4096)         16815936    input_11[0][0]                   
                                                                 input_12[0][0]                   
          

In [28]:
optimizer = SGD(lr=1e-4, momentum=0.9)
triplet_1.compile(optimizer = optimizer, loss = triplet_loss)

In [29]:
batch_size=2
n_iter=1000
y = [i for i in range(batch_size)]
qy = np.array(y)

print("training")
for i in range(1, n_iter):
    a,p,n,store= TripletBongard_loader.triplet_generator(batch_size=2)
    loss=triplet.train_on_batch([a,p,n], qy)
    print(loss)
    if i > n_iter/2:
        print("evaluating")
        val_acc = test_bongard(X_val,convnet)

training
0.055226974
0.055166915
0.055106916
0.055046972
0.054987106
0.05492729
0.054867525
0.054807827


  'Discrepancy between trainable weights and collected trainable'


0.05474819
0.05468861
0.054629102
0.054569647
0.05451026
0.05445093
0.054391664
0.05433246
0.054273322
0.059081264
0.0541778
0.054139547
0.05409961
0.054058097
0.054015115
0.05397078
0.053925198
0.053878475
0.053830706
0.053781983
0.053732395
0.053682026
0.053630933
0.057338215
0.05354103
0.05350169
0.053461164
0.05341946
0.0533766
0.05333262
0.053287566
0.05324152
0.05319453
0.05314665
0.053097963
0.053048525
0.052998394
0.05294764
0.052896313
0.052844465
0.05279214
0.052739393
0.052686267
0.0526328
0.05257903
0.052524984
0.052470703
0.052416205
0.052361526
0.05230668
0.052251697
0.052196592
0.052141383
0.052086093
0.052030716
0.051975287
0.05191981
0.051864296
0.05180876
0.0517532
0.051697634
0.05164206
0.05158649
0.06328373
0.051495284
0.05145804
0.05141928
0.051379114
0.05133763
0.051294938
0.05125112
0.051206276
0.051160485
0.051113836
0.051066406
0.051018268
0.05096948
0.05092012
0.05087023
0.050819878
0.050769106
0.050717942
0.05066646
0.050614674
0.05056263
0.05051035
0.0504578

KeyboardInterrupt: 