In [1]:
import os
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from tqdm import tqdm
import random

In [2]:
# Select the 10 celebrities (sub-directory names of the dataset root) used in this experiment.
# os.listdir returns entries in arbitrary order, so sort before slicing to pick
# the same subset on every run / machine.
celebs=sorted(os.listdir('/kaggle/input/pubfig-dataset-256x256-jpg/CelebDataProcessed/'))[:10]

In [3]:
# Sanity check: number of celebrity classes selected for the experiment.
print(len(celebs))

10


In [4]:
# Build a small fixed split: for every selected celebrity the first
# TRAIN_PER_CELEB files (in sorted filename order) go to the training set and
# the following TEST_PER_CELEB files go to the test set.
DATA_ROOT='/kaggle/input/pubfig-dataset-256x256-jpg/CelebDataProcessed/'
TRAIN_PER_CELEB=10
TEST_PER_CELEB=3


def load_rgb_images(paths):
    """Read image files into one array of RGB images divided by 255.0.

    Parameters
    ----------
    paths : list of str
        Image file paths. Each image is assumed to decode to 256x256x3
        (the output buffer is allocated with that shape).

    Returns
    -------
    np.ndarray
        Array of shape ``(len(paths), 256, 256, 3)`` with numpy's default
        float dtype.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` cannot read a file (it returns ``None`` in that case).
    """
    images=np.zeros((len(paths),256,256,3))
    for idx in tqdm(range(len(paths))):
        bgr=cv2.imread(paths[idx])
        if bgr is None:
            # Fail with the offending path instead of a cryptic cvtColor error.
            raise FileNotFoundError('Could not read image: '+paths[idx])
        # OpenCV decodes to BGR channel order; convert to RGB before scaling.
        images[idx,:,:,:]=cv2.cvtColor(bgr,cv2.COLOR_BGR2RGB)/255.0
    return images


train_images=[]
test_images=[]
Y_train=[]
Y_test=[]
for celeb in celebs:
    celeb_dir=DATA_ROOT+celeb+'/'
    # os.listdir order is arbitrary; sort so the train/test split is reproducible.
    files=sorted(os.listdir(celeb_dir))
    for fname in files[:TRAIN_PER_CELEB]:
        Y_train.append(celeb)
        train_images.append(celeb_dir+fname)
    for fname in files[TRAIN_PER_CELEB:TRAIN_PER_CELEB+TEST_PER_CELEB]:
        Y_test.append(celeb)
        test_images.append(celeb_dir+fname)

X_train=load_rgb_images(train_images)
X_test=load_rgb_images(test_images)

# Labels are kept as arrays of celebrity names so they can be compared element-wise.
Y_train=np.array(Y_train)
Y_test=np.array(Y_test)
print(X_train.shape,Y_train.shape)
print(X_test.shape,Y_test.shape)

100%|██████████| 100/100 [00:00<00:00, 266.13it/s]
100%|██████████| 30/30 [00:00<00:00, 290.02it/s]

(100, 256, 256, 3) (100,)
(30, 256, 256, 3) (30,)





# Function to get triplets

In [5]:
def generate_triplets(X,Y,same,diff):
    """Build (anchor, positive, negative) triplets for triplet-loss training.

    For every sample ``X[i]`` the function draws ``same`` positives (other
    samples whose label equals ``Y[i]``) and ``diff`` negatives (samples with
    a different label), both with replacement, and emits every
    positive/negative combination, i.e. ``same*diff`` triplets per anchor.

    Parameters
    ----------
    X : np.ndarray
        Samples indexed along axis 0; trailing dimensions are copied as-is.
    Y : array-like
        One label per sample in ``X``.
    same : int
        Number of positives drawn per anchor.
    diff : int
        Number of negatives drawn per anchor.

    Returns
    -------
    tuple of np.ndarray
        ``(anchor_images, same_images, diff_images)``, each float32 with shape
        ``(len(Y)*same*diff,) + X.shape[1:]``. Row ``t`` of the three arrays
        forms one triplet.

    Raises
    ------
    IndexError
        If positives (or negatives) are requested for a sample that has no
        other sample with the same (or a different) label.
    """
    Y=np.asarray(Y)
    sample_shape=X.shape[1:]
    per_anchor=same*diff
    total=len(Y)*per_anchor

    # All three outputs are row-aligned, so they must have the same length.
    anchor_images=np.zeros((total,)+sample_shape,dtype=np.float32)
    same_images=np.zeros((total,)+sample_shape,dtype=np.float32)
    diff_images=np.zeros((total,)+sample_shape,dtype=np.float32)

    for i in tqdm(range(len(Y))):

        celeb=Y[i]

        # Candidate positives exclude the anchor itself.
        same_idxs=np.flatnonzero(Y==celeb)
        same_idxs=same_idxs[same_idxs!=i]
        diff_idxs=np.flatnonzero(Y!=celeb)

        if (same and same_idxs.size==0) or (diff and diff_idxs.size==0):
            raise IndexError(
                'sample %d (label %r) has no candidate positive or negative' % (i,celeb)
            )

        random_same=X[random.choices(same_idxs,k=same)]
        random_diff=X[random.choices(diff_idxs,k=diff)]

        # Rows owned by this anchor in all three output arrays.
        block=slice(i*per_anchor,(i+1)*per_anchor)

        anchor_images[block]=X[i]
        # Cross product: each positive is held for `diff` consecutive rows
        # while the negatives cycle, so every (positive, negative) pair occurs.
        same_images[block]=np.repeat(random_same,diff,axis=0)
        diff_images[block]=np.tile(random_diff,(same,)+(1,)*len(sample_shape))

    return anchor_images,same_images,diff_images
    

In [6]:
# generate_triplets samples with Python's `random` module; seed it so the same
# triplets are produced on every Restart & Run All.
random.seed(42)

# 6 positives x 6 negatives per training image.
train_anchor,train_same,train_diff=generate_triplets(X_train,Y_train,6,6)
print(train_anchor.shape,train_same.shape,train_diff.shape)

100%|██████████| 100/100 [00:08<00:00, 11.29it/s]

(4900, 256, 256, 3) (4900, 256, 256, 3) (4900, 256, 256, 3)





 # Define a CNN Model

In [7]:
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Dense,Dropout,BatchNormalization,Flatten,Input

In [8]:
def triplet_loss(y_actual,y_predicted,alpha=0.2):
    """Triplet margin loss over a concatenated [anchor | positive | negative] vector.

    Parameters
    ----------
    y_actual :
        Unused; present only because Keras calls losses as ``loss(y_true, y_pred)``.
    y_predicted :
        2-D tensor whose last axis is split into three consecutive thirds:
        anchor, positive and negative embeddings.
    alpha : float
        Margin added to ``pos_dist - neg_dist`` before clipping at zero.

    Returns
    -------
    Scalar tensor: the sum over the batch of
    ``max(||a - p||^2 - ||a - n||^2 + alpha, 0)``.
    """
    width=y_predicted.shape.as_list()[-1]

    # Column boundaries of the three embeddings inside the merged vector.
    first_cut=width//3
    second_cut=(2*width)//3

    anchor=y_predicted[:,:first_cut]
    pos=y_predicted[:,first_cut:second_cut]
    neg=y_predicted[:,second_cut:]

    def squared_distance(other):
        # Squared Euclidean distance to the anchor, one value per batch row.
        return tf.reduce_sum(tf.square(anchor-other),axis=-1)

    hinge=tf.maximum(squared_distance(pos)-squared_distance(neg)+alpha,0.0)

    return tf.reduce_sum(hinge)

In [9]:
def get_cnn_base_model():
    """Create the CNN encoder that maps a 256x256x3 image to a 128-d vector.

    The network is four Conv(3x3, relu) -> MaxPool(2x2) -> BatchNorm stages
    with 32, 64, 64 and 128 filters, followed by Flatten, two
    Dense(relu) + Dropout(0.2) stages (512 and 256 units) and a final
    Dense(128, relu) layer. The model summary is printed before returning.

    Returns
    -------
    tf.keras.Sequential
        The (uncompiled) encoder model.
    """
    conv_filters=(32,64,64,128)
    hidden_units=(512,256)

    stack=[]

    for stage,filters in enumerate(conv_filters):
        # Only the very first layer declares the input shape.
        first_layer_args={'input_shape':(256,256,3)} if stage==0 else {}
        stack.append(Conv2D(filters,kernel_size=3,strides=1,padding='same',activation='relu',**first_layer_args))
        stack.append(MaxPooling2D((2,2)))
        stack.append(BatchNormalization())

    stack.append(Flatten())

    for units in hidden_units:
        stack.append(Dense(units,activation='relu'))
        stack.append(Dropout(0.2))

    # Embedding layer.
    stack.append(Dense(128,activation='relu'))

    model=tf.keras.Sequential()
    for layer in stack:
        model.add(layer)

    model.summary()

    return model
    
    

In [10]:
# One input per triplet member; all three are encoded by the same CNN so the
# weights are shared.
anchor_input,positive_input,negative_input=(
    Input((256,256,3),name=layer_name)
    for layer_name in ('anchor_input_layer','same_input_layer','diff_input_layer')
)

cnn_base_model=get_cnn_base_model()

encoded_anchor,encoded_positive,encoded_negative=map(
    cnn_base_model,
    (anchor_input,positive_input,negative_input),
)

# Concatenate the three embeddings along the feature axis; triplet_loss
# splits this vector back into thirds.
merged_vector=tf.keras.layers.concatenate(
    [encoded_anchor,encoded_positive,encoded_negative],
    axis=-1,
    name='merged_layer',
)

model=tf.keras.Model(
    inputs=[anchor_input,positive_input,negative_input],
    outputs=merged_vector,
)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 256, 256, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 128, 128, 32)      0         
_________________________________________________________________
batch_normalization (BatchNo (None, 128, 128, 32)      128       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 128, 128, 64)      18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 64, 64, 64)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 64, 64, 64)        256       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 64)        3

In [11]:
# Optimise the three-input model with Adam against the custom triplet loss.
model.compile(
    optimizer='adam',
    loss=triplet_loss,
)

In [None]:
# Keras needs a target array, but triplet_loss never reads y_actual, so a
# column of zeros with one row per triplet is passed as a placeholder.
Y_dummy=np.zeros((len(train_anchor),1))

model.fit(
    x=[train_anchor,train_same,train_diff],
    y=Y_dummy,
    batch_size=32,
    epochs=100,
)

In [None]:
# Persist only the shared CNN encoder (not the three-input training wrapper).
# NOTE(review): the path has no file extension; confirm the installed Keras
# version accepts extension-less save paths.
cnn_base_model.save('/kaggle/working/triplet_loss_model')