## References

Our Paper: https://www.cs.cornell.edu/~kb/publications/SIG15ProductNet.pdf

Keras Siamese: https://github.com/keras-team/keras/blob/master/examples/mnist_siamese.py

## To Do

Finish Data Pipeline

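1. Feed images to the model with Keras' `ImageDataGenerator.flow_from_dataframe`, reading paths and labels from the DataFrame built below.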

In [1]:
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, Lambda
from tensorflow.keras.applications import resnet50
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import RMSprop

In [2]:
# Show the installed TensorFlow version as this cell's output.
tf.__version__

'2.0.0'

In [3]:
def euclidean_distance(vects):
    '''Return the Euclidean distance between two batches of vectors.

    `vects` is a pair of tensors `(x, y)`. Squared differences are summed
    over axis 1 (that axis is kept), the sum is clamped from below at
    `K.epsilon()`, and the square root of the clamped value is returned.
    '''
    left, right = vects
    squared_diff = K.square(left - right)
    squared_norm = K.sum(squared_diff, axis=1, keepdims=True)
    # Floor at epsilon so the square root is never taken of an exact zero.
    clamped = K.maximum(squared_norm, K.epsilon())
    return K.sqrt(clamped)


def eucl_dist_output_shape(shapes):
    '''Return the output shape for the distance layer.

    `shapes` is a pair of input shapes. The first dimension of the first
    shape is carried over and the second dimension is fixed to 1.
    '''
    first_shape, _ = shapes
    leading_dim = first_shape[0]
    return (leading_dim, 1)

def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

    Where `y_true` is 1 the penalty is the squared predicted distance;
    where `y_true` is 0 the penalty is the squared amount by which the
    predicted distance falls short of the margin (fixed at 1). The mean of
    the combined penalties is returned.
    '''
    margin = 1
    # Term weighted by y_true: squared predicted distance.
    attract_term = y_true * K.square(y_pred)
    # Term weighted by (1 - y_true): squared shortfall below the margin,
    # which is zero once the distance reaches the margin.
    shortfall = K.maximum(margin - y_pred, 0)
    repel_term = (1 - y_true) * K.square(shortfall)
    return K.mean(attract_term + repel_term)

def siam_accuracy(y_true, y_pred):
    '''Compute classification accuracy with a fixed threshold on distances.

    A predicted distance below 0.5 is turned into label 1 (otherwise 0),
    cast to the dtype of `y_true`, and compared element-wise with `y_true`;
    the mean of the matches is returned.
    '''
    threshold = 0.5
    predicted_label = K.cast(y_pred < threshold, y_true.dtype)
    matches = K.equal(y_true, predicted_label)
    return K.mean(matches)

In [4]:
def create_base_model(input_shape, num_classes):
    '''Build the ResNet50 branch with an embedding and a classification head.

    Args:
        input_shape: Shape of one input image, without the batch axis.
        num_classes: Number of units in the softmax classification layer.

    Returns:
        A Keras `Model` mapping an image to
        `[embedding, class_probabilities]`.
    '''
    image_input = Input(shape=input_shape)
    # Load only the convolutional trunk and let it global-average-pool its
    # last feature map. This avoids building the 1000-class ImageNet head
    # that would be discarded anyway, and lifts the restriction that
    # `include_top=True` with ImageNet weights places on `input_shape`.
    backbone = resnet50.ResNet50(weights="imagenet", include_top=False,
                                 pooling="avg", input_tensor=image_input)
    # 2048-D vector output
    embd_output = backbone.output
    # More dense layers can be added in between if required.
    classification_output = Dense(num_classes, activation='softmax',
                                  name='output_layer')(embd_output)
    custom_resnet_model = Model(inputs=image_input,
                                outputs=[embd_output, classification_output])
    return custom_resnet_model

def create_siamese_model(input_shape, num_classes):
    '''Build the two-input siamese model around one shared base model.

    Both inputs go through the same `create_base_model` instance. The two
    embeddings are L2-normalised along axis 1 and fed to a `Lambda` layer
    computing `euclidean_distance`.

    Returns:
        A Keras `Model` taking `[input_a, input_b]` and producing
        `[distance, class_a, class_b]`.
    '''
    input_a = Input(shape=input_shape)
    input_b = Input(shape=input_shape)
    # A single base model is applied to both inputs.
    shared_branch = create_base_model(input_shape, num_classes)
    embd_a, class_a = shared_branch(input_a)
    embd_b, class_b = shared_branch(input_b)
    # l2 norm for embeddings
    normalized_pair = [K.l2_normalize(embd, axis=1) for embd in (embd_a, embd_b)]
    # distance between embeddings
    distance_layer = Lambda(euclidean_distance,
                            output_shape=eucl_dist_output_shape)
    distance = distance_layer(normalized_pair)
    return Model([input_a, input_b], [distance, class_a, class_b])

In [5]:
# Per-image input shape handed to the model builders (no batch axis);
# presumably height x width x channels - confirm against the data pipeline.
input_shape = (224, 224, 3)
# Width of the softmax classification head on each branch.
num_classes = 46

In [6]:
# Build the siamese model: two image inputs, outputs [distance, class_a, class_b].
s_model = create_siamese_model(input_shape, num_classes)

In [7]:
# Inspect the auto-generated output names; they are used as dict keys when compiling.
s_model.output_names

['lambda', 'model', 'model_1']

In [8]:
# Per-output training configuration, keyed by the model's output names.
# Output order is [distance, class_a, class_b].
output_names = s_model.output_names
losses = dict(zip(
    output_names,
    (contrastive_loss, "categorical_crossentropy", "categorical_crossentropy"),
))
# All three losses contribute equally to the total.
lossWeights = dict(zip(output_names, (1.0, 1.0, 1.0)))

# NOTE: top-k accuracy might be a better metric for the classification outputs.
all_metrics = dict(zip(
    output_names,
    (siam_accuracy, "accuracy", "accuracy"),
))

In [9]:
# RMSprop with its default settings drives all three outputs.
rms = RMSprop()
s_model.compile(
    optimizer=rms,
    loss=losses,
    loss_weights=lossWeights,
    metrics=all_metrics,
)

## Data Pipeline

In [10]:
import pandas as pd
import numpy as np

In [20]:
def _read_annotation_rows(path, header_lines=2):
    '''Read a whitespace-separated annotation file into a list of rows.

    Args:
        path: Path of the text file to read.
        header_lines: Number of leading lines to skip. The default of 2
            matches the files read below (presumably a count line and a
            column-name line - confirm against the data files).

    Returns:
        A list with one list of string tokens per non-blank data line.
    '''
    rows = []
    with open(path, 'r') as f:
        # Iterate the file lazily instead of materialising every line.
        for line_no, line in enumerate(f):
            if line_no < header_lines:
                continue
            # split() with no argument collapses runs of whitespace and
            # ignores leading/trailing whitespace, so padded columns and
            # trailing spaces never produce empty tokens.
            fields = line.split()
            if fields:
                rows.append(fields)
    return rows


# get_category_names (first column of each row)
categories = [row[0] for row in _read_annotation_rows('data/list_category_cloth.txt')]

# get image category map
images = _read_annotation_rows('data/list_category_img.txt')

# get train, valid, test split
images_partition = _read_annotation_rows('data/list_eval_partition.txt')

In [21]:
# One row per image: its path and its category label (still a string here).
data_df = pd.DataFrame(
    data=images,
    columns=['images', 'category_label'],
)
# One row per image: its path and the split it is assigned to.
partition_df = pd.DataFrame(
    data=images_partition,
    columns=['images', 'dataset'],
)

In [22]:
# Labels were parsed from text, so convert them from strings to integers.
data_df['category_label'] = data_df['category_label'].astype(int)

In [23]:
# Attach each image's split by joining on the image path. merge() defaults to
# an inner join, so rows without a match in the other frame are dropped.
data_df = data_df.merge(partition_df, on='images')

In [24]:
# Number of images in each split.
data_df['dataset'].value_counts()

train    209222
val       40000
test      40000
Name: dataset, dtype: int64

In [25]:
# Labels are 1-based, so shift by one to index into the category-name list.
data_df['category'] = data_df['category_label'].map(
    lambda label: categories[int(label) - 1]
)

In [26]:
# Sanity check: the number of distinct labels should equal the number of distinct names.
data_df['category_label'].nunique(), data_df['category'].nunique()

(46, 46)

In [27]:
# Preview the first rows of the assembled table.
data_df.head()

Unnamed: 0,images,category_label,dataset,category
0,img/Sheer_Pleated-Front_Blouse/img_00000001.jpg,3,train,Blouse
1,img/Sheer_Pleated-Front_Blouse/img_00000002.jpg,3,train,Blouse
2,img/Sheer_Pleated-Front_Blouse/img_00000003.jpg,3,val,Blouse
3,img/Sheer_Pleated-Front_Blouse/img_00000004.jpg,3,train,Blouse
4,img/Sheer_Pleated-Front_Blouse/img_00000005.jpg,3,test,Blouse
