In [5]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Show the contents of the input directory, then of its parent directory.
for directory in ("../input", "../"):
    print(os.listdir(directory))

# Any results you write to the current directory are saved as output.

['test', 'sample_submission.csv', 'train.csv', 'train']
['lib', 'working', 'config', 'input']


In [16]:
from sklearn.model_selection import train_test_split
from itertools import combinations_with_replacement
from PIL import Image
import cv2

# Read train.csv, hold out 10% of its rows (fixed seed, so the split is
# reproducible) and write both parts to the working directory without the index.
data = pd.read_csv("../input/train.csv")
train_data, test_data = train_test_split(data, test_size=0.1, random_state=2018)
for split_frame, split_path in ((train_data, '../working/train.csv'),
                                (test_data, '../working/test.csv')):
    split_frame.to_csv(split_path, index=False)

def preprocess(data, max_pairs=5000):
    """Build labelled image pairs from a frame with ``Image`` and ``Id`` columns.

    Rows are paired with ``combinations_with_replacement``, so every row is
    also paired with itself and each unordered pair of rows appears once.
    Ids are compared row-by-row (positionally) instead of being looked up by
    image name, which avoids re-filtering the frame for every pair and keeps
    the comparison a plain scalar test even when an image name is repeated.

    Args:
        data: DataFrame providing the ``Image`` and ``Id`` columns.
        max_pairs: Maximum number of pairs to produce. Defaults to 5000 (the
            cap that used to be hard-coded); ``None`` disables the cap.

    Returns:
        Tuple ``(img1, img2, label)`` of equal-length lists. ``label[k]`` is
        1.0 when both rows of pair ``k`` share the same ``Id`` and 0.0
        otherwise.
    """
    images = data.Image.tolist()
    ids = data.Id.tolist()

    img1, img2, label = [], [], []
    if max_pairs is not None and max_pairs <= 0:
        return img1, img2, label

    for i, j in combinations_with_replacement(range(len(images)), 2):
        img1.append(images[i])
        img2.append(images[j])
        # Always a float so the label list has one consistent type.
        label.append(1.0 if ids[i] == ids[j] else 0.0)

        if max_pairs is not None and len(label) >= max_pairs:
            break

    return img1, img2, label

def read_image(path, image_name, base_size):
    """Load one image as a square grayscale array with values scaled by 1/255.

    Args:
        path: Directory containing the image.
        image_name: File name of the image inside ``path``.
        base_size: Width and height, in pixels, of the returned image.

    Returns:
        Array of shape ``(base_size, base_size)`` holding the resized
        grayscale image divided by 255.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file. ``cv2.imread``
            signals this by returning ``None`` rather than raising, which
            previously surfaced as an opaque error inside ``cv2.resize``.
    """
    image_path = os.path.join(path, image_name)
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise FileNotFoundError("Could not read image: {}".format(image_path))

    img = cv2.resize(img, (base_size, base_size))
    return img / 255

def generator(data, batch_size, base_size=105, image_dir='../input/train'):
    """Endlessly yield ``([left_images, right_images], labels)`` batches.

    The CSV at ``data`` is read in chunks of ``batch_size`` rows; every chunk
    is expanded into image pairs by ``preprocess``, so one yielded batch holds
    all pairs built from one chunk (n * (n + 1) / 2 pairs for a chunk of n
    rows, subject to the cap applied by ``preprocess``), not ``batch_size``
    pairs. Pairs are only formed between rows of the same chunk.

    Args:
        data: Path of a CSV file with ``Image`` and ``Id`` columns.
        batch_size: Number of CSV rows read per chunk.
        base_size: Side length images are resized to (default 105).
        image_dir: Directory the image files are loaded from.

    Yields:
        ``[left, right], labels`` where ``left`` and ``right`` are arrays of
        shape ``(pairs, base_size, base_size, 1)`` and ``labels`` is a 1-D
        float32 array with one entry per pair.
    """
    def load_batch(image_names):
        # Stack the named images into one (N, base_size, base_size, 1) array.
        batch = np.zeros((len(image_names), base_size, base_size, 1))
        for i, img_name in enumerate(image_names):
            batch[i, :, :, 0] = read_image(path=image_dir, image_name=img_name, base_size=base_size)
        return batch

    while True:
        for df in pd.read_csv(data, chunksize=batch_size):
            img1, img2, label = preprocess(df)
            # Targets must be an array: Keras interprets a plain Python list
            # as a list of per-output target arrays, not as one target vector.
            yield [load_batch(img1), load_batch(img2)], np.asarray(label, dtype='float32')

# Create the batch generator over the training split and draw a first batch
# from it so the three pieces can be inspected.
gen = generator(
    data='../working/train.csv',
    batch_size=12,
)
(left_input, right_input), label = next(gen)

In [21]:
import keras.backend as K
from keras.layers import Conv2D, Dense, MaxPool2D, Flatten, Input, merge, subtract, Lambda
from keras.models import Sequential, Model
from keras.initializers import random_normal
from keras import optimizers
from keras import metrics
from keras import losses

class Siamese_Net(object):
    """Builder for a two-input Keras model that scores a pair of images.

    Both inputs pass through one shared convolutional encoder; the
    element-wise absolute difference of the two encodings is fed to a single
    sigmoid unit.
    """

    def __init__(self, input_shape):
        """Store the input shape and create the initializers used by ``build``.

        Args:
            input_shape: Shape of a single input image, handed to ``Input``.
        """
        self.input_shape = input_shape
        # Conv kernels, all biases, and the dense kernel of the encoder each
        # get their own normal initializer.
        self.initializers_weight = random_normal(mean=0.0, stddev=0.01)
        self.initializers_bias = random_normal(mean=0.5, stddev=0.01)
        self.initializers_weight_fully = random_normal(mean=0.0, stddev=0.2)

    def build(self):
        """Assemble and return the uncompiled siamese ``Model``."""
        left_input = Input(self.input_shape)
        right_input = Input(self.input_shape)

        # (filters, kernel_size) of each convolution, in order. Every
        # convolution except the last is followed by 2x2 max pooling.
        conv_specs = [(64, 10), (128, 7), (128, 4), (256, 4)]
        last_index = len(conv_specs) - 1

        convert = Sequential()
        for index, (filters, kernel) in enumerate(conv_specs):
            convert.add(Conv2D(filters, kernel_size=kernel, strides=(1, 1), activation='relu',
                               kernel_initializer=self.initializers_weight,
                               bias_initializer=self.initializers_bias))
            if index != last_index:
                convert.add(MaxPool2D(pool_size=2))

        # Flatten the last feature maps into one vector, then apply a fully
        # connected sigmoid layer to obtain the encoding.
        convert.add(Flatten())
        convert.add(Dense(5005, activation='sigmoid',
                          kernel_initializer=self.initializers_weight_fully,
                          bias_initializer=self.initializers_bias))

        # The same encoder (shared weights) is applied to both inputs.
        left_features = convert(left_input)
        right_features = convert(right_input)

        # Element-wise L1 distance between the two encodings.
        dist = Lambda(lambda pair: K.abs(pair[0] - pair[1]))([left_features, right_features])

        # Single sigmoid unit on top of the distance vector.
        out = Dense(1, activation='sigmoid')(dist)

        return Model(inputs=[left_input, right_input], outputs=out)

model = Siamese_Net(input_shape=(105, 105, 1)).build()
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_9 (InputLayer)            (None, 105, 105, 1)  0                                            
__________________________________________________________________________________________________
input_10 (InputLayer)           (None, 105, 105, 1)  0                                            
__________________________________________________________________________________________________
sequential_5 (Sequential)       (None, 5005)         47325901    input_9[0][0]                    
                                                                 input_10[0][0]                   
__________________________________________________________________________________________________
lambda_4 (Lambda)               (None, 5005)         0           sequential_5[1][0]               
          

In [23]:
# Configure training: Adam with learning rate 0.001, binary cross-entropy as
# the loss, and binary accuracy as the reported metric.
model.compile(
    loss=losses.binary_crossentropy,
    optimizer=optimizers.Adam(lr=0.001),
    metrics=[metrics.binary_accuracy],
)

In [24]:
# `len()` without an argument raises TypeError, so the step count is computed
# explicitly: `gen` yields one batch per 12-row chunk of the training CSV, so
# one pass over the data takes ceil(rows / 12) steps. The 12 must match the
# `batch_size` given to `generator` when `gen` was created.
steps_per_epoch = int(np.ceil(len(train_data) / 12))
model.fit_generator(gen, epochs=100, verbose=1, steps_per_epoch=steps_per_epoch)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fa9e45c3c50>