In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from data_augmentation import random_transform


from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.layers import Dense, Conv2D, MaxPooling2D, Reshape, Flatten, Input, merge, subtract
from keras import backend as K

Using TensorFlow backend.


In [2]:
resize_shape = (64, 64)

In [3]:
data = pd.read_csv("data/train.csv")

In [4]:
#For now, we remove new_whale
data = data[data['Id'] != 'new_whale'].reset_index(drop=True)

In [5]:
len(data)

9040

# Look at example 

In [6]:
from PIL import Image
image = Image.open('data/train/0a5c0f48.jpg')

In [7]:
image = image.resize(resize_shape)

In [8]:
# plt.imshow(image)
# plt.show()

In [9]:
np.array(image).shape

(64, 64, 3)

In [10]:
gray = np.mean(image, -1)

In [11]:
gray.shape

(64, 64)

In [12]:
# plt.imshow(random_transform(gray),cmap='gray')
# plt.show()

# All images (if small) can be held in memory.

In [13]:
file_list = data['Image']

In [14]:
def get_image(file, shape=resize_shape):
    image = Image.open('data/train/' + file)
    image = image.resize(shape)
    image = np.array(image)
    if len(image.shape) == 3:
        image = np.mean(image, -1) 
    return image

In [15]:
image_list = [get_image(f) for f in file_list]

In [16]:
data['image_array'] = image_list

In [17]:
data.head(4)

Unnamed: 0,Image,Id,image_array
0,00022e1a.jpg,w_e15442c,"[[218, 220, 224, 231, 225, 246, 250, 241, 222,..."
1,000466c4.jpg,w_1287fbc,"[[187.333333333, 191.333333333, 191.333333333,..."
2,00087b01.jpg,w_da2efe0,"[[205.0, 183.0, 214.0, 199.0, 212.0, 215.0, 21..."
3,001296d5.jpg,w_19e5482,"[[159.333333333, 164.333333333, 157.333333333,..."


# Create Test and Train

In [18]:
from sklearn.utils import shuffle
data = shuffle(data)

test_proportion = 0.8
cutoff_index = int(len(data) * test_proportion)

training_data = data.iloc[:cutoff_index].reset_index(drop=True)
test_data = data.iloc[cutoff_index:].reset_index(drop=True)

In [19]:
from collections import Counter

training_counts = Counter(training_data['Id'])
training_data['Id_count'] = training_data.apply(lambda x: training_counts.get(x["Id"]), axis=1)

test_counts = Counter(test_data['Id'])
test_data['Id_count'] = test_data.apply(lambda x: test_counts.get(x["Id"]), axis=1)

In [20]:
from random import randint

# Create generator

In [21]:
def get_different_category_example(data):
    index_1 = randint(0,len(data)-1)
    image_1 = data['image_array'][index_1]
    id_1 = data['Id'][index_1]
    id_2 = id_1
    while (id_1 == id_2):
        index_2 = randint(0,len(data)-1)
        id_2 = data['Id'][index_2]
    
    image_2 = data['image_array'][index_2]
    
    image_1 = random_transform(image_1)
    image_2 = random_transform(image_2)
    return image_1, image_2

def get_same_category_example(data):
    filtered = data[data['Id_count'] > 1].reset_index(drop=True)
    index_1 = randint(0,len(filtered)-1)
    image_1 = filtered['image_array'][index_1]
    id_1 = filtered['Id'][index_1]   
    id_2 = id_1
    relevant_indices = list(filtered.index[filtered['Id'] == id_1])
#     relevant_indices.remove(index_1)
    index_2 = np.random.choice(relevant_indices)
    image_2 = filtered['image_array'][index_2]
    
    image_1 = random_transform(image_1)
    image_2 = random_transform(image_2)
    return image_1, image_2

In [41]:
def binary_data_generator(batch_size, data, resize_shape, p=0.5):
    while True:
        targets = np.random.randint(2, size=batch_size)
        image_1_batch = np.zeros((batch_size, resize_shape[0],resize_shape[1]))
        image_2_batch = np.zeros((batch_size, resize_shape[0],resize_shape[1]))
        for i in range(batch_size):
            if(targets[i]):
                image_1_batch[i,:,:], image_2_batch[i,:,:] = get_same_category_example(data)
            else:
                image_1_batch[i,:,:], image_2_batch[i,:,:] = get_different_category_example(data)

        pairs = [image_1_batch, image_2_batch]

        yield pairs, targets 

# Create the network

In [23]:
def L2_distance(X):

    encoded_l, encoded_r = X

    # BPR loss
    loss = 1.0 - K.sigmoid(
        K.sum(user_latent * positive_item_latent, axis=-1, keepdims=True) -
        K.sum(user_latent * negative_item_latent, axis=-1, keepdims=True))

    return loss

In [63]:
input_shape = resize_shape
left_input = Input(input_shape)
right_input = Input(input_shape)

convnet = Sequential()
convnet.add(Reshape((resize_shape[0],resize_shape[1], 1,), input_shape=(resize_shape[0],resize_shape[1],),name='Reshape'))
convnet.add(Conv2D(filters=2, input_shape=(resize_shape[0],resize_shape[1],1,), kernel_size=5, activation='relu',name='conv_1'))
convnet.add(Conv2D(filters=4, kernel_size=5, activation='relu',name='conv_2'))
convnet.add(MaxPooling2D(pool_size=2,name='pool_1'))
convnet.add(Conv2D(filters=8, kernel_size=3, activation='relu',name='conv_3'))
convnet.add(Conv2D(filters=12, kernel_size=3, activation='relu',name='conv_4'))
convnet.add(MaxPooling2D(pool_size=2,name='pool_2'))
convnet.add(Flatten())
convnet.add(Dense(units=32, activation='relu',name='dense_1'))

encoded_l = convnet(left_input)
encoded_r = convnet(right_input)
L1_distance = lambda x: K.abs(x[0]-x[1])
both = merge([encoded_l,encoded_r], mode = L1_distance, output_shape=lambda x: x[0])
prediction = Dense(1,activation='sigmoid')(both)
siamese_net = Model(input=[left_input,right_input],output=prediction)

  name=name)


In [64]:
LEARNING_RATE = 0.00001
siamese_net.compile(loss="binary_crossentropy",optimizer=Adam(), metrics=['accuracy'])

# Run the network

In [65]:
BATCH_SIZE = 32
training_data_generator = binary_data_generator(BATCH_SIZE, training_data, resize_shape)

In [66]:
history = siamese_net.fit_generator(training_data_generator, verbose=1, epochs=10, steps_per_epoch=100)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10

KeyboardInterrupt: 

# Evaluation 

In [67]:
evaluation_data_generator = binary_data_generator(BATCH_SIZE, test_data, resize_shape)

evaluation_steps = 100
metric_names = siamese_net.metrics_names
metric_values = siamese_net.evaluate_generator(evaluation_data_generator, steps=evaluation_steps)
for i in range(len(siamese_net.metrics_names)):
    print(metric_names[i], ": ", metric_values[i], sep="")

loss: 0.644228013754
acc: 0.646875


# Save weights

In [29]:
import time
import os

now = time.strftime('%Y.%m.%d %H:%M:%S')
directory = "weights/" + now + "/"
if not os.path.exists(directory):
    os.makedirs(directory)


siamese_net.save_weights(directory + "siamese_weights")
convnet.save_weights(directory + "convnet_weights")

# Sub

In [80]:
def predict_category(image, model, training_images, training_categories):
    training_stack = np.stack(training_images)
    input_stack = np.repeat(image, len(training_stack), axis=0)
    binary_predictions = model.predict([training_stack,input_stack])
    binary_predictions = [x[0] for x in binary_predictions]
    
    predicted_index = np.argmax(binary_predictions)
    return training_categories[predicted_index]
            

In [81]:
#Same image should return 1. Investigate this?
#No because sigmoid of 0 is 0.5. Still seems not ideal.
# Probably this would be improved by freezing the features weights.


image_1 = training_data['image_array'][0]
image_1 = np.expand_dims(image_1,0)
image_2 = training_data['image_array'][0]
image_2 = np.expand_dims(image_2,0)

siamese_net.predict([image_1,image_2])[0][0]

0.52301055

In [32]:
training_data.head(4)

Unnamed: 0,Image,Id,image_array,Id_count
0,3ce58221.jpg,w_0626e4d,"[[206.666666667, 211.333333333, 204.666666667,...",2
1,a3844c28.jpg,w_77ee0be,"[[217.0, 214.0, 181.0, 170.0, 199.0, 217.0, 17...",2
2,6530809b.jpg,w_ba53619,"[[160.666666667, 160.666666667, 160.666666667,...",3
3,94e800b6.jpg,w_1609b19,"[[221.0, 205.0, 202.0, 196.0, 211.0, 211.0, 19...",2


In [33]:
image_1 = training_data['image_array'][0]

In [79]:
predict_category(image_1, siamese_net, training_data['image_array'], training_data['Id'])

'w_3a47dba'