In [None]:
from sklearn.neighbors import NearestNeighbors
import glob 
from skimage import io
import pandas as pd
import cv2
import numpy as np
import tensorflow.keras.layers as L
from tensorflow.keras.models import save_model
import tensorflow as tf
from os import path
import os

In [None]:
# Directory that holds the input images.
data_path = 'data_path'

# List the directory *before* changing into it: once the working directory
# has moved, a relative `data_path` would be resolved a second time (as
# data_path/data_path) and the listing would fail.
# Sub-directories (e.g. Jupyter's .ipynb_checkpoints) are skipped because they
# cannot be read as images, and the names are sorted because os.listdir
# returns entries in arbitrary order.
image_names = sorted(
    name for name in os.listdir(data_path)
    if os.path.isfile(os.path.join(data_path, name))
)

# The image names are bare file names, so later reads rely on the working
# directory being the image directory.
os.chdir(data_path)

def image2array(filelist):
    """Load image files into a single normalised float32 batch.

    Each file is read with ``skimage.io.imread``, coerced to three channels,
    resized to 224x224 and scaled by 1/255 (assumes 8-bit images, so values
    end up in [0, 1]).

    Args:
        filelist: iterable of image paths readable by ``io.imread``.

    Returns:
        ``np.ndarray`` of dtype float32 with shape ``(n_images, 224, 224, 3)``.
    """
    image_array = []
    for image in filelist:
        print('{} is loaded'.format(image))
        img = io.imread(image)
        # io.imread already returns channels in RGB(A) order, so no BGR->RGB
        # swap is needed; only the channel count has to be normalised.
        if img.ndim == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        elif img.shape[2] == 4:
            img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
        image_array.append(cv2.resize(img, (224, 224)))
    n_images = len(image_array)
    # The explicit reshape keeps the 4-D shape even when no image was loaded.
    image_array = np.asarray(image_array, dtype='float32').reshape(n_images, 224, 224, 3)
    # Scale exactly once, by the 8-bit maximum.
    image_array /= 255
    return image_array

# Load at most the first 500 listed images as the training batch and report
# the shape of the resulting array.
train_data = image2array(image_names[0:500])
print("Length of training dataset:", train_data.shape)

In [None]:
# Per-image shape: the training array's shape without its leading batch axis.
IMG_SHAPE = tuple(train_data.shape[1:])
def build_deep_autoencoder(img_shape, code_size):
    """Build a convolutional encoder and a mirrored transposed-conv decoder.

    The encoder stacks four ``Conv2D`` (ELU, 'same' padding) + 2x2
    ``MaxPooling2D`` stages with 32, 64, 128 and 256 filters, flattens the
    result and projects it to ``code_size`` units with a linear ``Dense``
    layer. The decoder expands a code with a ``Dense`` layer, reshapes it to
    ``(H/16, W/16, 256)`` and upsamples through four stride-2
    ``Conv2DTranspose`` layers; the last one has ``C`` filters and no
    activation, so its output has the same shape as the encoder input.

    Args:
        img_shape: ``(height, width, channels)`` of the input images. Height
            and width must be multiples of 16 (four 2x poolings).
        code_size: number of units in the bottleneck code.

    Returns:
        Tuple ``(encoder, decoder)`` of uncompiled ``Sequential`` models.

    Raises:
        ValueError: if height or width is not divisible by 16, in which case
            the decoder could not reproduce the input size.
    """
    H, W, C = img_shape
    downscale = 2 ** 4  # four pooling stages, each halving height and width
    if H % downscale or W % downscale:
        raise ValueError(
            'image height and width must be divisible by {}, got {}'.format(
                downscale, (H, W)))
    code_h, code_w = H // downscale, W // downscale

    # encoder
    encoder = tf.keras.models.Sequential()
    encoder.add(L.InputLayer(img_shape))
    for filters in (32, 64, 128, 256):
        encoder.add(L.Conv2D(filters=filters, kernel_size=(3, 3), activation='elu', padding='same'))
        encoder.add(L.MaxPooling2D(pool_size=(2, 2)))
    encoder.add(L.Flatten())
    encoder.add(L.Dense(code_size))

    # decoder
    decoder = tf.keras.models.Sequential()
    decoder.add(L.InputLayer((code_size,)))
    # Spatial size is derived from img_shape instead of being fixed to 14x14.
    decoder.add(L.Dense(code_h * code_w * 256))
    decoder.add(L.Reshape((code_h, code_w, 256)))
    for filters in (128, 64, 32):
        decoder.add(L.Conv2DTranspose(filters=filters, kernel_size=(3, 3), strides=2, activation='elu', padding='same'))
    # Linear output layer with as many channels as the input image.
    decoder.add(L.Conv2DTranspose(filters=C, kernel_size=(3, 3), strides=2, activation=None, padding='same'))

    return encoder, decoder


# Instantiate both halves with a 32-unit code and print their layer summaries.
encoder, decoder = build_deep_autoencoder(img_shape=IMG_SHAPE, code_size=32)
encoder.summary()
decoder.summary()

In [None]:
# Chain encoder and decoder into one model that maps an image to its
# reconstruction.
input_layer = L.Input(shape=IMG_SHAPE)
code = encoder(input_layer)
reconstruction = decoder(code)

# Train the model to reproduce its own input (input and target are the same
# array) with mean-squared-error loss.
autoencoder = tf.keras.models.Model(inputs=input_layer, outputs=reconstruction)
autoencoder.compile(loss='mse', optimizer="adamax")
autoencoder.fit(train_data, train_data, epochs=10, verbose=1)

In [None]:
# Encode every training image; there must be exactly one code per image.
image_codes = encoder.predict(train_data)
assert image_codes.shape[0] == train_data.shape[0]

In [None]:
# Euclidean nearest-neighbour index over the encoded training images
# (fit returns the estimator itself, so the call can be chained).
nei_clf = NearestNeighbors(metric="euclidean").fit(image_codes)
def get_similar(i, res, n_neighbors=6):
    """Append one result line for image ``i`` to ``res``.

    The image is encoded, its ``n_neighbors`` nearest codes are looked up in
    ``nei_clf``, and a space-separated line made of the image's own name
    followed by ``n_neighbors - 1`` neighbour names is appended to ``res``.

    Args:
        i: index of the query image in ``train_data`` / ``image_names``.
        res: list of result lines; modified in place.
        n_neighbors: number of neighbours requested from the index. The query
            image itself is normally among them, hence one fewer is reported.

    Returns:
        None.
    """
    image = train_data[i]
    name = image_names[i]
    assert image.ndim == 3
    code = encoder.predict(image[None])
    (_,), (idx,) = nei_clf.kneighbors(code, n_neighbors=n_neighbors)
    # Drop the query by index rather than by position: with tied distances
    # (e.g. duplicate images) the query is not guaranteed to come first.
    neighbours = [j for j in idx if j != i][:n_neighbors - 1]
    line = [name]
    line.extend(image_names[j] for j in neighbours)
    res.append(' '.join(line))

In [None]:
# Number of similar images reported for each query image.
n_neighbors = 5

# Header row: 'image' followed by the neighbour ranks, space-separated like
# the data rows that follow it.
res = [' '.join(['image'] + [str(n) for n in range(1, n_neighbors + 1)])]

n_images = len(train_data)
for i in range(n_images):
    # Request one extra neighbour: get_similar reports one name fewer than
    # it asks the index for.
    get_similar(i, res, n_neighbors=n_neighbors + 1)
    print('{} in {}'.format(i, n_images))

In [None]:
# Show the accumulated result lines (the header row followed by one line per
# processed image).
res