In [None]:
"""

 similar_images_TL.py  (author: Anson Wong / git: ankonzoid)

 We find similar images in a database by using transfer learning
 via a pre-trained VGG image classifier. We plot the 5 most similar
 images for each image in the database, and plot the tSNE for all
 our image feature vectors.

"""

In [1]:
import sys, os
import numpy as np
from keras.preprocessing import image
from keras.models import Model
sys.path.append("src")
sys.path.append("../../data/clustering/")
from vgg19 import VGG19
from imagenet_utils import preprocess_input
from plot_utils import plot_query_answer
from sort_utils import find_topk_unique
from kNN import kNN
from tSNE import plot_tsne

import shutil

# Root directory of the clustering data set. The database images are read from
# its "charts" sub-directory (path = base_data + "charts").
base_data = "../../data/clustering/"

Using TensorFlow backend.


In [2]:
# ================================================
# Load pre-trained model and remove higher level layers
# ================================================
# Build the feature extractor: load VGG19 with ImageNet weights and expose the
# output of its 'block4_pool' layer, so model.predict() returns that layer's
# activations rather than the classifier's final output.
print("Loading VGG19 pre-trained model...")
base_model = VGG19(weights='imagenet')
# The two tensors are passed positionally on purpose: Keras 1 names these
# arguments `input`/`output`, Keras 2 names them `inputs`/`outputs` (and warns
# on the singular spellings). Positional arguments are accepted by both.
model = Model(base_model.input,
              base_model.get_layer('block4_pool').output)

Loading VGG19 pre-trained model...


  import sys


In [3]:
# ================================================
# RAW DATA : Read images and convert them to feature vectors 
# ================================================
# Four parallel lists with exactly one entry per accepted (JPEG) image:
#   imgs           - pixel array of the image loaded at 224x224
#   filename_heads - file name without its extension
#   X              - flattened output of `model` for the image
#   file_full_path - path of the source file
# Index i must refer to the same image in all four lists, because indices into
# X are later used to look up entries of file_full_path.
imgs, filename_heads, X, file_full_path = [], [], [], []
path = base_data + "charts"
print("Reading images from '{}' directory...\n".format(path))
# os.listdir returns entries in arbitrary order; sorting makes the row order of
# X (and of the saved .npy files) the same on every run.
for f in sorted(os.listdir(path)):
    # Skip anything that is not a JPEG *before* recording it anywhere, so that
    # non-image files cannot shift file_full_path relative to X.
    head, ext = os.path.splitext(f)
    if ext.lower() not in [".jpg", ".jpeg"]:
        continue
    filename_full = os.path.join(path, f)  # full path filename

    # Read image file
    img = image.load_img(filename_full, target_size=(224, 224))
    pixels = np.array(img)

    # Pre-process for model input: add a leading batch axis of size 1, then
    # apply the model's input preprocessing.
    batch = np.expand_dims(image.img_to_array(img), axis=0)
    batch = preprocess_input(batch)
    features = model.predict(batch).flatten()

    # Append to all four lists together, only once every step above has
    # succeeded, so the lists stay the same length even if the loop is
    # interrupted part-way through.
    imgs.append(pixels)
    filename_heads.append(head)
    X.append(features)
    file_full_path.append(filename_full)

# Persist features and their source paths (same order) for later reuse.
np.save('feature_vector',X)
np.save('data_imgs',file_full_path)

Reading images from '../../data/clustering/charts' directory...



  'to RGBA images')


KeyboardInterrupt: 

In [None]:
# ================================================
# QUERY IMAGES : Read images and convert them to feature vectors 
# ================================================

def query_images(path='query_images'):
    """Compute a feature vector for every JPEG file directly inside `path`.

    Files whose extension is not ``.jpg``/``.jpeg`` (case-insensitive) are
    skipped entirely. Each accepted file is loaded at 224x224, passed through
    ``preprocess_input`` and the module-level ``model``, and the prediction is
    flattened to one dimension.

    Parameters
    ----------
    path : str
        Directory to scan (only its immediate entries are considered).

    Returns
    -------
    query_features : numpy.ndarray
        One entry per accepted image; ``np.array([])`` when none were found.
    query_img : list of str
        Full paths of the accepted images, in the same order, so that
        ``query_img[i]`` is the file that produced ``query_features[i]``.
    """
    query_features, query_img = [], []
    print("Reading images from '{}' directory...\n".format(path))
    # Sorted for a reproducible order; os.listdir's order is arbitrary.
    for f in sorted(os.listdir(path)):
        # Filter on extension first: a path must only be recorded when a
        # feature vector is recorded too, otherwise the two results drift
        # out of step and callers pair features with the wrong file.
        head, ext = os.path.splitext(f)
        if ext.lower() not in [".jpg", ".jpeg"]:
            continue
        filename_full = os.path.join(path, f)  # full path filename

        # Read image file
        img = image.load_img(filename_full, target_size=(224, 224))

        # Pre-process for model input: add a leading batch axis of size 1,
        # then apply the model's input preprocessing.
        batch = np.expand_dims(image.img_to_array(img), axis=0)
        batch = preprocess_input(batch)
        features = model.predict(batch).flatten()

        # Record both together so the outputs always have equal length.
        query_features.append(features)
        query_img.append(filename_full)

    query_features = np.array(query_features)

    return query_features, query_img

In [None]:
# Turn the per-image Python lists into NumPy arrays (features, then pixels).
X, imgs = np.array(X), np.array(imgs)

# Print both shapes as a quick sanity check on what was loaded.
print("imgs.shape = {}".format(np.shape(imgs)))
print("X_features.shape = {}\n".format(np.shape(X)))

In [None]:
# ===========================
# Find k-nearest images to each image
# ===========================
# Ask for one neighbour more than the 15 wanted, since a database image is
# always its own closest match.
n_neighbours = 1 + 15

# Brute-force search using cosine distance, fitted on the database features.
knn = kNN()
knn.compile(metric="cosine", algorithm="brute", n_neighbors=n_neighbours)
knn.fit(X)

In [None]:
# Extract features for everything in the default query directory.
query_features, query_img = query_images()
print("Query_features.shape = {}\n".format(np.shape(query_features)))

# Root output folder; one sub-folder of matches is created per query image.
nearest_test = 'query_similar_images'

In [None]:
# For each query image, look up its nearest database images and copy them into
# <nearest_test>/<query file name without extension>/.
for index, img in enumerate(query_img):
    # knn.predict is given a batch containing just this query's feature vector.
    distances, indices = knn.predict(np.array([query_features[index]]))
    distances = distances.flatten()
    indices = indices.flatten()
    indices, distances = find_topk_unique(indices, distances, n_neighbours)

    query, ext = os.path.splitext(os.path.basename(img))
    output_path = os.path.join(nearest_test, query)

    # exist_ok=True replaces the separate exists() check: re-runs are a no-op,
    # and a regular file sitting at output_path raises instead of being
    # silently overwritten by the copies below.
    os.makedirs(output_path, exist_ok=True)

    for idx in np.nditer(indices):
        # nditer yields 0-d arrays; convert to a plain int for list indexing.
        shutil.copy(file_full_path[int(idx)], output_path)
