In [1]:
import os
import numpy as np
from numpy import unicode
import h5py
import glob
import cv2
from PIL import Image
from keras import applications
from keras.models import load_model
#from keras.applications.vgg16 import preprocess_input
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
import keras.backend as K
from keras.models import Model
from keras.models import Sequential
from keras.layers import Lambda, Input, Dense, GlobalAveragePooling2D, Merge, Dropout
from keras.callbacks import ModelCheckpoint
from keras import optimizers
from keras.preprocessing import image
from sklearn.model_selection import train_test_split
from sklearn.metrics import pairwise_distances
from imutils import build_montages

Using TensorFlow backend.


In [2]:
def get_triplets(data, labels):
    """Sample one random (anchor, positive, negative) triplet.

    Args:
        data: Indexable collection of samples, aligned with ``labels``.
        labels: 1-D array-like of class labels, one per sample.

    Returns:
        Tuple ``(anchor, positive, negative)`` where anchor and positive share
        a label and negative has a different label. Anchor and positive are
        distinct samples whenever the chosen class has at least two members.

    Raises:
        ValueError: If ``labels`` is empty or contains a single class only.
    """
    labels = np.asarray(labels)
    # Drawing one element of `labels` picks the anchor class proportionally to
    # its frequency. Drawing two *elements* (as before) did not guarantee two
    # different classes, so the "negative" could come from the anchor's class.
    pos_label = np.random.choice(labels)
    pos_indexes = np.flatnonzero(labels == pos_label)
    neg_indexes = np.flatnonzero(labels != pos_label)
    if neg_indexes.size == 0:
        raise ValueError("get_triplets needs at least two distinct labels")

    if pos_indexes.size >= 2:
        anchor_idx, positive_idx = np.random.choice(pos_indexes, 2, replace=False)
    else:
        # Single-member class: the only possible positive is the anchor itself.
        anchor_idx = positive_idx = pos_indexes[0]
    negative_idx = np.random.choice(neg_indexes)

    return (data[anchor_idx], data[positive_idx], data[negative_idx])

In [3]:
def dump_features(image_paths, labels, hdf5_path, feature_extractor, batch_size=16):
    """Extract features for every image and store them in an HDF5 file.

    The file gets three datasets: ``image_ids`` (file names), ``features``
    (extractor outputs) and ``labels`` (integer class labels), all aligned by
    row.

    Args:
        image_paths: Sequence of "/"-separated image file paths.
        labels: Sequence of integer labels, one per path.
        hdf5_path: Destination file; an existing file is overwritten.
        feature_extractor: Object exposing ``output_shape`` and
            ``extract(images)``.
        batch_size: Number of images pushed through the extractor at once.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
        IOError: If an image cannot be read by OpenCV.
    """
    num_images = len(labels)
    if len(image_paths) != num_images:
        raise ValueError("image_paths and labels must have the same length")

    features_shape = (num_images,) + tuple(feature_extractor.output_shape[1:])

    # The context manager closes the file even if extraction fails half-way.
    with h5py.File(hdf5_path, mode="w") as db:
        # `str` is what `numpy.unicode` aliased on Python 3; newer NumPy
        # releases no longer provide that alias.
        imageIDDB = db.create_dataset("image_ids", shape=(num_images,),
                                      dtype=h5py.special_dtype(vlen=str))
        featuresDB = db.create_dataset("features",
                                       shape=features_shape, dtype="float")
        labelsDB = db.create_dataset("labels",
                                     shape=(num_images,), dtype="int")

        for start in range(0, num_images, batch_size):
            end = min(start + batch_size, num_images)
            batch_paths = image_paths[start:end]

            images = []
            for path in batch_paths:
                img = cv2.imread(path, 1)
                if img is None:
                    raise IOError("Could not read image: {}".format(path))
                # cv2.imread returns BGR, while the Keras ResNet50
                # preprocess_input expects RGB (and the query cells load RGB
                # via keras image.load_img), so convert before extraction.
                images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

            imageIDDB[start:end] = [path.split("/")[-1] for path in batch_paths]
            featuresDB[start:end] = feature_extractor.extract(images)
            labelsDB[start:end] = labels[start:end]
            # Report images completed so far (never exceeds the total).
            print("Extracting {}/{}".format(end, num_images))

In [4]:
def extract_features(hdf5_path):
    """Load the ``features`` and ``labels`` datasets from an HDF5 file.

    Args:
        hdf5_path: Path of a file holding ``features`` and ``labels`` datasets.

    Returns:
        Tuple ``(features, labels)`` of in-memory NumPy arrays.
    """
    # `[:]` copies the datasets into memory, so the file can be closed before
    # returning (the handle was previously left open).
    with h5py.File(hdf5_path, mode="r") as db:
        features = db["features"][:]
        labels = db["labels"][:]

    return (features, labels)

def extract_embeddings(features, model):
    """Run ``features`` through a three-input model and return output slot 0.

    The same array is fed to all three inputs; the prediction is indexed as
    ``[:, :, 0]``, i.e. the first entry along the last axis.
    """
    triplet_inputs = [features] * 3
    stacked = model.predict(triplet_inputs)
    return stacked[:, :, 0]

In [5]:
def euclidean_distance(a,b):
    """Row-wise Euclidean distance between two backend tensors.

    Args:
        a, b: Tensors of identical shape; squared differences are summed over
            axis 1.

    Returns:
        Tensor of distances with axis 1 reduced away.
    """
    squared_distance = K.sum(K.square(a - b), axis=1)
    # The gradient of sqrt is infinite at 0, which happens when both rows are
    # identical (e.g. anchor == positive); clamp to epsilon to avoid NaNs.
    return K.sqrt(K.maximum(squared_distance, K.epsilon()))

def cosine_distance(a, b, normalize=True):
    """Row-wise cosine distance ``1 - cos(a, b)`` between two backend tensors.

    Args:
        a, b: Tensors of identical shape with the feature dimension on axis 1.
        normalize: L2-normalize each row first. Pass False only when the rows
            are already unit length.

    Returns:
        Tensor of distances with axis 1 reduced away, matching the shape
        returned by ``euclidean_distance``.
    """
    if normalize:
        # Normalize each sample's feature vector (axis 1). The previous axis=0
        # normalized every feature column across the batch instead.
        a = K.l2_normalize(a, axis=1)
        b = K.l2_normalize(b, axis=1)
    # Dot product of unit vectors is the cosine similarity. The previous code
    # returned the un-summed elementwise product.
    return 1.0 - K.sum(a * b, axis=1)

def triplet_loss(y_true, anchor_positive_negative_tensor, margin=1.0):
    """Hinge triplet loss on stacked (anchor, positive, negative) embeddings.

    Args:
        y_true: Ignored; present only because Keras passes targets to losses.
        anchor_positive_negative_tensor: Rank-3 tensor whose last axis holds
            the anchor (index 0), positive (1) and negative (2) embeddings.
        margin: Minimum gap required between the anchor-negative and the
            anchor-positive distance.

    Returns:
        Scalar tensor: mean over the batch of
        ``max(0, margin + d(anchor, positive) - d(anchor, negative))``.
    """
    anchor = anchor_positive_negative_tensor[:,:,0]
    positive = anchor_positive_negative_tensor[:,:,1]
    negative = anchor_positive_negative_tensor[:,:,2]
    Dp = euclidean_distance(anchor, positive)
    Dn = euclidean_distance(anchor, negative)

    # Apply the hinge per triplet and then average. Taking the hinge of the
    # batch mean (as before) lets easy triplets cancel out violated ones.
    return K.mean(K.maximum(0.0, margin + Dp - Dn))

In [8]:
class ImageNetFeatureExtractor(object):
    """Batch feature extractor backed by an ImageNet-pretrained ResNet50.

    Only ResNet50 (without its classification top) is implemented. The
    ``model`` argument is stored in ``model_name``, and any other backbone
    name triggers a warning instead of being silently ignored.
    """

    def __init__(self, model="resnet50", resize_to=(224, 224)):
        """
        Args:
            model: Backbone name; only "resnet50" is implemented.
            resize_to: ``(height, width)`` every image is resized to. It is
                also used as the spatial input size of the network.
        """
        self.model_name = model.lower()
        if self.model_name not in ("resnet50", "resnet"):
            import warnings
            warnings.warn("Unsupported model '{}'; falling back to ResNet50"
                          .format(model))
        self.preprocess_input = preprocess_input
        # Must be set before getModel() so the network input matches it.
        self.imageSize = resize_to
        self.model = None
        self.model = self.getModel()

    def extract(self, images):
        """Resize, preprocess and run ``images`` through the network.

        NOTE(review): ``preprocess_input`` is the Keras ResNet50 one, which
        presumably expects RGB channel order; confirm what callers pass.
        """
        images = self.preprocess(images)
        return self.model.predict(images)

    def getModel(self):
        """Return the ResNet50 feature model, building it on first use."""
        # Reuse the already-built network: rebuilding reloads the ImageNet
        # weights on every call.
        if getattr(self, "model", None) is not None:
            return self.model

        height, width = self.imageSize[0], self.imageSize[1]
        model = applications.ResNet50(weights='imagenet', include_top=False,
                                      input_shape=(height, width, 3))
        intermediate_layer_model = Model(inputs=model.input, outputs=model.get_layer(index=-1).output)

        return intermediate_layer_model

    @property
    def output_shape(self):
        """Output shape of the network for inputs of size ``imageSize``."""
        return self.model.compute_output_shape([[None, self.imageSize[0], self.imageSize[1], 3]])

    def resize_images(self, images):
        """Resize every image to ``imageSize`` and stack them into one array."""
        # cv2.resize takes dsize as (width, height); imageSize is (height, width).
        dsize = (self.imageSize[1], self.imageSize[0])
        images = np.array([cv2.resize(image, dsize) for image in images])
        return images

    def preprocess(self, images):
        """Resize the images and apply the ResNet50 input preprocessing."""
        images = self.resize_images(images)
        images = self.preprocess_input(images.astype("float"))
        return images

def concat_tensors(tensors, axis=-1):
    """Stack same-shaped tensors along a new axis.

    Args:
        tensors: List of backend tensors with identical shapes.
        axis: Position of the new axis in the result.

    Returns:
        One tensor with an extra dimension of size ``len(tensors)`` at ``axis``.
    """
    # Concatenate along the same axis that was just inserted. Previously
    # `axis` was only used for expand_dims and the concatenation always
    # happened on the last axis, which is only correct for the default.
    return K.concatenate([K.expand_dims(t, axis=axis) for t in tensors], axis=axis)


def get_small_network(input_shape=(None, 7, 7, 2048)):
    """Build the embedding head: global average pooling plus three dense layers.

    ``input_shape`` includes the batch dimension, which is dropped. The final
    layer has 256 ReLU units.
    """
    per_sample_shape = input_shape[1:]
    embedding_layers = [
        GlobalAveragePooling2D(input_shape=per_sample_shape),
        Dense(512, activation="relu"),
        Dropout(0.5),
        Dense(512, activation="relu"),
        Dropout(0.25),
        Dense(256, activation="relu"),
    ]
    return Sequential(embedding_layers)

def get_triplet_network(input_shape=(None, 7, 7, 2048)):
    """Build a three-input model that shares one embedding network.

    The inputs are, in order, anchor, positive and negative. The output stacks
    the three embeddings along a new last axis via ``concat_tensors``.
    """
    shared_encoder = get_small_network(input_shape=input_shape)

    branch_shape = input_shape[1:]
    # Order matters: anchor, positive, negative.
    triplet_inputs = [Input(branch_shape) for _ in range(3)]
    triplet_embeddings = [shared_encoder(branch) for branch in triplet_inputs]

    stacked = Lambda(concat_tensors)(triplet_embeddings)
    return Model(triplet_inputs, stacked)

In [9]:
image_path_list = []
labels = []
train_data_dir = 'D:/retinal_data_set_visioncare/original_new/'

# One sub-folder per grade, named by its integer label:
# 0 = no DR, 1 = mild NPDR, 2 = moderate NPDR, 3 = severe NPDR, 4 = PDR.
for grade in range(5):
    grade_root = train_data_dir + str(grade) + '/'
    # Sort so the path/label order does not depend on the file system.
    for file_name in sorted(os.listdir(grade_root)):
        if file_name == 'Thumbs.db':
            # Windows thumbnail cache, not an image.
            continue
        image_path_list.append(grade_root + file_name)
        labels.append(grade)

labels = np.array(labels)

In [10]:
# Number of labelled images collected above.
print(labels.shape[0])

927


In [11]:
# Inspect the collected image paths (one entry per labelled image).
image_path_list

['D:/retinal_data_set_visioncare/original_new/0/10015_left.jpeg',
 'D:/retinal_data_set_visioncare/original_new/0/10015_right.jpeg',
 'D:/retinal_data_set_visioncare/original_new/0/10031_left.jpeg',
 'D:/retinal_data_set_visioncare/original_new/0/10092_right.jpeg',
 'D:/retinal_data_set_visioncare/original_new/0/10166_left.jpeg',
 'D:/retinal_data_set_visioncare/original_new/0/10166_right.jpeg',
 'D:/retinal_data_set_visioncare/original_new/0/1021_left.jpeg',
 'D:/retinal_data_set_visioncare/original_new/0/1021_right.jpeg',
 'D:/retinal_data_set_visioncare/original_new/0/10276_left.jpeg',
 'D:/retinal_data_set_visioncare/original_new/0/10284_right.jpeg',
 'D:/retinal_data_set_visioncare/original_new/0/10333_right.jpeg',
 'D:/retinal_data_set_visioncare/original_new/0/1035_left.jpeg',
 'D:/retinal_data_set_visioncare/original_new/0/1035_right.jpeg',
 'D:/retinal_data_set_visioncare/original_new/0/10400_right.jpeg',
 'D:/retinal_data_set_visioncare/original_new/0/10553_left.jpeg',
 'D:/r

In [12]:
# Load the first ten images as 3-channel colour arrays.
images = [cv2.imread(image_file, cv2.IMREAD_COLOR) for image_file in image_path_list[:10]]

In [13]:
# Resize every loaded image to 224x224 and stack them into one array.
images = np.array([cv2.resize(frame, dsize=(224, 224)) for frame in images])

In [14]:
# File names (last "/"-separated component) of the first ten paths.
image_ids = [image_file.rsplit("/", 1)[-1] for image_file in image_path_list[:10]]

In [45]:
# Compute ResNet50 features for a single query image.
source = 'D:/retinal_data_set_visioncare/newTrainValidationTestData/new_valid/0/140_right.jpeg'
#source = 'D:/retinal_data_set_visioncare/original/1/KALUARACHCHI_MRS\ J_19460131_(55043).jpg'
img = image.load_img(source, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)  # add a leading batch axis
x = preprocess_input(x)
# Get the features.
# NOTE(review): this rebinds the name `extract_features`, which is also the
# HDF5 loader function defined earlier in the notebook; later calls to that
# function fail unless its cell is re-run. `feature_extractor` is only created
# in a cell further down the file, so this cell relies on out-of-order execution.
extract_features = feature_extractor.getModel().predict(x)

In [46]:
# Here `extract_features` is the feature array assigned in the previous cell,
# not the HDF5 loader function of the same name.
extract_features

array([[[[3.0015404 , 0.00998839, 0.07474519, ..., 0.        ,
          0.66439015, 1.8831975 ]]]], dtype=float32)

In [29]:
# Inspect the current contents of `images`.
images

[array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        ...,
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]], dtype=uint8), array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0

In [16]:
# Build the extractor passed to dump_features below; its constructor builds
# the ImageNet-weighted ResNet50.
feature_extractor = ImageNetFeatureExtractor(model='resnet50')

In [17]:
print("[+] Successfully loaded pre-trained model")
# Extract features for every listed image and write them to the HDF5 store.
dump_features(
    image_paths=image_path_list,
    labels=np.array(labels),
    hdf5_path='C:/Users/hp/Downloads/data/similarity/similarity_dresnet_db.hdf5',
    feature_extractor=feature_extractor,
)

[+] Successfully loaded pre-trained model
Extracting 17/927
Extracting 33/927
Extracting 49/927
Extracting 65/927
Extracting 81/927
Extracting 97/927
Extracting 113/927
Extracting 129/927
Extracting 145/927
Extracting 161/927
Extracting 177/927
Extracting 193/927
Extracting 209/927
Extracting 225/927
Extracting 241/927
Extracting 257/927
Extracting 273/927
Extracting 289/927
Extracting 305/927
Extracting 321/927
Extracting 337/927
Extracting 353/927
Extracting 369/927
Extracting 385/927
Extracting 401/927
Extracting 417/927
Extracting 433/927
Extracting 449/927
Extracting 465/927
Extracting 481/927
Extracting 497/927
Extracting 513/927
Extracting 529/927
Extracting 545/927
Extracting 561/927
Extracting 577/927
Extracting 593/927
Extracting 609/927
Extracting 625/927
Extracting 641/927
Extracting 657/927
Extracting 673/927
Extracting 689/927
Extracting 705/927
Extracting 721/927
Extracting 737/927
Extracting 753/927
Extracting 769/927
Extracting 785/927
Extracting 801/927
Extracting 817

In [18]:
# File the ModelCheckpoint callback writes to and load_model later reads from.
model_check_point_loc = 'C:/Users/hp/Downloads/data/similarity/resnet_dr.h5'

In [21]:
features, labels = extract_features('C:/Users/hp/Downloads/data/similarity/similarity_dresnet_db.hdf5')
print("[+] Finished loading extracted features")
model = get_triplet_network(features.shape)
# Draw one random (anchor, positive, negative) triplet per stored feature vector.
data = []
for i in range(len(features)):
    anchor, positive, negative = get_triplets(features, labels)
    data.append([anchor, positive, negative])
data = np.array(data)
# triplet_loss ignores y_true, so the targets are only placeholders. Size them
# from the data and the model output instead of hard-coding the sample count
# and an embedding width that does not match the network.
targets = np.zeros(shape=(len(data),) + tuple(model.output_shape[1:]))
callback = ModelCheckpoint(model_check_point_loc, period=1, monitor="val_loss")
X_train, X_test, Y_train, Y_test = train_test_split(data, targets)
model.compile(optimizers.Adam(1e-4), triplet_loss)
# Column 0/1/2 of each triplet feeds the anchor/positive/negative input.
model.fit([X_train[:,0], X_train[:,1], X_train[:,2]], Y_train, epochs=10,
          validation_data=([X_test[:,0], X_test[:,1], X_test[:,2]], Y_test),
          callbacks=[callback], batch_size=16)

[+] Finished loading extracted features
Train on 695 samples, validate on 232 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1ae78312cf8>

In [40]:
# Read the stored file names into memory, then close the file (the handle was
# previously left open).
with h5py.File('C:/Users/hp/Downloads/data/similarity/similarity_dresnet_db.hdf5', mode="r") as image_db:
    image_ids = image_db["image_ids"][:]

In [41]:
# Inspect the file names read from the HDF5 store.
image_ids

array(['10015_left.jpeg', '10015_right.jpeg', '10031_left.jpeg',
       '10092_right.jpeg', '10166_left.jpeg', '10166_right.jpeg',
       '1021_left.jpeg', '1021_right.jpeg', '10276_left.jpeg',
       '10284_right.jpeg', '10333_right.jpeg', '1035_left.jpeg',
       '1035_right.jpeg', '10400_right.jpeg', '10553_left.jpeg',
       '10558_left.jpeg', '10585_right.jpeg', '10790_left.jpeg',
       '10833_left.jpeg', '10833_right.jpeg', '10842_left.jpeg',
       '1088_left.jpeg', '1088_right.jpeg', '10919_left.jpeg',
       '10919_right.jpeg', '11080_left.jpeg', '11080_right.jpeg',
       '11180_left.jpeg', '11180_right.jpeg', '11216_left.jpeg',
       '1136_left.jpeg', '1136_right.jpeg', '11385_left.jpeg',
       '11385_right.jpeg', '11448_left.jpeg', '11482_right.jpeg',
       '11490_left.jpeg', '11490_right.jpeg', '11504_left.jpeg',
       '11504_right.jpeg', '1161_left.jpeg', '1161_right.jpeg',
       '11916_left.jpeg', '11916_right.jpeg', '11957_right.jpeg',
       '12155_left.jpeg', '1

In [31]:
def get_image_index(imagePath='D:/retinal_data_set_visioncare/newTrainValidationTestData/new_valid/0/140_right.jpeg'):
    """Return the first position in ``image_ids`` matching the path's file name.

    Raises IndexError when the file name is not present in ``image_ids``.
    """
    target_name = imagePath.rsplit("/", 1)[-1]
    matches = np.where(image_ids == target_name)[0]
    return matches[0]

def get_image_path(index):
    """Return the first path in ``image_path_list`` whose file name equals
    ``image_ids[index]``, or None when no path matches."""
    candidates = (
        candidate for candidate in image_path_list
        if candidate.rsplit('/', 1)[1] == image_ids[index]
    )
    return next(candidates, None)

In [32]:
# NOTE(review): this redefines get_image_path from the previous cell with the
# same lookup logic; one of the two definitions is redundant.
def get_image_path(index):
    """Return the full path whose file name equals ``image_ids[index]``.

    Scans ``image_path_list`` in order; implicitly returns None if nothing matches.
    """
    for imagePath in image_path_list:
        if imagePath.rsplit('/', 1)[1] == image_ids[index]:
            return imagePath

In [33]:
# Restore the trained triplet model; the custom loss must be supplied by name.
model = load_model(
    model_check_point_loc,
    custom_objects={"triplet_loss": triplet_loss},
)
features, labels = extract_features('C:/Users/hp/Downloads/data/similarity/similarity_dresnet_db.hdf5')
# Feed the same features to all three inputs and keep slot 2 of the last axis.
embeddings = model.predict([features] * 3)[:, :, 2]

In [49]:
# Length of a single embedding vector.
len(embeddings[0])

256

In [50]:
# Look up the default query image and rank all embeddings by distance to it.
image_id = get_image_index()
query = embeddings[image_id]
distances = pairwise_distances(query.reshape(1, query.size), embeddings)
indices = np.argsort(distances[0])[:12]
# Load the 12 nearest images as 200x200 tiles.
images = [cv2.resize(cv2.imread(get_image_path(neighbour)), (200, 200)) for neighbour in indices]
# Arrange the tiles into a single 4x3 montage and show it in an OpenCV window.
result = build_montages(images, (200, 200), (4, 3))[0]
cv2.imshow("Result", result)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [52]:
# Inspect the current value of `query`.
query

array([[[[3.0015404 , 0.00998839, 0.07474519, ..., 0.        ,
          0.66439015, 1.8831975 ]]]], dtype=float32)

In [59]:
# Compute ResNet50 features for a single query image that is looked up by
# content instead of by stored index.
#source = 'D:/retinal_data_set_visioncare/newTrainValidationTestData/new_valid/0/140_right.jpeg'
source = 'D:/retinal_data_set_visioncare/TrainValidationData/validation/3/7531_left.jpeg'
img = image.load_img(source, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)  # add a leading batch axis
x = preprocess_input(x)
# Get the features.
# NOTE(review): this rebinds the name `extract_features`, which is also the
# HDF5 loader function defined earlier in the notebook; cells that call that
# function afterwards fail unless its definition is re-run.
extract_features = feature_extractor.getModel().predict(x)

In [60]:
# Feed the single-image features to all three inputs of the triplet model.
# Here `extract_features` is the array assigned in the previous cell.
test_embedding = model.predict([extract_features, extract_features, extract_features])

In [61]:
# Slot 2 of the first (only) sample is the query embedding.
test_query = test_embedding[0, :, 2]
distances = pairwise_distances(test_query.reshape(1, test_query.size), embeddings)
indices = np.argsort(distances[0])[:12]
# Load the 12 nearest stored images as 200x200 tiles.
images = [cv2.resize(cv2.imread(get_image_path(neighbour)), (200, 200)) for neighbour in indices]
# Arrange the tiles into a single 4x3 montage and show it in an OpenCV window.
result = build_montages(images, (200, 200), (4, 3))[0]
cv2.imshow("Result", result)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
def get_similar_image_indices(embeddings, index, num_results=4):
    """Return the indices of the ``num_results`` embeddings closest to
    ``embeddings[index]``, nearest first."""
    probe = embeddings[index].reshape(1, -1)
    ranked = np.argsort(pairwise_distances(probe, embeddings)[0])
    return ranked[:num_results]

def find_num_correct(true_indices, predicted_indices):
    """Count the entries of ``true_indices`` that occur in ``predicted_indices``."""
    return sum(1 for idx in true_indices if idx in predicted_indices)

model = load_model(model_check_point_loc, custom_objects={"triplet_loss":triplet_loss})
# NOTE(review): this reads 'similarity_db.hdf5', not the
# 'similarity_dresnet_db.hdf5' file used by the other cells - confirm intended.
features, labels = extract_features('C:/Users/hp/Downloads/data/similarity/similarity_db.hdf5')
embeddings = model.predict([features, features, features])
embeddings = embeddings[:,:,2]
num_correct = 0
# Evaluate every stored embedding instead of a hard-coded count, which raises
# IndexError on smaller datasets and did not match the divisor used below.
num_queries = len(embeddings)

for i in range(num_queries):
    similar_indices = get_similar_image_indices(embeddings, i)
    # Integer division keeps the Python 2 meaning of `i/4`.
    # NOTE(review): assumes labels start at 1 and come in consecutive groups
    # of four images - verify against the dataset being evaluated.
    true_indices = np.where(labels==(i//4)+1)[0].tolist()
    num_correct += find_num_correct(true_indices, similar_indices)/4.0
# print is a function in Python 3; the old statement form is a SyntaxError.
print("Accuracy", num_correct/num_queries)