In [9]:
import os
from keras import applications
from keras import models
from keras import layers
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense, Activation, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras import regularizers
import matplotlib.pyplot as plt
from keras.models import load_model
from keras.utils.np_utils import to_categorical 
from keras.callbacks import ModelCheckpoint
from keras.preprocessing import image
from keras import Model
from keras import initializers
from keras.callbacks import LearningRateScheduler
from keras.utils import layer_utils, np_utils
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.datasets import make_classification
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc
from classification_models import ResNet18
from classification_models.resnet import preprocess_input as resnet_preprocess_input
from keras.applications.densenet import preprocess_input as densenet_preprocess_input
from keras.applications.vgg16 import preprocess_input as vgg_preprocess_input
from sklearn.cross_validation import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.utils import class_weight
import seaborn as sn
import pandas as pd
from scipy import interp
from itertools import cycle
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix
from sklearn.externals import joblib
from sklearn.metrics import jaccard_similarity_score
from scipy import spatial

In [10]:
# Spatial size every query image is resized to before entering the backbones.
image_width = image_height = 224

# Artifacts produced elsewhere: the fitted feature scaler, the fitted SVD
# reducer, and the CSV holding one row per database image.
feature_scaler_filename = "D:/retinal_data_set_visioncare/Image_Retrieval/feature_scaler.save"
svd_scaler_file_name = "D:/retinal_data_set_visioncare/Image_Retrieval/svd_scaler.save"
file_path = 'D:/retinal_data_set_visioncare/Image_Retrieval/img_database.csv'

# Three ImageNet-pretrained convolutional backbones, each without its
# classification head so they can serve as feature extractors.
densenet_base = applications.DenseNet201(
    include_top=False,
    weights='imagenet',
    input_shape=(image_width, image_height, 3),
)
resnet_base = ResNet18(
    include_top=False,
    weights='imagenet',
    input_shape=(image_width, image_height, 3),
)
vgg_base = applications.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(image_width, image_height, 3),
)

In [None]:
# DenseNet201 feature extractor: take the output of the backbone's last layer
# and global-average-pool it, then wrap input -> pooled output as a model.
densenet_x = densenet_base.layers[-1].output
densenet_feature_extraction_layer = GlobalAveragePooling2D()(densenet_x)
densenet_model = Model(
    inputs=densenet_base.input,
    outputs=densenet_feature_extraction_layer,
)
densenet_model.summary()

In [None]:
# ResNet18 feature extractor: take the output of the backbone's last layer
# and global-average-pool it, then wrap input -> pooled output as a model.
resnet_x = resnet_base.layers[-1].output
resnet_feature_extraction_layer = GlobalAveragePooling2D()(resnet_x)
resnet_model = Model(
    inputs=resnet_base.input,
    outputs=resnet_feature_extraction_layer,
)
resnet_model.summary()

In [None]:
# VGG16 feature extractor: take the output of the backbone's last layer
# and global-average-pool it, then wrap input -> pooled output as a model.
vgg_x = vgg_base.layers[-1].output
vgg_feature_extraction_layer = GlobalAveragePooling2D()(vgg_x)
vgg_model = Model(
    inputs=vgg_base.input,
    outputs=vgg_feature_extraction_layer,
)
vgg_model.summary()

In [None]:
# Load the saved deep-feature model from disk and print its layer table so the
# layer names used for feature extraction can be looked up.
loaded_pretrained_deep_feature_model = load_model(
    filepath='D:/retinal_data_set_visioncare/models/ensemble/densenet_deep_feature_with_SVD_dr.h5'
)
loaded_pretrained_deep_feature_model.summary()

In [None]:
# Cut the loaded model at the layer named 'activation_25' and expose that
# layer's output as the compressed deep feature.
# NOTE(review): 'activation_25' looks like an auto-generated layer name; confirm
# it against the summary printed for the loaded model before relying on it.
feature_extraction_layer = loaded_pretrained_deep_feature_model.get_layer(name='activation_25').output
feature_extract_model = Model(
    inputs=loaded_pretrained_deep_feature_model.input,
    outputs=feature_extraction_layer,
)
feature_extract_model.summary()

In [None]:
# Load the saved deep-hash model from disk and print its layer table so the
# layer name used for hash-code extraction can be looked up.
loaded_deep_hash_model = load_model(
    filepath='D:/retinal_data_set_visioncare/Image_Retrieval/deep_hash_model.h5'
)
loaded_deep_hash_model.summary()

In [None]:
# Cut the hash model at the layer named 'activation_5'; its output is later
# thresholded at 0.5 to obtain the binary hash code.
# NOTE(review): 'activation_5' looks like an auto-generated layer name; confirm
# it against the summary printed for the loaded model before relying on it.
hashcode_extraction_layer = loaded_deep_hash_model.get_layer(name='activation_5').output
hashcode_extract_model = Model(
    inputs=loaded_deep_hash_model.input,
    outputs=hashcode_extraction_layer,
)
hashcode_extract_model.summary()

In [None]:
# Restore the fitted feature scaler and the fitted SVD reducer, in that order.
# joblib.load unpickles its input, so only point these paths at trusted files.
norm_scalar, norm_truncated_opt_svd = (
    joblib.load(saved_path)
    for saved_path in (feature_scaler_filename, svd_scaler_file_name)
)

In [None]:
# retrieve the compressed deep feature and the binary hash code for a given query image
def get_deep_feature_hashcode_for_query_img(source):
    """Compute the deep feature vector and binary hash code of one query image.

    The image is passed through the DenseNet201, ResNet18 and VGG16 feature
    extractors. Their outputs are concatenated, transformed by ``norm_scalar``
    and then by ``norm_truncated_opt_svd``, compressed by
    ``feature_extract_model`` and finally fed to ``hashcode_extract_model``.

    Parameters
    ----------
    source : str
        Path of the query image file, handed to ``image.load_img``.

    Returns
    -------
    tuple
        ``(deep_feature, deep_hash_code)`` where ``deep_feature`` is the
        flattened output of ``feature_extract_model`` as a list and
        ``deep_hash_code`` is a list of 0/1 ints obtained by thresholding the
        output of ``hashcode_extract_model`` at 0.5.
    """
    # Load the file named by `source` itself. The previous version overwrote
    # `source` with `retina_root + relative_path`, two names this function
    # never receives, so the caller's argument was ignored.
    img = image.load_img(source, target_size=(image_width, image_height))
    img_x = np.expand_dims(image.img_to_array(img), axis=0)

    # NOTE(review): Keras' preprocess_input can write its result over a float
    # input array. If that happens here, the ResNet18 and VGG16 branches see
    # pixels that were already rescaled for DenseNet. The call order is kept
    # as-is because the stored database features may have been generated the
    # same way; confirm against the indexing code before passing copies.

    # densenet201 - feature extraction
    densenet201_x = densenet_preprocess_input(img_x)
    densenet201_feature_vector = densenet_model.predict(densenet201_x).flatten()

    # resnet18 - feature extraction
    resnet18_x = resnet_preprocess_input(img_x)
    resnet18_feature_vector = resnet_model.predict(resnet18_x).flatten()

    # vgg16 - feature extraction
    vgg16_x = vgg_preprocess_input(img_x)
    vgg16_feature_vector = vgg_model.predict(vgg16_x).flatten()

    # Concatenate in the fixed order DenseNet201, ResNet18, VGG16, then apply
    # the fitted scaler and SVD to the single-row batch.
    ensemble_feature_vector = np.concatenate(
        (densenet201_feature_vector, resnet18_feature_vector, vgg16_feature_vector)
    )
    scaled_feature_vector = norm_scalar.transform(np.array([ensemble_feature_vector]))
    reduced_feature_vector = norm_truncated_opt_svd.transform(scaled_feature_vector)

    # Compressed deep feature used for the cosine re-ranking stage.
    flat_compressed_feature = feature_extract_model.predict(reduced_feature_vector).flatten()
    deep_feature = list(flat_compressed_feature)

    # The hash model expects a batch, so re-add the leading batch axis; its
    # output is binarised at 0.5 to form the hash code.
    deep_hash_proba = hashcode_extract_model.predict(flat_compressed_feature[np.newaxis, :])
    deep_hash_code = [1 if val >= 0.5 else 0 for val in deep_hash_proba.flatten()]

    return deep_feature, deep_hash_code

In [2]:
def get_jaccard_similarity_score(row_hash_code, query_hashcode):
    """Return the fraction of positions at which two flat codes agree.

    For flat (1-D) label vectors, scikit-learn's legacy
    ``jaccard_similarity_score`` returned exactly this element-wise agreement
    rate (1 minus the normalised Hamming distance), not the set-based Jaccard
    index. That function was later deprecated and removed from scikit-learn,
    so the same value is computed here directly with NumPy.

    Parameters
    ----------
    row_hash_code : sequence
        Flat code of a database entry (e.g. a list of 0/1 ints).
    query_hashcode : sequence
        Flat code of the query, of the same length.

    Returns
    -------
    float
        Share of positions where both codes hold the same value, in [0, 1].

    Raises
    ------
    ValueError
        If either input is not 1-D, the lengths differ, or the codes are empty.
    """
    row_bits = np.asarray(row_hash_code)
    query_bits = np.asarray(query_hashcode)

    if row_bits.ndim != 1 or query_bits.ndim != 1:
        raise ValueError("hash codes must be flat (1-D) sequences")
    if row_bits.shape != query_bits.shape:
        raise ValueError(
            "hash codes must have the same length, got %d and %d"
            % (row_bits.size, query_bits.size)
        )
    if row_bits.size == 0:
        raise ValueError("hash codes must not be empty")

    jaccard_sim = float(np.mean(row_bits == query_bits))
    return jaccard_sim

In [22]:
def get_cosine_similarity_score(row_deep_feature, query_feature):
    """Return the cosine similarity between two feature vectors.

    Parameters
    ----------
    row_deep_feature : array-like
        Feature vector of a database entry. A single vector wrapped in extra
        length-1 axes (e.g. ``[[...]]``) is accepted and squeezed to 1-D.
    query_feature : array-like
        Feature vector of the query, with the same number of elements.

    Returns
    -------
    float
        ``1 - cosine_distance``; 1 means same direction, -1 opposite.
    """
    # SciPy's cosine distance is defined for 1-D vectors. Squeeze away
    # length-1 axes so a vector stored as a single-row 2-D array is compared
    # as the vector it represents; genuinely multi-row input still reaches
    # SciPy unchanged and is rejected there.
    row_vector = np.atleast_1d(np.squeeze(np.asarray(row_deep_feature)))
    query_vector = np.atleast_1d(np.squeeze(np.asarray(query_feature)))

    cosine_sim = 1 - spatial.distance.cosine(row_vector, query_vector)
    return cosine_sim

In [21]:
# Load the image database that the query is compared against (hash codes for
# the coarse match, deep features for the re-ranking).
# literal_eval is imported here because the notebook otherwise imports it only
# in a later scratch cell, which makes this cell fail on a fresh kernel.
from ast import literal_eval

# The list-valued columns are stored as text in the CSV; literal_eval turns
# each cell back into a Python list while reading.
# NOTE(review): the preview of this table shows deep_features values that look
# like NumPy array text (space-separated, no commas), which literal_eval cannot
# parse. Confirm the CSV stores comma-separated list literals in that column.
dataset = pd.read_csv(
    file_path,
    delimiter=',',
    converters=dict(deep_features=literal_eval, hash_code=literal_eval),
)

In [19]:
# Preview the first rows of the loaded database to check the img_path,
# deep_features and hash_code columns.
dataset.head()

Unnamed: 0,img_path,deep_features,hash_code
0,D:/retinal_data_set_visioncare/New_Train_Test_...,[[0. 0.2817384 0.17564283 0.16437872 ...,"[0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, ..."
1,D:/retinal_data_set_visioncare/New_Train_Test_...,[[0. 0. 0.35064828 0.20183267 ...,"[1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, ..."
2,D:/retinal_data_set_visioncare/New_Train_Test_...,[[0. 0. 0. 0.03911216 ...,"[0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, ..."
3,D:/retinal_data_set_visioncare/New_Train_Test_...,[[0. 0.20498309 0. 0.12469342 ...,"[1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, ..."
4,D:/retinal_data_set_visioncare/New_Train_Test_...,[[0. 0.34591606 0.22904815 0.13837777 ...,"[1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, ..."


In [None]:
# Retrieve the deep feature and the deep hash code for the query image.
query_img_path = 'D:/retinal_data_set_visioncare/New_Train_Test_Data/train/3/3444_right.jpeg'
query_deep_feature, query_deep_hash_code = get_deep_feature_hashcode_for_query_img(query_img_path)

# Stage 1 (coarse): score every database entry by hash-code similarity.
# `dataset` is deliberately not modified or reassigned. Overwriting it with the
# filtered result meant that re-running this cell, e.g. for another query
# image, searched only the previous top results instead of the full database.
scored = dataset.assign(
    jaccard_sim=[
        get_jaccard_similarity_score(row_hash_code, query_deep_hash_code)
        for row_hash_code in dataset['hash_code']
    ]
)

# Keep the entries whose hash similarity reaches the threshold. (Taking the
# top k by jaccard_sim would be the alternative to a fixed threshold.)
# .copy() makes the candidates an independent frame, so adding a column below
# does not write into a slice of `scored`.
threshold = 0.5
candidates = scored.loc[scored['jaccard_sim'] >= threshold].copy()

# Stage 2 (fine): re-rank the candidates by cosine similarity in the deep
# feature space and keep the ten best. Building the column from a list also
# works when no entry passed the threshold.
candidates['cosine_sim'] = [
    get_cosine_similarity_score(row_deep_feature, query_deep_feature)
    for row_deep_feature in candidates['deep_features']
]
top_matches = candidates.sort_values(by='cosine_sim', ascending=False).head(10)
top_matches

In [1]:
from sklearn.metrics import jaccard_similarity_score

# Sanity check on two toy vectors. The printed 0.5 equals the share of
# positions that match (2 of 4); the set-based Jaccard index of the 1-bits
# would be 1/3. So for flat label vectors this call behaves like an
# element-wise agreement rate.
print(jaccard_similarity_score([1, 1, 0, 0], [1, 0, 1, 0]))

0.5


In [3]:
from ast import literal_eval
from io import StringIO
import pandas as pd

# Toy pipe-separated table whose second column holds list literals as text.
txt = """col1|col2
a|[1,2,3]
b|[4,5,6]"""

# Read the table from memory and let literal_eval turn each col2 cell into a
# real Python list during parsing.
df = pd.read_csv(
    StringIO(txt),
    sep='|',
    converters={'col2': literal_eval},
)
print(df)


  col1       col2
0    a  [1, 2, 3]
1    b  [4, 5, 6]


In [4]:
# Score each toy row's list against the fixed reference [1, 1, 1] and store
# the result next to it.
df['newcolumn'] = [
    get_jaccard_similarity_score(row_values, [1, 1, 1])
    for row_values in df['col2']
]
print(df)

  col1       col2  newcolumn
0    a  [1, 2, 3]   0.333333
1    b  [4, 5, 6]   0.000000


In [9]:
# Rank the toy rows by the similarity score, highest first, and show at most
# ten of them.
df.sort_values(by='newcolumn', ascending=False).head(10)

Unnamed: 0,col1,col2,newcolumn
0,a,"[1, 2, 3]",0.333333
1,b,"[4, 5, 6]",0.0


In [21]:
from scipy import spatial

# Two toy 3-element vectors for checking the cosine computation by hand.
dataSetI = [1, 0, -1]
dataSetII = [-1,-1, 0]

# SciPy returns a cosine *distance*; subtracting it from 1 gives the similarity.
result = 1 - spatial.distance.cosine(u=dataSetI, v=dataSetII)
print(result)

-0.5


In [23]:
# Compare each toy row's list with the fixed reference [1, 1, 1] by cosine
# similarity and store the result in a second score column.
df['new_column'] = [
    get_cosine_similarity_score(row_values, [1, 1, 1])
    for row_values in df['col2']
]
print(df)

  col1       col2  newcolumn  new_column
0    a  [1, 2, 3]   0.333333    0.925820
1    b  [4, 5, 6]   0.000000    0.986928


In [24]:
# Keep only the toy rows whose first score reaches the cut-off.
threshold = 0.3
df = df.loc[df['newcolumn'].ge(threshold)]
print(df)

  col1       col2  newcolumn  new_column
0    a  [1, 2, 3]   0.333333     0.92582
