Pretrained:

- VGG-16 and VGG-19: These CNNs were trained on the ImageNet dataset, and are known for their ability to classify images with high accuracy. They can be fine-tuned for other tasks, such as image similarity.

- Inception v3: This CNN was also trained on the ImageNet dataset and is known for its efficiency and good performance on a wide range of image recognition tasks.

- ResNet-50, ResNet-101, and ResNet-152: These CNNs were trained on the ImageNet dataset and are known for their deep architecture and good performance. They are a popular choice for image similarity and other image recognition tasks.

- MobileNet: This CNN is designed to be efficient and fast, making it suitable for use on mobile devices. It was also trained on the ImageNet dataset and can be used for image similarity and other image recognition tasks.

Similarity:

- cosine similarity
- euclidean distance
- manhattan distance

In [13]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import euclidean_distances, cosine_similarity
from sklearn.metrics.pairwise import manhattan_distances
from sklearn.neighbors import DistanceMetric
from tensorflow import keras
from tensorflow.keras.applications import ResNet50, VGG16, InceptionV3
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array

In [None]:
# Query image that the dataset images are compared against.
test_image = "dataset/test.jpg"
# Folder holding the candidate images.
dataset_path = "dataset_test/dandelion/"
# Bare entry names (not full paths) of the dataset folder.  os.listdir returns
# them in arbitrary order, so they are sorted to keep every index-based result
# derived from this list reproducible between runs and platforms.
image_paths = sorted(os.listdir(dataset_path))

In [None]:
# loading models
# Every backbone is created with ImageNet weights and include_top=False, each
# with the fixed input size given here.
vgg_model = VGG16(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
inception_model = InceptionV3(include_top=False, weights='imagenet', input_shape=(299, 299, 3))
resnet_model = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# The networks are only used for inference here, so they are all frozen.
vgg_model.trainable = False
inception_model.trainable = False
resnet_model.trainable = False

# Registry of the loaded backbones, keyed by the name used to select them.
models = dict(
    VGG16=vgg_model,
    InceptionV3=inception_model,
    ResNet50=resnet_model,
    # color=opencv_model1,
    # shapes=opencv_model2,
)

def extract_features_image(model, image):
    """Run one image through a pretrained backbone and return its features.

    Args:
        model: Either a key of the module-level ``models`` dict ('VGG16',
            'InceptionV3' or 'ResNet50') or a Keras model object.
        image: A PIL image or array accepted by
            ``keras.preprocessing.image.img_to_array``.

    Returns:
        Whatever ``predict`` returns for a batch holding this single image.

    Raises:
        KeyError: If ``model`` is a string that is not a key of ``models``.
    """
    if isinstance(model, str):
        # A name was given: resolve it to the loaded network.  Calling
        # ``predict`` on the bare string was the original failure.
        model_name = model
        network = models[model_name]
    else:
        network = model
        # Recover the registry name (if any) so that the matching
        # preprocessing is still applied when a model object is passed.
        model_name = next(
            (name for name, candidate in models.items() if candidate is network),
            None,
        )

    preprocessors = {
        'VGG16': keras.applications.vgg16.preprocess_input,
        'InceptionV3': keras.applications.inception_v3.preprocess_input,
        'ResNet50': keras.applications.resnet50.preprocess_input,
    }

    batch = keras.preprocessing.image.img_to_array(image)
    batch = tf.expand_dims(batch, 0)

    # The registered networks were built with a fixed input size, so an image
    # of any other size must be resized before ``predict``.
    # NOTE(review): assumes a single channels-last image input - confirm if
    # other model types are ever passed in.
    height, width = network.input_shape[1:3]
    if height is not None and width is not None:
        if tuple(batch.shape[1:3]) != (height, width):
            batch = tf.image.resize(batch, (height, width))

    # Models that are not in the registry are fed the raw pixel values.
    preprocess = preprocessors.get(model_name)
    if preprocess is not None:
        batch = preprocess(batch)

    return network.predict(batch, batch_size=1)

# resnet_model was built with input_shape=(224, 224, 3), so the query image is
# resized to 224x224 while loading instead of keeping its native resolution.
image_test = keras.preprocessing.image.load_img(test_image, target_size=(224, 224))
features_test = extract_features_image('ResNet50', image_test)
# features1 = features1.flatten()
# euclidean_dis = euclidean_distances(features1.reshape(1,-1),features2.reshape(1,-1))

In [None]:
def extract_features(model, images_path, preprocess=None):
    """Extract features for every image file in a directory.

    Args:
        model: Keras model whose ``predict`` produces the features.
        images_path: Directory whose entries are all loaded as images.
        preprocess: Optional callable applied to each image batch before
            ``predict``.  Defaults to the module-level ``preprocess_input``
            (imported from ``tensorflow.keras.applications.resnet50``); pass
            the model's own preprocessing function for other architectures.

    Returns:
        ``np.array`` stacking one ``model.predict`` output per image, in
        sorted file-name order.
    """
    if preprocess is None:
        preprocess = preprocess_input

    # Resize while loading when the model declares a fixed input size.
    # NOTE(review): assumes a single channels-last image input - confirm.
    target_size = tuple(model.input_shape[1:3])
    if None in target_size:
        # No fixed size: images keep their native resolution, so the stacked
        # result is only regular if all images share the same size.
        target_size = None

    features = []
    # os.listdir order is arbitrary; sorting makes the row index of the result
    # map to the same file on every run.
    for image_name in sorted(os.listdir(images_path)):
        image = keras.preprocessing.image.load_img(
            os.path.join(images_path, image_name), target_size=target_size
        )
        # The preprocessing function and predict() need a numeric array with a
        # leading batch axis, not a PIL image.
        batch = np.expand_dims(keras.preprocessing.image.img_to_array(image), axis=0)
        features.append(model.predict(preprocess(batch)))
    return np.array(features)

dataset_features = extract_features(vgg_model, dataset_path)

# extract_features walks a directory, so the single query image is pushed
# through the same VGG16 pipeline directly instead.
query_image = keras.preprocessing.image.load_img(
    test_image, target_size=vgg_model.input_shape[1:3]
)
query_batch = np.expand_dims(keras.preprocessing.image.img_to_array(query_image), axis=0)
query_batch = keras.applications.vgg16.preprocess_input(query_batch)
# The sklearn pairwise functions need 2-D (n_samples, n_features) input, so
# the feature map is flattened into a single row.
query_image_features = vgg_model.predict(query_batch).reshape(1, -1)

#compute similarity measure
similarities = []
for i, img_features in enumerate(dataset_features):
    img_vector = np.asarray(img_features).reshape(1, -1)

    # compute cosine similarity
    cosine_sim = float(cosine_similarity(query_image_features, img_vector)[0, 0])
    # compute euclidean distance
    euclidean_dis = float(euclidean_distances(query_image_features, img_vector)[0, 0])

    #manhattan distance
    # Stored under its own name so the result never shadows a metric object.
    manhattan_dis = float(manhattan_distances(query_image_features, img_vector)[0, 0])

    # Plain floats keep the sort below free of array comparisons.
    similarities.append((i, cosine_sim))

#sort similarities in descending order
similarities.sort(key=lambda x: x[1], reverse=True)

In [None]:
# Load the pre-trained CNN
model = ResNet50(weights="imagenet", include_top=False)

# Load the images and extract the features using the pre-trained CNN
images = []          # preprocessed arrays, exactly as fed to the network
display_images = []  # untouched RGB pixels, used only for plotting
features = []
for image_name in image_paths:
    image_file = os.path.join(dataset_path, image_name)
    pixels = img_to_array(load_img(image_file, target_size=(224, 224)))
    # Keep a copy of the raw pixels first: Keras documents that
    # preprocess_input may overwrite its input, and the preprocessed values
    # are no longer a displayable RGB image.
    display_images.append(pixels.astype("uint8"))
    image = preprocess_input(pixels)
    images.append(image)
    feature = model.predict(image.reshape((1, *image.shape)))
    feature = feature.flatten()
    features.append(feature)

# Perform K-Means clustering
# KMeans rejects n_clusters larger than the number of samples, so the cluster
# count is capped for small image folders.
n_clusters = min(8, len(features))
kmeans = KMeans(n_clusters=n_clusters)
kmeans.fit(features)

# Get the cluster labels for each image
labels = kmeans.predict(features)

# Visualize the clusters
for label, picture in zip(labels, display_images):
    plt.imshow(picture)
    plt.title("Cluster: {}".format(label))
    plt.show()

In [8]:
def extractFeaturesResnet(image_path):
    """Load one image and compute its flattened feature vector.

    The image is resized to 224x224 and passed through ``preprocess_input``
    and then the module-level ``model``.

    Args:
        image_path: Path of the image file to load.

    Returns:
        A pair of single-element lists: the preprocessed image array and the
        flattened output of ``model.predict`` for it.
    """
    pixels = img_to_array(load_img(image_path, target_size=(224, 224)))
    prepared = preprocess_input(pixels)
    # predict() expects a leading batch axis.
    batch = prepared.reshape((1, *prepared.shape))
    embedding = model.predict(batch).flatten()
    return [prepared], [embedding]

- Euclidean distance: 'euclidean'
- Manhattan distance: 'manhattan'
- Cosine distance (1 - cosine similarity): 'cosine'
- Correlation distance (1 - Pearson correlation coefficient): 'correlation'
- Jaccard distance (1 - Jaccard coefficient): 'jaccard'
- Chebyshev distance: 'chebyshev'
- Minkowski distance: 'minkowski'

In [15]:
# Distances
# Euclidean distance
euclidean_distance = DistanceMetric.get_metric('euclidean')
# Manhattan distance
manhattan_distance = DistanceMetric.get_metric('manhattan')
# Cosine similarity
# Define a function that calculates the cosine distance
def _cosine_distance_vectors(x, y):
    """Return 1 - cosine similarity of two 1-D vectors as a float.

    A user-defined DistanceMetric function is called with two 1-D vectors and
    must return a scalar, which is why the matrix-based ``cosine_similarity``
    cannot be wrapped directly.
    """
    norm_product = np.linalg.norm(x) * np.linalg.norm(y)
    if norm_product == 0:
        # A zero vector has no direction: treat the similarity as 0.
        return 1.0
    return 1.0 - float(np.dot(x, y) / norm_product)

# Create a DistanceMetric instance for the cosine distance
# NOTE(review): cosine distance does not satisfy the triangle inequality, so
# this is suitable for pairwise computation but not for tree-based searches.
cosine_distance = DistanceMetric.get_metric('pyfunc', func=_cosine_distance_vectors)



In [10]:
# Query image whose features are extracted with the ResNet helper.
test_image = "dataset/test.jpg"
# The helper returns two lists: the preprocessed image and its feature vector.
test_img, features_test = extractFeaturesResnet(image_path=test_image)

