# Testing out image similarity
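The images were downloaded in the "Range Image Downloader" notebook and the model was trained in the "Range Model Training" notebook. This notebook loads the exported model and image features and uses them to find the dataset images most similar to a given input image.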

In [1]:
# Necessary Packages
import pandas as pd
import numpy as np
import joblib
from PIL import Image
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
import pickle
from sklearn.metrics.pairwise import cosine_similarity

2024-01-09 14:06:47.511301: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Loading exported models and exported image features

In [2]:
# Restore the exported model from disk (joblib format)
model = joblib.load("<path_to_folder/model_name.sav>")

# Restore the exported dataset image features from the pickle file.
# NOTE: joblib.load and pickle.load can execute arbitrary code while
# deserialising, so only point these paths at files you trust.
with open('<path_to_folder/image_features.pkl>', 'rb') as features_file:
    dataset_features = pickle.load(features_file)


In [3]:
# Same helper functions as in the model training notebook, reused here
# to extract the features of input images
def load_and_preprocess_image(image_file, target_size=(224, 224)):
    """Load an image file and convert it into a preprocessed batch of one.

    Parameters
    ----------
    image_file
        Path (or other source accepted by ``image.load_img``) of the image.
    target_size : tuple of int, optional
        ``(height, width)`` the image is resized to while loading.
        Defaults to ``(224, 224)``, the size this notebook has always used.
        NOTE(review): this presumably has to match the input size the
        model was trained with -- confirm before changing it.

    Returns
    -------
    numpy.ndarray
        The image as an array with a leading batch axis of length 1,
        passed through the MobileNetV2 ``preprocess_input``.
    """
    img = image.load_img(image_file, target_size=target_size)
    img_array = image.img_to_array(img)
    # The model predicts on batches, so add a batch axis in front.
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)
    return img_array

def extract_features(image_file, model):
    """Return the model's prediction for one image as a flat 1-D vector.

    The image is loaded and preprocessed with ``load_and_preprocess_image``,
    passed to ``model.predict`` as a batch of one, and the prediction is
    flattened.
    """
    batch = load_and_preprocess_image(image_file)
    return model.predict(batch).flatten()

In [4]:
# Extract the feature vector of the example input image with the loaded model;
# this is the query vector that the dataset features are compared against
query_features = extract_features("<path to folder/example_input.jpg>", model)



In [5]:
# Get similarity scores
# Score the whole dataset with a single cosine_similarity call instead of one
# call per image, so the query vector is validated and normalised only once.
records = list(dataset_features)  # (filename, feature vector) pairs
similarities = []
if records:  # np.vstack cannot stack zero arrays; an empty dataset gives []
    filenames, feature_vectors = zip(*records)
    # Result has shape (1, n_images); row 0 holds the score of every image.
    scores = cosine_similarity([query_features], np.vstack(feature_vectors))[0]
    similarities = list(zip(filenames, scores))

## Top 5 similar images to given input

In [6]:
# Order the (filename, score) pairs from most to least similar.
# The sort is in place: the following cell reads the sorted `similarities`.
similarities.sort(key=lambda pair: pair[1], reverse=True)

# Keep the filenames of the five best matches and print their scores
best_matches = similarities[:5]
top5 = [match_name for match_name, match_score in best_matches]
for i, (filename, similarity) in enumerate(best_matches):
    print(f"Top {i+1}: {filename}, Similarity: {similarity}")

Top 1: at1.jpg, Similarity: 0.9003322124481201
Top 2: at2.jpg, Similarity: 0.5682826638221741
Top 3: 20395270.jpg, Similarity: 0.47226595878601074
Top 4: 29006695.jpg, Similarity: 0.46908971667289734
Top 5: 60260580.jpg, Similarity: 0.45500773191452026


## Gather the top similar images into a DataFrame

In [21]:
# Split the five best (filename, score) pairs into one list per column
top_matches = similarities[:5]
names = [match_name for match_name, match_score in top_matches]
sim = [match_score for match_name, match_score in top_matches]

# One row per match; the bare `df` on the last line renders the table
df = pd.DataFrame({'Filename': names, 'Similarity Score': sim})
df

Unnamed: 0,Filename,Similarity Score
0,at1.jpg,0.900332
1,at2.jpg,0.568283
2,20395270.jpg,0.472266
3,29006695.jpg,0.46909
4,60260580.jpg,0.455008
