### Importing required libraries

In [15]:
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation/future warnings raised by the
# libraries used below — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [1]:
from tensorflow.keras.applications.resnet50 import ResNet50
from sklearn.metrics.pairwise import cosine_similarity
from os import listdir
from pickle import dump
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from keras.applications.vgg16 import preprocess_input
from keras.models import Model
from nltk.translate.bleu_score import sentence_bleu

### Extracting features from each photo in the directory

In [2]:
def extract_features(directory):
    """Extract a ResNet50 feature array for every image file in ``directory``.

    An ImageNet-pretrained ResNet50 is cut off at its second-to-last layer and
    run on each image after it has been resized to 224x224.

    Args:
        directory: Path of the folder holding the images. Entries that are not
            regular files (e.g. sub-directories) are skipped.

    Returns:
        dict mapping the image id (the file name up to its first ``.``) to the
        array returned by ``model.predict`` for that image.
    """
    # Function-scope imports keep this cell runnable on its own.
    from os import path
    # Use the preprocessing helper that belongs to ResNet50 instead of the
    # VGG16 one, so the preprocessing always matches the model loaded below.
    from tensorflow.keras.applications.resnet50 import preprocess_input

    # load the model
    model = ResNet50(weights="imagenet")
    # re-structure the model so its output is the second-to-last layer
    model = Model(inputs=model.inputs, outputs=model.layers[-2].output)
    # summarize; summary() prints by itself and returns None, so wrapping it
    # in print() only added a stray "None" line
    model.summary()
    # extract features from each photo
    features = dict()
    for name in listdir(directory):
        filename = path.join(directory, name)
        # sub-directories cannot be loaded as images
        if not path.isfile(filename):
            continue
        # load an image from file
        image = load_img(filename, target_size=(224, 224))
        # convert the image pixels to a numpy array
        image = img_to_array(image)
        # add a leading batch axis of size 1 for the model
        image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
        # prepare the image for the ResNet50 model
        image = preprocess_input(image)
        # get features
        feature = model.predict(image, verbose=0)
        # get image id: everything before the first '.' in the file name
        image_id = name.split('.')[0]
        # store feature
        features[image_id] = feature
        print('>%s' % name)
    return features

In [3]:
# extract features from all images
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
directory = 'C:/Users/inzamam.safi/Documents/DI/Flix/image'  # image directory
features = extract_features(directory)
print('Extracted Features: %d' % len(features))
# save to file; the context manager guarantees the pickle file is flushed and
# closed, which the previous bare open() call never did explicitly
with open('features.pkl', 'wb') as features_file:
    dump(features, features_file)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                              

In [4]:
import pandas as pd

# One row per image: the image id next to its extracted feature array.
id_feature_pairs = list(features.items())
df = pd.DataFrame(id_feature_pairs, columns=['id', 'features'])
df

Unnamed: 0,id,features
0,13079565VVD,"[[0.21241266, 2.8491902, 0.0895971, 0.73988545..."
1,13079565VVR,"[[0.3400833, 2.567366, 0.0, 0.99880934, 0.3355..."
2,13108390UDD,"[[0.0, 2.6054876, 0.0, 0.328603, 0.24755777, 0..."
3,13108390UDR,"[[0.23331977, 4.479315, 0.07433164, 0.06878883..."
4,13110484CLD,"[[0.0, 1.8937098, 0.37733015, 0.45714578, 0.15..."
...,...,...
123,13585501SNR,"[[0.77903384, 2.075071, 0.0, 0.022423847, 0.35..."
124,13585517PMD,"[[0.5068856, 1.4797083, 0.0, 0.38460818, 0.502..."
125,13585517PMR,"[[0.28155455, 1.3441987, 0.0, 0.702797, 0.1642..."
126,13585521PCD,"[[0.5114154, 2.4142072, 0.116344415, 1.2184261..."


In [5]:
# Print the column dtypes, non-null counts and memory usage of the feature table.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 128 entries, 0 to 127
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   id        128 non-null    object
 1   features  128 non-null    object
dtypes: object(2)
memory usage: 2.1+ KB


In [6]:
# Accumulator for the similarity search: a dict whose missing keys start out as
# empty lists, so values can be appended under a key without initialising it.
from collections import defaultdict
Details = defaultdict(list)

#### Enter an image id. You can use any image id from the `df` dataframe above.
#### Some example ids to test: 1. 13492905VMR 2. 13492905VMD 3. 13485245LFR

#### Calculating the cosine similarity between the image the user gives as input and every image in the dataset (including itself)

In [9]:
image_id = input("Enter the image_id ").strip()   # Enter this image id 13492905VMR for testing

# Look the query image up once instead of re-filtering the frame on every
# iteration, and fail with a clear message when the id is not in the dataset.
query_rows = df.loc[df['id'] == image_id, 'features']
if query_rows.empty:
    raise KeyError("image_id %r not found in df['id']" % image_id)
query_feature = query_rows.values[0]

# Start from empty lists so re-running this cell does not append a second copy
# of the results (which would break the column assignment in the next cell).
Details[image_id] = []
Details["cos_sim"] = []
for candidate_feature, candidate_id in zip(df.features, df.id):
    # id of the image being compared against the query
    Details[image_id].append(candidate_id)
    # cosine_similarity returns a 1x1 array for this pair of feature rows
    Details["cos_sim"].append(cosine_similarity(query_feature, candidate_feature))

Enter the image_id 13492905VMR


In [10]:
# Add the collected results to the dataframe as two new columns:
# the similarity score and the id of the image it was computed against.
df['cosine_similarity']=Details['cos_sim']
df['similar_image_id']=Details[image_id]

In [11]:
# Preview the first 10 rows, now including the two similarity columns.
df.head(10)

Unnamed: 0,id,features,cosine_similarity,similar_image_id
0,13079565VVD,"[[0.21241266, 2.8491902, 0.0895971, 0.73988545...",[[0.80639905]],13079565VVD
1,13079565VVR,"[[0.3400833, 2.567366, 0.0, 0.99880934, 0.3355...",[[0.84714556]],13079565VVR
2,13108390UDD,"[[0.0, 2.6054876, 0.0, 0.328603, 0.24755777, 0...",[[0.76208794]],13108390UDD
3,13108390UDR,"[[0.23331977, 4.479315, 0.07433164, 0.06878883...",[[0.7000278]],13108390UDR
4,13110484CLD,"[[0.0, 1.8937098, 0.37733015, 0.45714578, 0.15...",[[0.78133243]],13110484CLD
5,13110484CLR,"[[0.0, 3.185948, 0.026236704, 0.06508469, 0.99...",[[0.81618786]],13110484CLR
6,13162479UBD,"[[0.3343742, 1.7011807, 0.0, 0.14419621, 0.225...",[[0.7105067]],13162479UBD
7,13162479UBR,"[[0.44412833, 1.229306, 0.0, 0.6462586, 0.9898...",[[0.7127886]],13162479UBR
8,13180761CUD,"[[0.084595114, 2.3488965, 0.0, 1.2299247, 0.14...",[[0.8792151]],13180761CUD
9,13180761CUR,"[[0.05851432, 2.8958082, 0.0, 1.199397, 0.0411...",[[0.82764643]],13180761CUR


In [16]:
# cleaning cosine_similarity column: each value is a 1x1 array that renders as
# "[[0.80639905]]", so blank out the square brackets to leave just the number.
# A raw-string character class with regex=True is required here: "\[" is an
# invalid string escape, and Series.str.replace treats the pattern as a literal
# by default in pandas >= 2.0, which would leave the brackets in place and make
# the numeric conversion in the next cell fail.
df['cosine_similarity'] = (
    df['cosine_similarity'].astype(str).str.replace(r"[\[\]]", " ", regex=True)
)

In [17]:
# Parse the cleaned similarity strings into a numeric column so the rows can be
# ordered by value rather than as text.
df['cosine_similarity'] = pd.to_numeric(df['cosine_similarity'])

#### Finding the top 10 most similar images

In [18]:
# Order by similarity, highest first, and keep the first ten rows.
# Note that this rebinds df to the ten-row subset.
df = df.sort_values('cosine_similarity', ascending=False).iloc[:10]

In [24]:
# Take an independent deep copy so the edits made to df2 below leave df untouched.
df2=df.copy(deep=True)

In [25]:
# Every row of the result table describes a match for the same query image, so
# the id column is set to the query id throughout. The previous
# np.where(cond, image_id, image_id) returned image_id in both branches and
# relied on `np`, which this notebook never imports.
df2["id"] = image_id

In [27]:
# Display the result with a fresh 0..n-1 index. The re-indexed frame is only
# shown, not assigned, so df2 itself keeps its original index labels.
df2.reset_index(drop=True)

Unnamed: 0,id,features,cosine_similarity,similar_image_id
0,13492905VMR,"[[0.009841712, 3.407343, 0.054654554, 1.144901...",1.000001,13492905VMR
1,13492905VMR,"[[0.084595114, 2.3488965, 0.0, 1.2299247, 0.14...",0.879215,13180761CUD
2,13492905VMR,"[[0.3626366, 3.5694947, 0.0, 0.55084807, 0.344...",0.857729,13584969XTR
3,13492905VMR,"[[0.1338944, 2.3019264, 0.0037994627, 0.678132...",0.854845,13584688STR
4,13492905VMR,"[[0.3400833, 2.567366, 0.0, 0.99880934, 0.3355...",0.847146,13079565VVR
5,13492905VMR,"[[0.15343858, 3.2455802, 0.0, 0.72866625, 1.17...",0.844486,13492905VMD
6,13492905VMR,"[[0.26849875, 3.9035642, 0.06938506, 0.1823227...",0.8395,13584969NWD
7,13492905VMR,"[[0.050510373, 1.7438903, 0.2502293, 1.025936,...",0.836384,13585245GTD
8,13492905VMR,"[[0.22680914, 3.682602, 0.011104474, 0.2696383...",0.835005,13584969NWR
9,13492905VMR,"[[0.03714767, 2.9287462, 0.020031521, 1.073388...",0.829604,13583833LWR


In [None]:
# The `id` column is the image id that the user gave as input.
# The `similar_image_id` column holds the ids of the most similar images, ranked by cosine similarity.