In [1]:
import numpy as np
import tensorflow
from numpy.linalg import norm
from tensorflow.keras.preprocessing import image                                #Helps to parse the image
from tensorflow.keras.layers import GlobalMaxPooling2D 
from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input

In [2]:
# Read the sample image from disk, resized on load to 224x224.
img = image.load_img(
    'Data/Sample/Shoes.jpg',
    target_size=(224, 224),
)

In [3]:
# Convert the loaded image into a NumPy array of pixel values, then display it.
img_array = image.img_to_array(img)
img_array

array([[[ 46.,  51.,  36.],
        [ 46.,  51.,  36.],
        [ 46.,  51.,  36.],
        ...,
        [188., 187., 208.],
        [188., 187., 208.],
        [188., 187., 208.]],

       [[ 37.,  42.,  26.],
        [ 37.,  42.,  26.],
        [ 37.,  42.,  26.],
        ...,
        [188., 187., 208.],
        [188., 187., 208.],
        [188., 187., 208.]],

       [[ 35.,  39.,  24.],
        [ 35.,  39.,  24.],
        [ 35.,  39.,  24.],
        ...,
        [188., 187., 208.],
        [188., 187., 208.],
        [188., 187., 208.]],

       ...,

       [[202., 214., 239.],
        [202., 214., 239.],
        [202., 214., 239.],
        ...,
        [148., 122., 126.],
        [153., 127., 131.],
        [149., 123., 128.]],

       [[202., 214., 239.],
        [202., 214., 239.],
        [202., 214., 239.],
        ...,
        [150., 125., 129.],
        [143., 118., 122.],
        [139., 113., 117.]],

       [[202., 214., 239.],
        [202., 214., 239.],
        [202., 2

In [4]:
# The trailing 3 in the shape is the channel axis, i.e. an RGB image.
img_array.shape

(224, 224, 3)

##### Adding a batch dimension to the image, as Keras models work on batches of images

In [5]:
# Keras models consume batches, so add a leading batch axis of size 1.
# `np` is imported in the imports cell at the top of the notebook.
# Note: np.expand_dims returns a view, so this array shares memory with img_array.
expanded_img_array = np.expand_dims(img_array,axis=0)
print(expanded_img_array.shape)


(1, 224, 224, 3)


In [6]:
# ImageNet-pretrained ResNet50 without its classification head, frozen so it
# is only used as a feature extractor.
backbone = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
backbone.trainable = False

# Final model: the frozen backbone followed by global max pooling.
model = tensorflow.keras.Sequential([
    backbone,
    GlobalMaxPooling2D(),
])

##### preprocess_input converts the input into the format which ResNet50 requires

In [7]:
# preprocess_input writes its result over a float NumPy input in place, and
# expanded_img_array is a view of img_array. Passing a copy keeps both source
# arrays intact and makes this cell safe to re-run (otherwise each re-run
# would subtract the channel means again).
preprocessed_img = preprocess_input(expanded_img_array.copy())
print(preprocessed_img.shape)
preprocessed_img

(1, 224, 224, 3)


array([[[[-67.939    , -65.779    , -77.68     ],
         [-67.939    , -65.779    , -77.68     ],
         [-67.939    , -65.779    , -77.68     ],
         ...,
         [104.061    ,  70.221    ,  64.32     ],
         [104.061    ,  70.221    ,  64.32     ],
         [104.061    ,  70.221    ,  64.32     ]],

        [[-77.939    , -74.779    , -86.68     ],
         [-77.939    , -74.779    , -86.68     ],
         [-77.939    , -74.779    , -86.68     ],
         ...,
         [104.061    ,  70.221    ,  64.32     ],
         [104.061    ,  70.221    ,  64.32     ],
         [104.061    ,  70.221    ,  64.32     ]],

        [[-79.939    , -77.779    , -88.68     ],
         [-79.939    , -77.779    , -88.68     ],
         [-79.939    , -77.779    , -88.68     ],
         ...,
         [104.061    ,  70.221    ,  64.32     ],
         [104.061    ,  70.221    ,  64.32     ],
         [104.061    ,  70.221    ,  64.32     ]],

        ...,

        [[135.061    ,  97.221    ,  7

##### Passing the preprocessed image through the ResNet50-based model

In [8]:
# Forward pass of the single-image batch through the model.
sample_features = model.predict(preprocessed_img)
sample_features



array([[ 4.1819544 , 16.685442  ,  5.689307  , ...,  1.2253141 ,
         0.75266314,  9.596607  ]], dtype=float32)

In [9]:
# Shape of the model output for the single-image batch.
pooled_output = model.predict(preprocessed_img)
pooled_output.shape



(1, 2048)

##### The output above shows that we have obtained the embedding values (2048 of them) for the image

In [10]:
# Drop the batch axis so the embedding becomes a 1-D vector, and check its shape.
flat_features = model.predict(preprocessed_img).flatten()
flat_features.shape



(2048,)

##### Normalizing - Dividing each value by the L2 norm of the entire embedding, which scales the embedding to unit length (so the non-negative values seen above end up in the range 0 - 1)

#####  Calculating L2
       The L2 norm is the square root of the sum of the squares of all the values,
       i.e. sqrt(x1^2 + x2^2 + ... + xn^2), where the values are those obtained after prediction
       (model.predict(preprocessed_img)), for example:
       array([[ 2.8066926, 20.916512 ,  2.2597926, ...,  4.2524285, 15.173178 , 16.647434 ]], dtype=float32)

In [11]:
# L2 (Euclidean) norm of the flattened embedding. `norm` comes from
# numpy.linalg and is imported in the imports cell at the top of the notebook.
norm(model.predict(preprocessed_img).flatten())     # It gives us L2 norm



322.3872

##### OR

In [12]:
# The same L2 norm computed by hand: sqrt(v . v).
# Run the model once and reuse the vector rather than paying for two
# identical ResNet50 forward passes inside a single expression.
flat_embedding = model.predict(preprocessed_img).flatten()
np.sqrt(np.dot(flat_embedding, flat_embedding))





322.3872

##### Normalizing - note that every value in the result lies in the 0 to 1 range

In [13]:
# L2-normalize the embedding: divide the vector by its own L2 norm.
# Predict once so the vector and its norm come from the same forward pass
# and the model is not run twice.
raw_embedding = model.predict(preprocessed_img).flatten()
raw_embedding / norm(raw_embedding)



array([0.01297184, 0.05175591, 0.01764743, ..., 0.00380075, 0.00233466,
       0.02976733], dtype=float32)