In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalMaxPooling2D
from tensorflow.keras import Sequential
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import FunctionTransformer
from sklearn import set_config
from numpy.linalg import norm
import pickle
#resnet model
from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input

In [2]:
# Create the backbone: ResNet50 with ImageNet weights, without its top
# fully-connected layers (include_top=False), for 224x224 RGB input.
model = ResNet50(weights='imagenet',include_top=False,input_shape=(224,224,3))

In [3]:
# Freeze every layer of the pretrained network so none of its weights are
# trainable (the model summary further down reports 0 trainable params).
model.trainable= False

#### Why do we update the last layer of the model?
While finding the images that are closest to the one uploaded by the user, we need to calculate the Euclidean distance between the vector created from the uploaded image and every single vector created from the images in the database. To reduce the complexity, and hence the time required for this, we need to reduce the dimensionality of the feature output produced by the model. By default, the model produces a 4-dimensional output of shape (batch, 7, 7, 2048); the GlobalMaxPooling2D layer reduces that to a 2-dimensional output of shape (batch, 2048) by keeping only the maximum value of each of the 2048 feature maps. The model scans the image: the CNN layers analyse the patterns in its pixels and ultimately produce a vector of 2048 features which the model considers best for telling images apart from one another.

In [4]:
# Append a GlobalMaxPooling2D layer to the headless ResNet50 so that its
# (None, 7, 7, 2048) output is reduced to one (None, 2048) vector per image.
# NOTE(review): this cell rebinds `model` to a Sequential that wraps the
# previous `model`, so it is not idempotent -- re-run the ResNet50 cell first
# before re-running this one.
model = Sequential([
    model,  # up to here the model is the same as the one we imported
    GlobalMaxPooling2D()   # the new GlobalMaxPooling2D layer
])

In [5]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 7, 7, 2048)        23587712  
                                                                 
 global_max_pooling2d (Glob  (None, 2048)              0         
 alMaxPooling2D)                                                 
                                                                 
Total params: 23587712 (89.98 MB)
Trainable params: 0 (0.00 Byte)
Non-trainable params: 23587712 (89.98 MB)
_________________________________________________________________


As we can see, the output of the GlobalMaxPooling2D() layer is a 2D tensor of shape (None, 2048) instead of the 4D tensor of shape (None, 7, 7, 2048) produced by ResNet50, which is what we wanted.

In [6]:
img = image.load_img('images/1163.jpg',target_size=(224,224))

In [7]:
print(type(img))

<class 'PIL.Image.Image'>


In [8]:
img_array = image.img_to_array(img)

In [9]:
img_array.shape

(224, 224, 3)

The Keras ResNet50 model takes a 4D array as input rather than a 3D one, where the extra (first) dimension is the batch size, so we have to convert this 3D image array into a 4D one.

In [10]:
# adding an extra dimension to the image array
reshaped_img = np.expand_dims(img_array,axis=0)

In [11]:
reshaped_img.shape

(1, 224, 224, 3)

In [12]:
reshaped_img

array([[[[255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.],
         ...,
         [255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.]],

        [[255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.],
         ...,
         [255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.]],

        [[255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.],
         ...,
         [255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.]],

        ...,

        [[255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.],
         ...,
         [255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.]],

        [[255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.],
         ...,
         [255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.]],

        [[255., 255., 255.],
       

In [13]:
# Apply ResNet50's own preprocessing so the pixel values are in the format the
# pretrained weights expect (a white pixel 255, 255, 255 becomes
# 151.061, 138.221, 131.32 in the output below).
# NOTE(review): the Keras docs say preprocess_input may overwrite its input in
# place when the dtypes are compatible, so `reshaped_img` may no longer hold
# the raw pixel values after this cell -- confirm before reusing it.
preprocessed_img_arr = preprocess_input(reshaped_img)

In [14]:
preprocessed_img_arr

array([[[[151.061  , 138.22101, 131.32   ],
         [151.061  , 138.22101, 131.32   ],
         [151.061  , 138.22101, 131.32   ],
         ...,
         [151.061  , 138.22101, 131.32   ],
         [151.061  , 138.22101, 131.32   ],
         [151.061  , 138.22101, 131.32   ]],

        [[151.061  , 138.22101, 131.32   ],
         [151.061  , 138.22101, 131.32   ],
         [151.061  , 138.22101, 131.32   ],
         ...,
         [151.061  , 138.22101, 131.32   ],
         [151.061  , 138.22101, 131.32   ],
         [151.061  , 138.22101, 131.32   ]],

        [[151.061  , 138.22101, 131.32   ],
         [151.061  , 138.22101, 131.32   ],
         [151.061  , 138.22101, 131.32   ],
         ...,
         [151.061  , 138.22101, 131.32   ],
         [151.061  , 138.22101, 131.32   ],
         [151.061  , 138.22101, 131.32   ]],

        ...,

        [[151.061  , 138.22101, 131.32   ],
         [151.061  , 138.22101, 131.32   ],
         [151.061  , 138.22101, 131.32   ],
         ...,


This **preprocessed_img_arr** becomes the input to the model, which uses it to predict the 2048 features and returns them as a vector.

In [15]:
def reshape_image_to_input(path):
    """Load an image file and convert it into a model-ready input batch.

    The image at ``path`` is loaded at 224x224, converted to an array, given a
    leading batch axis of size 1 and passed through ResNet50's
    ``preprocess_input``.

    Parameters
    ----------
    path : str
        Location of the image file to load.

    Returns
    -------
    numpy.ndarray
        The preprocessed image batch (shape ``(1, 224, 224, 3)`` for the RGB
        images used in this notebook).
    """
    pil_img = image.load_img(path, target_size=(224, 224))
    # The model expects a batch, so prepend a batch axis to the single image.
    single_image_batch = np.expand_dims(image.img_to_array(pil_img), axis=0)
    return preprocess_input(single_image_batch)

In [16]:
def flatten_image_array_to1D(array):
    """Return a flattened, one-dimensional copy of ``array``.

    Parameters
    ----------
    array : numpy.ndarray
        Array of any shape, e.g. the ``(1, 2048)`` feature array.

    Returns
    -------
    numpy.ndarray
        A new 1-D array holding the elements of ``array``.
    """
    return array.flatten()

In [17]:
pr = reshape_image_to_input('images/1540.jpg')

In [18]:
pr.shape

(1, 224, 224, 3)

In [19]:
prediction = model.predict(pr)



In [20]:
pd.DataFrame(prediction)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047
0,2.585339,14.773425,1.597528,3.998313,4.783364,2.891763,13.972631,5.486446,0.094314,0.0,...,0.340045,4.152175,0.0,17.696899,2.755452,0.0,8.122338,5.244023,10.181306,4.552249


In [21]:
# scaler = ColumnTransformer([
#     ('scaler',MinMaxScaler(feature_range=(0.1, 0.9)),slice(0,2048))
# ])
def scaler_func(data):
    """Scale ``data`` to unit norm by dividing it by ``numpy.linalg.norm(data)``.

    The norm is computed over the whole array; for the ``(1, 2048)`` feature
    array produced in this notebook that equals the L2 norm of the single
    feature vector.

    Parameters
    ----------
    data : numpy.ndarray
        Feature array to scale.

    Returns
    -------
    numpy.ndarray
        ``data / norm(data)``. If the norm is zero (all-zero input), an
        unscaled copy of ``data`` is returned instead of an array of NaNs.
    """
    magnitude = norm(data)
    if magnitude == 0:
        # 0 / 0 would produce NaNs (and a RuntimeWarning), which would then
        # poison every distance computed against this vector. An all-zero
        # array has nothing to scale, so hand back a copy unchanged.
        return np.array(data, copy=True)
    return data / magnitude

In [22]:
flat_pred=flatten_image_array_to1D(prediction)
flat_pred

array([ 2.5853388, 14.773425 ,  1.5975276, ...,  5.2440233, 10.181306 ,
        4.5522494], dtype=float32)

In [23]:
arr = scaler_func(prediction)

In [24]:
arr

array([[0.00940838, 0.05376237, 0.00581361, ..., 0.01908367, 0.03705106,
        0.01656621]], dtype=float32)

In [25]:
flat_arr = flatten_image_array_to1D(arr)
flat_arr

array([0.00940838, 0.05376237, 0.00581361, ..., 0.01908367, 0.03705106,
       0.01656621], dtype=float32)

### Creating pipeline

In [26]:
# First pipeline step: image path -> preprocessed image batch for the model
image_array_transformer = FunctionTransformer(func=reshape_image_to_input)

In [27]:
#second transformer
def pred_with_model(data):
    """Run ``data`` through the notebook-level ``model`` and return its output.

    Parameters
    ----------
    data : numpy.ndarray
        Preprocessed image batch, as produced by ``reshape_image_to_input``.

    Returns
    -------
    numpy.ndarray
        Whatever ``model.predict(data)`` returns (a ``(1, 2048)`` feature
        array for a single image in this notebook).
    """
    return model.predict(data)

model_transformer = FunctionTransformer(func=pred_with_model)    

In [28]:
# Third pipeline step: divide the feature array by its norm (see scaler_func)
scaler_f = FunctionTransformer(func=scaler_func)

In [29]:
# Fourth pipeline step: flatten the 2-D (1, 2048) feature array to a 1-D array
flatten = FunctionTransformer(func=flatten_image_array_to1D)

In [30]:
set_config(display='diagram')

In [31]:
# Full feature-extraction pipeline; the steps run in the order listed:
#   image path -> preprocessed batch -> model features -> norm-scaled -> 1-D array
model_pipeline = Pipeline([
    ('image_to_array',image_array_transformer),
    ('model',model_transformer),
    ('scaler',scaler_f),
    ('flatten',flatten)
])

In [32]:
# Run a single image path through all four pipeline steps to obtain its
# 1-D feature array.
predicted_arr = model_pipeline.fit_transform('images/1551.jpg')



In [33]:
predicted_arr

array([0.02201655, 0.00789968, 0.00425501, ..., 0.03311152, 0.02338822,
       0.        ], dtype=float32)

In [34]:
# Export the pipeline. The file is opened in a `with` block so the handle is
# flushed and closed even if pickling raises.
# NOTE(review): pickle stores the functions wrapped by the FunctionTransformers
# by reference (module + name), not by value, and `pred_with_model` reads the
# notebook-level `model`, which is not part of the pickle. Whoever loads
# 'model.pkl' must therefore have those functions and `model` defined in the
# same namespace -- confirm the consumer does this.
with open('model.pkl', 'wb') as pkl_file:
    pickle.dump(model_pipeline, pkl_file)