In [10]:
import numpy as np
from numpy.linalg import norm
import tensorflow
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalMaxPooling2D
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
import pickle
import os
from tqdm import tqdm

In [11]:
# Build the feature extractor on top of ResNet-50, a 50-layer convolutional neural
# network, using weights pretrained on the ImageNet database.
# include_top=False leaves out the ImageNet classification head, so for each
# 224-by-224 RGB input the network emits a (7, 7, 2048) feature map instead of class scores.
model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# Freeze the pretrained weights: the network is only used for inference here.
model.trainable = False
# Global max pooling collapses the spatial feature map into a single 2048-value vector.
model = tensorflow.keras.Sequential([model, GlobalMaxPooling2D()])

# summary() prints the table itself and returns None, so wrapping it in print()
# would only add a stray "None" line to the output.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Functional)        (None, 7, 7, 2048)        23587712  
_________________________________________________________________
global_max_pooling2d_1 (Glob (None, 2048)              0         
Total params: 23,587,712
Trainable params: 0
Non-trainable params: 23,587,712
_________________________________________________________________
None


In [3]:
# Extracting the features of the given image using the ResNet model.
def extract_features(img_path,model):
    """Return the L2-normalised feature vector of a single image.

    Args:
        img_path: Path of the image file to load; it is resized to 224x224.
        model: Feature-extraction model exposing a Keras-style
            ``predict(batch, verbose=...)`` method.

    Returns:
        A 1-D NumPy array holding the flattened model output scaled to unit
        L2 norm. If the model output is all zeros it is returned unscaled,
        because dividing by a zero norm would fill the vector with NaNs.
    """
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    # The model works on batches, so add a leading batch axis of size 1.
    expanded_img_array = np.expand_dims(img_array, axis=0)
    preprocessed_img = preprocess_input(expanded_img_array)
    # verbose=0 keeps Keras from printing a progress bar for every single image.
    result = model.predict(preprocessed_img, verbose=0).flatten()

    result_norm = norm(result)
    if result_norm == 0:
        # Nothing to scale; avoid 0/0 producing NaNs in the embedding.
        return result

    return result / result_norm

In [4]:
data_images_path = "Downloads/datazoids/images/"
filenames = []

# The images folder of the datazoids project holds around 85 sub-folders, each
# containing a different number of images. Collect the path of every image,
# relative to data_images_path.
# os.listdir() returns entries in arbitrary order, so both levels are sorted to keep
# the order of filenames (and of anything indexed alongside it) reproducible across runs.
for folder in sorted(os.listdir(data_images_path)):
    folder_path = os.path.join(data_images_path, folder)
    if not os.path.isdir(folder_path):
        continue
    for file in sorted(os.listdir(folder_path)):
        # Only regular files can be loaded as images; skip nested directories.
        if os.path.isfile(os.path.join(folder_path, file)):
            filenames.append(os.path.join(folder, file))

In [7]:
# Compute one embedding per collected image, in the same order as filenames.
# Appending inside the loop keeps the embeddings gathered so far if the run is interrupted.
feature_list = []
for file in tqdm(filenames):
    image_path = os.path.join(data_images_path, file)
    embedding = extract_features(image_path, model)
    feature_list.append(embedding)

  0%|                                                | 0/105100 [00:00<?, ?it/s]2022-07-21 19:22:47.616154: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2022-07-21 19:22:47.619360: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-07-21 19:22:47.773768: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
100%|█████████████████████████████████| 105100/105100 [1:22:27<00:00, 21.24it/s]


In [9]:
# Persist the embeddings and the matching relative file names; entries at the same
# index of the two lists describe the same image.
# The with-blocks make sure each file is flushed and closed as soon as its dump finishes.
with open('embeddings.pkl', 'wb') as embeddings_file:
    pickle.dump(feature_list, embeddings_file)
with open('filenames.pkl', 'wb') as filenames_file:
    pickle.dump(filenames, filenames_file)

In [6]:
# Report how many image paths were collected and whether every path is distinct.
total_files = len(filenames)
all_unique = len(set(filenames)) == total_files
print(total_files, all_unique)

105100 True


In [8]:
# Number of extracted embeddings; should equal the number of collected file names.
len(feature_list)

105100