In [2]:
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive
time: 24.6 s (started: 2023-02-19 11:16:14 +00:00)


In [3]:
%cd /content/gdrive/MyDrive/ahrefs

/content/gdrive/MyDrive/ahrefs
time: 13.8 ms (started: 2023-02-19 11:16:38 +00:00)


In [1]:
!pip install ipython-autotime
%load_ext autotime

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ipython-autotime
  Downloading ipython_autotime-0.3.1-py2.py3-none-any.whl (6.8 kB)
Collecting jedi>=0.10
  Downloading jedi-0.18.2-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m30.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi, ipython-autotime
Successfully installed ipython-autotime-0.3.1 jedi-0.18.2
time: 336 µs (started: 2023-02-19 11:16:14 +00:00)


# Downloading caltech101

In [25]:
!mkdir -p /datasets
!gdown "https://drive.google.com/uc?id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp&confirm=t" --output /datasets/caltech101.tar.gz

Downloading...
From: https://drive.google.com/uc?id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp&confirm=t
To: /datasets/caltech101.tar.gz
100% 132M/132M [00:00<00:00, 236MB/s]
time: 2.13 s (started: 2023-02-19 11:53:03 +00:00)


In [26]:
!tar -xvzf /datasets/caltech101.tar.gz --directory /datasets

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
101_ObjectCategories/chair/image_0006.jpg
101_ObjectCategories/chair/image_0007.jpg
101_ObjectCategories/chair/image_0008.jpg
101_ObjectCategories/chair/image_0010.jpg
101_ObjectCategories/chair/image_0011.jpg
101_ObjectCategories/chair/image_0012.jpg
101_ObjectCategories/chair/image_0013.jpg
101_ObjectCategories/chair/image_0014.jpg
101_ObjectCategories/chair/image_0016.jpg
101_ObjectCategories/chair/image_0017.jpg
101_ObjectCategories/chair/image_0018.jpg
101_ObjectCategories/chair/image_0019.jpg
101_ObjectCategories/chair/image_0020.jpg
101_ObjectCategories/chair/image_0022.jpg
101_ObjectCategories/chair/image_0023.jpg
101_ObjectCategories/chair/image_0024.jpg
101_ObjectCategories/chair/image_0025.jpg
101_ObjectCategories/chair/image_0026.jpg
101_ObjectCategories/chair/image_0028.jpg
101_ObjectCategories/chair/image_0029.jpg
101_ObjectCategories/chair/image_0030.jpg
101_ObjectCategories/chair/image_0031.jpg
101_ObjectC

In [27]:
# Rename the extracted folder to the path used by the rest of the notebook.
!mv /datasets/101_ObjectCategories /datasets/caltech101
# Presumably a re-run guard: if /datasets/caltech101 already exists, the mv above
# nests the folder inside it, and this removes that nested copy.
!rm -rf /datasets/caltech101/101_ObjectCategories

time: 327 ms (started: 2023-02-19 11:53:14 +00:00)


# Model Selection

In [30]:
import math
import os
import pickle
import random
import time

import numpy as np
from numpy import dot
from numpy.linalg import norm
import tensorflow
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm, tqdm_notebook


time: 1.98 ms (started: 2023-02-19 11:54:02 +00:00)


In [29]:
def model_picker(name):
    """Build an ImageNet-pretrained convolutional base for feature extraction.

    Every architecture is created without its classification head
    (``include_top=False``), for 224x224 RGB inputs and with global max
    pooling.

    Args:
        name: One of 'vgg16', 'vgg19', 'mobilenet', 'inception', 'resnet' or
            'xception'.

    Returns:
        The instantiated Keras model.

    Raises:
        ValueError: If ``name`` is not one of the supported architectures.
    """
    supported = ('vgg16', 'vgg19', 'mobilenet', 'inception', 'resnet', 'xception')
    # Fail fast with a clear error. Previously an unknown name only printed a
    # message and then crashed on `return model` with an UnboundLocalError.
    if name not in supported:
        raise ValueError("Specified model not available: %r (choose from: %s)"
                         % (name, ', '.join(supported)))

    # Arguments shared by every architecture.
    common_kwargs = dict(weights='imagenet',
                         include_top=False,
                         input_shape=(224, 224, 3),
                         pooling='max')

    if name == 'vgg16':
        model = VGG16(**common_kwargs)
    elif name == 'vgg19':
        model = VGG19(**common_kwargs)
    elif name == 'mobilenet':
        model = MobileNet(depth_multiplier=1, alpha=1, **common_kwargs)
    elif name == 'inception':
        model = InceptionV3(**common_kwargs)
    elif name == 'resnet':
        model = ResNet50(**common_kwargs)
    else:  # 'xception' (needs the Xception import in the imports cell)
        model = Xception(**common_kwargs)
    return model

model_architecture = 'resnet'  # Resnet has largest fraction of non-zero values compared to other keras imagenet pretrained models: https://youtu.be/-5BAepEE9I8?t=524
model = model_picker(model_architecture)

time: 1.49 s (started: 2023-02-19 11:53:50 +00:00)


# Feature extraction (one by one)

In [50]:
def extract_features(img_path, model):
    """Return the flattened prediction of `model` for a single image file.

    The image at `img_path` is loaded at 224x224, converted to an array,
    wrapped in a batch of one, passed through `preprocess_input` (the ResNet50
    one imported at the top of the notebook) and fed to `model.predict`.

    Args:
        img_path: Path of the image file to load.
        model: Object exposing `predict`, e.g. a model from `model_picker`.

    Returns:
        The prediction for that one image, flattened to 1-D.
    """
    height, width = 224, 224
    pixels = image.img_to_array(
        image.load_img(img_path, target_size=(height, width)))
    # Add a leading batch axis: the model is fed a batch holding one image.
    single_image_batch = np.expand_dims(pixels, axis=0)
    prediction = model.predict(preprocess_input(single_image_batch))
    return prediction.flatten()

time: 1.45 ms (started: 2023-02-19 12:08:47 +00:00)


# Checking that features extracted from the same class, {class1, image1} and {class1, image2}, are closer than features from different classes, {class1, image1} and {class2, image1}
- Euclidean Distance
- Dot product
- Cosine similarity

In [None]:
bike1 = extract_features('/datasets/caltech101/Motorbikes/image_0001.jpg', model)
bike2 = extract_features('/datasets/caltech101/Motorbikes/image_0002.jpg', model)
plane1 = extract_features('/datasets/caltech101/airplanes/image_0001.jpg', model)

Total length of features for one image:  2048
time: 11.4 s (started: 2023-02-18 23:24:50 +00:00)


In [None]:
def compare_metrics(c1ass1image1, class1image2, class2image1):
  """Print three similarity measures for a same-class and a cross-class pair.

  The first argument is the anchor. Each printed line shows the metric for
  (anchor, class1image2) followed by the metric for (anchor, class2image1).
  Nothing is returned.
  """
  def cosine_similarity(list_1, list_2):
    # Dot product divided by the product of the two vector norms.
    return dot(list_1, list_2) / (norm(list_1) * norm(list_2))

  anchor = c1ass1image1

  # The same-class pair is expected to have the smaller distance.
  distance_same = np.linalg.norm(anchor - class1image2)
  distance_other = np.linalg.norm(anchor - class2image1)
  print('Euclidean: ', distance_same, distance_other)

  # The same-class pair is expected to have the larger dot product.
  product_same = anchor @ class1image2
  product_other = anchor @ class2image1
  print('Dot product:', product_same, product_other)

  # The same-class pair is expected to have the larger cosine similarity.
  cosine_same = cosine_similarity(anchor, class1image2)
  cosine_other = cosine_similarity(anchor, class2image1)
  print('Cosine Similarity: ', cosine_same, cosine_other)

compare_metrics(bike1, bike2, plane1)

Euclidean:  180.9626 295.6774
Dot product: 92075.17 58472.14
Cosine Similarity:  0.84936386 0.5724963
time: 5.46 ms (started: 2023-02-19 00:13:28 +00:00)


# Getting all filenames
- Used for visualization later

In [None]:
!du -a /datasets/caltech101

In [None]:
!du -a /datasets/caltech101 | cut -d/ -f4 | sort | uniq -c | sort -nr

    801 airplanes
    799 Motorbikes
    469 BACKGROUND_Google
    436 Faces_easy
    436 Faces
    240 watch
    201 Leopards
    129 bonsai
    124 car_side
    115 ketch
    108 chandelier
    101 hawksbill
    100 grand_piano
     99 brain
     92 butterfly
     89 helicopter
     88 menorah
     87 trilobite
     87 starfish
     87 kangaroo
     86 sunflower
     86 ewer
     86 buddha
     85 scorpion
     83 revolver
     82 laptop
     81 ibis
     79 llama
     77 minaret
     76 umbrella
     76 electric_guitar
     74 crab
     71 crayfish
     70 cougar_face
     69 dragonfly
     68 flamingo
     68 ferry
     68 dalmatian
     67 lotus
     66 dolphin
     65 stop_sign
     65 soccer_ball
     65 joshua_tree
     65 euphonium
     65 elephant
     64 schooner
     63 chair
     62 lamp
     61 yin_yang
     60 wheelchair
     60 stegosaurus
     60 rhino
     60 cellphone
     58 sea_horse
     58 pyramid
     58 cup
     57 windsor_chair
     56 nautilus
     56 accordi

In [10]:
extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']

def get_file_list(root_dir, allowed_extensions=None):
    """Recursively collect the image files found under `root_dir`.

    Args:
        root_dir: Directory to walk.
        allowed_extensions: Optional iterable of filename suffixes to accept.
            Defaults to the notebook-level `extensions` list.

    Returns:
        A list of file paths (in os.walk order) whose names END with one of
        the accepted suffixes. Paths that os.walk reports but that do not
        exist on disk are printed and left out of the list.
    """
    suffixes = tuple(extensions if allowed_extensions is None else allowed_extensions)
    file_list = []
    for root, _directories, filenames in os.walk(root_dir):
        for filename in filenames:
            # Match on the suffix only: the previous substring test
            # (`ext in filename`) also accepted names such as 'photo.jpg.bak'.
            if not filename.endswith(suffixes):
                continue
            filepath = os.path.join(root, filename)
            if os.path.exists(filepath):
                file_list.append(filepath)
            else:
                print(filepath)
    return file_list

root_dir = '/datasets/caltech101'
filenames = sorted(get_file_list(root_dir))
print(len(filenames))

9144
time: 104 ms (started: 2023-02-19 11:21:19 +00:00)


# Feature extraction (tensorflow batches)

In [None]:
# Non-augmenting generator: only the ResNet50 `preprocess_input` is applied.
datagen = tensorflow.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preprocess_input)

# class_mode=None yields image batches without labels, and shuffle=False keeps a
# fixed order so the rows of the predictions follow the generator's file order.
generator = datagen.flow_from_directory(root_dir,
                                        target_size=(224, 224),
                                        class_mode=None,
                                        shuffle=False,
                                        batch_size=64)

# One feature vector per image, using the `model` built by model_picker above.
feature_list = model.predict(generator, verbose=1)

Found 9144 images belonging to 102 classes.
time: 34.3 s (started: 2023-02-18 23:25:03 +00:00)


In [None]:
feature_list.shape

(9144, 2048)

time: 3.6 ms (started: 2023-02-18 23:25:38 +00:00)


In [None]:
!mkdir -p /features
pickle.dump(generator.classes, open('/features/class_ids-caltech101.pickle','wb'))
pickle.dump(filenames, open('/features/filenames-caltech101.pickle', 'wb'))
pickle.dump(feature_list,open('/features/features-caltech101-' + model_architecture + '.pickle', 'wb'))

time: 290 ms (started: 2023-02-18 23:25:38 +00:00)


# Training Model from scratch (without final dense layers)

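- Aim: learn embeddings that are more specific to this dataset.
- Note: the network is initialised from ImageNet weights (`weights='imagenet'`) and all of its layers are then trained, rather than starting from random weights.
- No dense layers are added except the final classification layer, so that what is learned ends up in the convolutional layers rather than in dense layers that would be cut off anyway when the model is truncated for feature extraction.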

In [None]:
# Training-time generator: random rotation/shift/zoom augmentation together with
# the ResNet50 `preprocess_input`.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                   rotation_range=20,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   zoom_range=0.2)

train_generator = train_datagen.flow_from_directory(root_dir,
                                                    target_size=(224, 224),
                                                    shuffle=True,
                                                    seed=12345,
                                                    class_mode='categorical')

NUM_CLASSES = 102

# Named `base_model` / `inputs` so this cell neither overwrites the `model`
# feature extractor built earlier nor shadows Python's built-in `input()`.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
inputs = Input(shape=(224, 224, 3))
x = base_model(inputs)
x = GlobalAveragePooling2D()(x)
# No extra dense or dropout layers so heavy lifting for classification accuracy rests on convolution layers
x = Dense(NUM_CLASSES, activation='softmax')(x)
model_similarity_optimized = Model(inputs=inputs, outputs=x)

Found 9144 images belonging to 102 classes.
time: 2.16 s (started: 2023-02-18 23:40:00 +00:00)


In [None]:
model_similarity_optimized.compile(loss='categorical_crossentropy',
              optimizer=tensorflow.keras.optimizers.Adam(0.001),
              metrics=['acc'])
# `batch_size` must not be passed to fit() when the data comes from a generator:
# the generator already yields batches, so that argument did not set the batch
# size. To change it, pass batch_size to flow_from_directory() instead.
model_similarity_optimized.fit(train_generator,
                               epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fb5d06338e0>

time: 27min 1s (started: 2023-02-18 23:40:08 +00:00)


In [None]:
!mkdir -p /models
model_similarity_optimized.save('/models/model-scratch.h5')

time: 1.42 s (started: 2023-02-19 00:07:21 +00:00)


In [None]:
model_similarity_optimized.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 resnet50 (Functional)       (None, 7, 7, 2048)        23587712  
                                                                 
 global_average_pooling2d (G  (None, 2048)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dense (Dense)               (None, 102)               208998    
                                                                 
Total params: 23,796,710
Trainable params: 23,743,590
Non-trainable params: 53,120
_________________________________________________________________
time: 47 ms (started: 2023-02-19 00:07:26 +00:00)


In [None]:
# Drop the softmax classifier: layers[-2] is the GlobalAveragePooling2D layer, so
# the truncated model outputs a 2048-d embedding per image.
# NOTE: this rebinds the global `model`, which the later
# `extract_features(..., model)` calls pick up.
model = Model(model_similarity_optimized.input, model_similarity_optimized.layers[-2].output)
model.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 resnet50 (Functional)       (None, 7, 7, 2048)        23587712  
                                                                 
 global_average_pooling2d (G  (None, 2048)             0         
 lobalAveragePooling2D)                                          
                                                                 
Total params: 23,587,712
Trainable params: 23,534,592
Non-trainable params: 53,120
_________________________________________________________________
time: 70.2 ms (started: 2023-02-19 00:09:38 +00:00)


In [None]:
datagen = tensorflow.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preprocess_input)

# Unshuffled, label-free pass over the dataset for feature extraction.
generator_refined = datagen.flow_from_directory(root_dir,
                                        target_size=(224, 224),
                                        class_mode=None,
                                        shuffle=False,
                                        batch_size=64)

# Predict from the generator created in this cell. The previous code passed the
# older `generator`, leaving `generator_refined` unused and making the cell
# depend on state left over from an earlier cell.
feature_list_scratch = model.predict(generator_refined, verbose=1)
feature_list_scratch.shape # ensure 2048 dimensions

Found 9144 images belonging to 102 classes.


(9144, 2048)

time: 35 s (started: 2023-02-19 00:12:30 +00:00)


In [None]:
# NOTE: with model_architecture = 'resnet' the file is named
# 'features-caltech101-resnetscratch.pickle' (no hyphen before 'scratch'); the
# name is kept as-is in case something else loads it by that name.
# The context manager closes the file handle, which the bare open() never did.
with open('/features/features-caltech101-' + model_architecture + 'scratch' + '.pickle', 'wb') as f:
    pickle.dump(feature_list_scratch, f)

time: 167 ms (started: 2023-02-19 00:13:05 +00:00)


In [None]:
bike1 = extract_features('/datasets/caltech101/Motorbikes/image_0001.jpg', model)
bike2 = extract_features('/datasets/caltech101/Motorbikes/image_0002.jpg', model)
plane1 = extract_features('/datasets/caltech101/airplanes/image_0001.jpg', model)

time: 290 ms (started: 2023-02-19 00:26:05 +00:00)


In [None]:
compare_metrics(bike1, bike2, plane1)  

Euclidean:  8.322131 36.717457
Dot product: 643.44617 868.3439
Cosine Similarity:  0.9653718 0.73361313
time: 1.81 ms (started: 2023-02-19 00:26:10 +00:00)


## Interpretations
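- It is strange that the dot product for (bike1, plane1) is higher than for (bike1, bike2), even though the two bikes look almost identical. The dot product is not normalised, so it also grows with the magnitude of the feature vectors; cosine similarity removes that effect.
- Both Euclidean distances shrank significantly compared to using the pre-trained ResNet directly.
- Both cosine similarities increased.
- The goal is a small distance / high similarity for the first (same-class) pair, and a large gap between the metrics of pair 1 and pair 2.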


In [None]:
cougarbody1 = extract_features('/datasets/caltech101/cougar_body/image_0002.jpg', model)
cougarbody2 = extract_features('/datasets/caltech101/cougar_body/image_0001.jpg', model)
cougarface1 = extract_features('/datasets/caltech101/cougar_face/image_0001.jpg', model)

time: 214 ms (started: 2023-02-19 00:24:30 +00:00)


In [None]:
compare_metrics(cougarbody1,cougarbody2,cougarface1)  

Euclidean:  11.592525 20.711376
Dot product: 491.89868 267.79065
Cosine Similarity:  0.88109666 0.56944656
time: 2.31 ms (started: 2023-02-19 00:24:33 +00:00)


In [None]:
!cp -r /features /content/gdrive/MyDrive/ahrefs/features
!cp -r /models /content/gdrive/MyDrive/ahrefs/models

time: 1.55 s (started: 2023-02-19 00:29:51 +00:00)


# Fine tuning model
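- Motivation: the model above was not trained long enough to classify the true labels more accurately than the pre-trained model, so here the pre-trained ResNet50 is kept frozen and only a small classification head is trained on top of it.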

In [9]:
def model_maker():
    """Assemble a classifier: frozen ResNet50 base plus a small trainable head.

    Reads the notebook-level IMG_WIDTH, IMG_HEIGHT and NUM_CLASSES at call time.
    The head is GlobalAveragePooling2D -> Dense(64, relu) -> Dropout(0.5) ->
    Dense(NUM_CLASSES, softmax).

    Returns:
        The uncompiled Keras `Model`.
    """
    # NOTE(review): the shape is written (width, height, 3); this is harmless
    # while both are 224, but confirm the intended order before using
    # non-square inputs.
    image_shape = (IMG_WIDTH, IMG_HEIGHT, 3)
    backbone = ResNet50(include_top=False, input_shape=image_shape)
    # Freeze every backbone layer so only the new head receives updates.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    inputs = Input(shape=image_shape)
    features = backbone(inputs)
    pooled = GlobalAveragePooling2D()(features)
    hidden = Dense(64, activation='relu')(pooled)
    regularised = Dropout(0.5)(hidden)
    predictions = Dense(NUM_CLASSES, activation='softmax')(regularised)
    return Model(inputs=inputs, outputs=predictions)

time: 1.78 ms (started: 2023-02-19 11:20:15 +00:00)


In [13]:
# Constants read by model_maker() when it is called below.
NUM_CLASSES = 102
IMG_WIDTH, IMG_HEIGHT = 224, 224

# Training-time generator: random rotation/shift/zoom augmentation together with
# the ResNet50 `preprocess_input`.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                   rotation_range=20,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   zoom_range=0.2)

# Shuffled, one-hot labelled batches; the fixed seed makes the shuffling repeatable.
train_generator = train_datagen.flow_from_directory(root_dir,
                                                    target_size=(IMG_WIDTH,
                                                                 IMG_HEIGHT),
                                                    shuffle=True,
                                                    seed=12345,
                                                    class_mode='categorical')

# Build the frozen-base classifier and train it; batches come from the generator.
model_finetuned = model_maker()
model_finetuned.compile(loss='categorical_crossentropy',
              optimizer=tensorflow.keras.optimizers.Adam(0.001),
              metrics=['acc'])
model_finetuned.fit(train_generator,
                    epochs=10)

Found 9144 images belonging to 102 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f75f4185ee0>

time: 20min 37s (started: 2023-02-19 11:23:58 +00:00)


In [18]:
!mkdir -p /models
model_finetuned.save('/models/model-resnet-finetuned.h5')

time: 805 ms (started: 2023-02-19 11:49:01 +00:00)


In [39]:
# layers[-4] is the GlobalAveragePooling2D output (the layers after it are
# Dense(64), Dropout and the softmax Dense), giving a 2048-d embedding.
# NOTE(review): model_maker() froze every ResNet50 layer and the summary reports
# 0 trainable params, so this extractor contains no weights that the training
# above could have changed - confirm this is the intended "fine-tuned" feature.
model_finetuned_extractor = Model(model_finetuned.input, model_finetuned.layers[-4].output)
model_finetuned_extractor.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_7 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 resnet50 (Functional)       (None, 7, 7, 2048)        23587712  
                                                                 
 global_average_pooling2d_1   (None, 2048)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
Total params: 23,587,712
Trainable params: 0
Non-trainable params: 23,587,712
_________________________________________________________________
time: 63.2 ms (started: 2023-02-19 11:59:32 +00:00)


In [40]:
datagen = tensorflow.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preprocess_input)

root_dir='/datasets/caltech101'

generator_refined = datagen.flow_from_directory(root_dir,
                                        target_size=(224, 224),
                                        class_mode=None,
                                        shuffle=False,
                                        batch_size=64)

feature_list_finetuned = model_finetuned_extractor.predict(generator_refined, verbose=1)
feature_list_finetuned.shape # ensure 2048 dimensions

Found 9144 images belonging to 102 classes.


(9144, 2048)

time: 36.8 s (started: 2023-02-19 11:59:44 +00:00)


In [43]:
!mkdir -p /features
pickle.dump(feature_list_finetuned,open('/features/features-caltech101-resnet-finetuned.pickle', 'wb')) 

time: 319 ms (started: 2023-02-19 12:00:55 +00:00)


In [44]:
!cp /features/features-caltech101-resnet-finetuned.pickle /content/gdrive/MyDrive/ahrefs/features/features-caltech101-resnet-finetuned.pickle

time: 262 ms (started: 2023-02-19 12:03:08 +00:00)


In [51]:
bike1 = extract_features('/datasets/caltech101/Motorbikes/image_0001.jpg', model_finetuned_extractor)
bike2 = extract_features('/datasets/caltech101/Motorbikes/image_0002.jpg', model_finetuned_extractor)
plane1 = extract_features('/datasets/caltech101/airplanes/image_0001.jpg', model_finetuned_extractor)

def compare_metrics(c1ass1image1, class1image2, class2image1):
  """Print Euclidean distance, dot product and cosine similarity for two pairs.

  Each printed line shows the value for (c1ass1image1, class1image2) followed
  by the value for (c1ass1image1, class2image1). Nothing is returned.

  NOTE: this re-declares the `compare_metrics` defined in an earlier cell.
  """
  def cosine_similarity(list_1, list_2):
    # Dot product divided by the product of the two vector norms.
    return dot(list_1, list_2) / (norm(list_1) * norm(list_2))

  reference = c1ass1image1

  # The first pair of images is expected to have the smaller distance.
  euclid_pair1 = np.linalg.norm(reference - class1image2)
  euclid_pair2 = np.linalg.norm(reference - class2image1)
  print('Euclidean: ', euclid_pair1, euclid_pair2)

  # The first pair of images is expected to have the larger dot product.
  dot_pair1 = reference @ class1image2
  dot_pair2 = reference @ class2image1
  print('Dot product:', dot_pair1, dot_pair2)

  # The first pair of images is expected to have the larger cosine similarity.
  cos_pair1 = cosine_similarity(reference, class1image2)
  cos_pair2 = cosine_similarity(reference, class2image1)
  print('Cosine Similarity: ', cos_pair1, cos_pair2)

compare_metrics(bike1, bike2, plane1)

Euclidean:  20.39609 36.72865
Dot product: 1248.0394 548.33295
Cosine Similarity:  0.8586098 0.45174485
time: 650 ms (started: 2023-02-19 12:08:53 +00:00)


# VOC2012
- Harder image collection where multiple labels exist per image

In [45]:
!wget -P /datasets "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"

--2023-02-19 12:03:54--  http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
Resolving host.robots.ox.ac.uk (host.robots.ox.ac.uk)... 129.67.94.152
Connecting to host.robots.ox.ac.uk (host.robots.ox.ac.uk)|129.67.94.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1999639040 (1.9G) [application/x-tar]
Saving to: ‘/datasets/VOCtrainval_11-May-2012.tar’


2023-02-19 12:04:15 (93.7 MB/s) - ‘/datasets/VOCtrainval_11-May-2012.tar’ saved [1999639040/1999639040]

time: 20.6 s (started: 2023-02-19 12:03:54 +00:00)


In [46]:
!tar -xvf /datasets/VOCtrainval_11-May-2012.tar --directory /datasets

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
VOCdevkit/VOC2012/SegmentationClass/2008_001876.png
VOCdevkit/VOC2012/SegmentationClass/2008_001882.png
VOCdevkit/VOC2012/SegmentationClass/2008_001885.png
VOCdevkit/VOC2012/SegmentationClass/2008_001895.png
VOCdevkit/VOC2012/SegmentationClass/2008_001896.png
VOCdevkit/VOC2012/SegmentationClass/2008_001926.png
VOCdevkit/VOC2012/SegmentationClass/2008_001966.png
VOCdevkit/VOC2012/SegmentationClass/2008_001971.png
VOCdevkit/VOC2012/SegmentationClass/2008_001992.png
VOCdevkit/VOC2012/SegmentationClass/2008_001997.png
VOCdevkit/VOC2012/SegmentationClass/2008_002032.png
VOCdevkit/VOC2012/SegmentationClass/2008_002043.png
VOCdevkit/VOC2012/SegmentationClass/2008_002064.png
VOCdevkit/VOC2012/SegmentationClass/2008_002066.png
VOCdevkit/VOC2012/SegmentationClass/2008_002067.png
VOCdevkit/VOC2012/SegmentationClass/2008_002073.png
VOCdevkit/VOC2012/SegmentationClass/2008_002079.png
VOCdevkit/VOC2012/SegmentationClass/2008_002080.png

In [47]:
cat /datasets/VOCdevkit/VOC2012/Annotations/2011_002897.xml

<annotation>
	<filename>2011_002897.jpg</filename>
	<folder>VOC2012</folder>
	<object>
		<name>bottle</name>
		<bndbox>
			<xmax>500</xmax>
			<xmin>473</xmin>
			<ymax>284</ymax>
			<ymin>202</ymin>
		</bndbox>
		<difficult>0</difficult>
		<occluded>0</occluded>
		<pose>Unspecified</pose>
		<truncated>1</truncated>
	</object>
	<segmented>0</segmented>
	<size>
		<depth>3</depth>
		<height>375</height>
		<width>500</width>
	</size>
	<source>
		<annotation>PASCAL VOC2011</annotation>
		<database>The VOC2011 Database</database>
		<image>flickr</image>
	</source>
</annotation>
time: 174 ms (started: 2023-02-19 12:04:28 +00:00)


In [48]:
# NOTE: this cell rebinds the globals `root_dir`, `datagen`, `generator`,
# `model_architecture` and `model` that earlier Caltech101 cells also use.
root_dir = '/datasets/VOCdevkit/VOC2012'

datagen = tensorflow.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preprocess_input)

# Unshuffled, label-free batches so the prediction rows follow the generator's file order.
generator = datagen.flow_from_directory(root_dir,
                                        target_size=(224, 224),
                                        class_mode=None,
                                        shuffle=False,
                                        batch_size=64,
                                        classes=["JPEGImages"]) # not really a class, just a hack to filter out unnecessary folders without creating class-based subfolders

# Back to the plain pre-trained extractor from model_picker (not the fine-tuned models).
model_architecture = 'resnet'  # Resnet has largest fraction of non-zero values compared to other keras imagenet pretrained models: https://youtu.be/-5BAepEE9I8?t=524
model = model_picker(model_architecture)

voc_feature_list = model.predict(generator, verbose=1)


Found 17125 images belonging to 1 classes.
time: 1min 52s (started: 2023-02-19 12:04:28 +00:00)


In [52]:
voc_feature_list.shape

(17125, 2048)

time: 3.48 ms (started: 2023-02-19 12:09:19 +00:00)


## Checking pre-trained resnet works well on VOC2012

In [53]:
vocplane1 = extract_features('/datasets/VOCdevkit/VOC2012/JPEGImages/2007_000032.jpg', model)
vocplane2 = extract_features('/datasets/VOCdevkit/VOC2012/JPEGImages/2007_000033.jpg', model)
voccomputer1 = extract_features('/datasets/VOCdevkit/VOC2012/JPEGImages/2007_000039.jpg', model)

compare_metrics(vocplane1,vocplane2,voccomputer1)

Euclidean:  305.6701 327.21844
Dot product: 92978.72 77942.03
Cosine Similarity:  0.6756949 0.61155486
time: 215 ms (started: 2023-02-19 12:09:20 +00:00)


In [None]:
# Build the full path of every VOC image. The previous code concatenated the
# dataset root and the bare filename with no '/JPEGImages/' in between, giving
# paths such as '/datasets/VOCdevkit/VOC20122007_000027.jpg' that do not exist.
voc_image_dir = '/datasets/VOCdevkit/VOC2012/JPEGImages'
voc_filenames = [os.path.join(voc_image_dir, filename) for filename in sorted(os.listdir(voc_image_dir))]

In [None]:
# Context managers flush and close each file; the bare open() calls never closed them.
with open('/features/filenames-voc2012.pickle', 'wb') as f:
    pickle.dump(voc_filenames, f)
with open('/features/features-voc2012-' + model_architecture + '.pickle', 'wb') as f:
    pickle.dump(voc_feature_list, f)

In [None]:
# cp -r merges /features into ahrefs/features and overwrites files that have the
# same name; it may be better to cp individual files.
!cp -r /features /content/gdrive/MyDrive/ahrefs 