In [1]:
import numpy as np
from numpy.linalg import norm
import pickle
from tqdm import tqdm, tqdm_notebook
import os
import random
import time
import math
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, GlobalAveragePooling2D

In [2]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# using vgg19 as backbone
model = VGG19(
    weights='imagenet',
    include_top=False,
    input_shape=(160, 160,3),
    pooling='max'
)

In [4]:
# dataset
dataset = r'S:\\( F I X E D   P R O J E C T )\\003. Reverse Imgae Search Engine 2 - Jeno\\dataset jeno'

In [5]:
# feature extraction function

def extract_features(image_path, model):
    input_shape = (224, 224, 3)
    img = image.load_img(
        image_path,
        target_size=(input_shape[0], input_shape[1])
    )

    # turning image to array
    img_array = image.img_to_array(img)

    # expand image
    expanded_img_array = np.expand_dims(img_array, axis=0)

    # preprocess iamge
    img_preprocess = preprocess_input(expanded_img_array)

    # extracting
    features_img = model.predict(img_preprocess)

    # flatten features
    flatten_img = features_img.flatten()
    normalized_features = flatten_img/norm(flatten_img)

    return normalized_features

In [6]:
# get list of file name
extenstions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']

def get_listfile(dir):
    list_file = []

    for root, directories, filenames in os.walk(dir):
        for filename in filenames:
            if any(ext in filename for ext in extenstions):
                filepath = os.path.join(root, filename)

                if os.path.exists(filepath):      # checking if path exist
                    list_file.append(filepath)
                else: 
                    print(filepath)
    return list_file

In [8]:
filenames = sorted(get_listfile(dataset))
print(len(filenames))

0


In [7]:
# extract using vgg19

vgg19_features = []

for i in tqdm_notebook(range(len(filenames))):
    vgg19_features.append(extract_features(filenames[i], model))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm_notebook(range(len(filenames))):


  0%|          | 0/5481 [00:00<?, ?it/s]



In [8]:
pickle.dump(vgg19_features,
        open('features_vgg19.pickle', 'wb'))

In [9]:
# extract using ImageDataGenerator

dataset = r".\dataset"
batch_size = 128
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
generator = datagen.flow_from_directory(dataset, target_size=(224,224), class_mode='binary', shuffle=False)

num_imgs =len(generator.filenames)
num_epochs = int(math.ceil(num_imgs/batch_size))

imgdatagen_features = []
imgdatagen_features = model.predict_generator(generator, num_epochs)

Found 5481 images belonging to 1 classes.


  imgdatagen_features = model.predict_generator(generator, num_epochs)


In [10]:
# saving ImageDataGenerator features

for i, features in enumerate(imgdatagen_features):
    imgdatagen_features = features/norm(features)

imgdatagen_features = imgdatagen_features.reshape(len(imgdatagen_features), -1)


In [11]:
filenames = [dataset +'/'+s for s in generator.filenames]

In [12]:
pickle.dump(generator.classes, open('classname.pickle', 'wb'))
pickle.dump(filenames, open('filenames.pickle', 'wb'))
pickle.dump(imgdatagen_features,
            open('features_imgdatagen.pickle', 'wb'))

In [7]:
# extracting with finetuned model

train_samples = 5481
num_class = 1
img_width, img_height = 160, 160
dataset = r'.\dataset'

In [8]:
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range = 0.1,
    zoom_range=0.1,
    brightness_range=(1, 3),
    vertical_flip=True,
)


train_generator = train_datagen.flow_from_directory(
    dataset,
    target_size=(img_width, img_height),
    shuffle=True,
    batch_size=64,
    interpolation='nearest',
    seed=12345,
    class_mode='binary'
)

Found 5481 images belonging to 1 classes.


In [10]:
def model_maker():
    base_model = VGG19(include_top=False,
                           input_shape=(img_width, img_height, 3))
    for layer in base_model.layers[:]:
        layer.trainable = False
    input = Input(shape=(img_width, img_height, 3))
    custom_model = base_model(input)
    custom_model = GlobalAveragePooling2D()(custom_model)
    custom_model = Dense(64, activation='relu')(custom_model)
    custom_model = Dropout(0.2)(custom_model)
    predictions = Dense(num_class, activation='softmax')(custom_model)
    return Model(inputs=input, outputs=predictions)

In [13]:
model_finetuned = model_maker()
model_finetuned.compile(loss='cosine_similarity',
                        optimizer=tf.keras.optimizers.Adam(0.0001),
                        metrics=['acc'])

In [14]:
model_finetuned.fit_generator(
    train_generator,
    steps_per_epoch=8,
    epochs=25,
    verbose=1
)

  model_finetuned.fit_generator(


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x1e5670cfee0>

In [15]:
model_finetuned.save("./mode finetuned.h5")

In [16]:
finetune_features = []
finetune_features = model_finetuned.predict_generator(train_generator, 25)

for i, finetune_feature in enumerate(finetune_features):
    finetune_features[i] = finetune_feature/norm(finetune_feature)

finetune_features = finetune_features.reshape(len(finetune_features), -1)

  finetune_features = model_finetuned.predict_generator(train_generator, 25)


In [41]:
pickle.dump(
    finetune_features,
    open('./features_finetuned_vgg16.picke', 'wb')
)