# Visual Search

## Imports

In [2]:
! pip3 install -r requirements.txt



In [9]:
# General Use
import csv
import sys
#import requests
import skimage.io
import os
import glob
import pickle
from IPython.display import display, Image, HTML
import numpy as np
import pandas as pd
import scipy.sparse as sp
from sklearn.preprocessing import LabelEncoder
import cv2
import h5py
import json
import datetime
import time
import keras

# TensorFlow
#import tensorflow as tf


os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"

In [17]:
# Keras imports
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.applications.vgg19 import VGG19
from keras.applications.xception import Xception
# from keras.applications.resnet50 import ResNet50 
# from keras_applications.resnet import ResNet50
from tensorflow.keras.applications.resnet50 import ResNet50

from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.applications.mobilenet import MobileNet
from keras.applications.inception_v3 import InceptionV3
from keras.applications.nasnet import NASNetMobile
from keras.preprocessing import image
from keras.models import Model
from keras.models import model_from_json
from keras.layers import Input, Dense, Dropout, Flatten, BatchNormalization
# from keras.utils import multi_gpu_model
from tensorflow.python.keras.utils.multi_gpu_utils import multi_gpu_model

The configuration file points to the Watchfinder dataset.

In [22]:
# load the user configs
with open('conf/conf.json') as f:
    config = json.load(f)

# config variables
model_name    = config["model"]
weights     = config["weights"]
include_top   = config["include_top"]
train_path    = config["train_path"]
features_path   = config["features_path"]
labels_path   = config["labels_path"]
test_size     = config["test_size"]
results     = config["results"]
model_path    = config["model_path"]

# start time
print ("[STATUS] start time - {}".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
start = time.time()

[STATUS] start time - 2022-05-06 12:47


## Pick a pre-trained network
In this case we pick VGG16: it is simple and fast enough for the dataset in use.  
We cut it at the layer before the final classifier and create our own model version.

In [20]:
base_model = VGG16(weights=weights)
# model = Model(input=base_model.input, output=base_model.get_layer('fc1').output)
model = Model()
image_size = (224, 224)

## Extract features and encode labels
We run the pictures through the network and associate each feature set to its class.

In [23]:
# path to training dataset
train_labels = os.listdir(train_path)

# encode the labels
print ("[INFO] encoding labels...")
le = LabelEncoder()
le.fit([tl for tl in train_labels])

# variables to hold features and labels
features = []
labels   = []
file_map=[]

# loop over all the labels in the folder
count = 1
for i, label in enumerate(train_labels):
  cur_path = train_path + "/" + label
  count = 1
  for image_path in glob.glob(cur_path + "/*.jpg"):
    try:
      img = image.load_img(image_path, target_size=image_size)
      x = image.img_to_array(img)
      x = np.expand_dims(x, axis=0)
      x = preprocess_input(x)
      # extract features  
      feature = model.predict(x)
      flat = feature.flatten()
      features.append(flat)
      labels.append(label)
      file_map.append(image_path)  
      print ("[INFO] processed - " + str(count))
      count += 1
    except:
      pass
  print ("[INFO] completed label - " + label)

# encode the labels using LabelEncoder
le = LabelEncoder()
le_labels = le.fit_transform(labels)

# get the shape of training labels
print ("[STATUS] training labels: {}".format(le_labels))
print ("[STATUS] training labels shape: {}".format(le_labels.shape))

[INFO] encoding labels...
[INFO] completed label - .DS_Store
[INFO] completed label - Child1 copy.bmp
[INFO] completed label - Father1 copy.bmp
[INFO] completed label - Mother1 copy.bmp
[STATUS] training labels: []
[STATUS] training labels shape: (0,)


In [24]:
features=np.vstack(features)

ValueError: need at least one array to concatenate

In [None]:
print (features)

## Compute distances
We'll pick a query image, then compute cosine, euclidean and Hamming distances among it and the rest of the dataset.

In [None]:
idx=1001
print(file_map[idx])
Image(file_map[idx])

In [None]:
from sklearn.metrics import pairwise_distances
distances = pairwise_distances(features[idx,:].reshape(1,-1), features, metric='cosine')

In [None]:
print(distances)

Pick the 9 closest images

In [None]:
indices=np.argsort(distances)[0][:9]

In [None]:
def Display_Images(images, header=None, width="100%"): # to match Image syntax
    if type(width)==type(1): width = "{}px".format(width)
    html = ["<table style='width:{}'><tr>".format(width)]
    if header is not None:
        html += ["<th>{}</th>".format(h) for h in header] + ["</tr><tr>"]

    cols=1
    for image in images:
        print(image)
        html.append("<td><img src='{}' /></td>".format(image))
        cols+=1
        if (cols>3):
            html.append("</tr><tr>")
            cols=1
    html.append("</tr></table>")
    display(HTML(''.join(html)))

In [None]:
Display_Images([file_map[i] for i in indices],width="100%")

In [None]:
distances = pairwise_distances(features[idx,:].reshape(1,-1), features, metric='euclidean')

In [None]:
print(distances)

In [None]:
indices=np.argsort(distances)[0][:9]

In [None]:
Display_Images([file_map[i] for i in indices],width="100%")

In [None]:
hdistances = pairwise_distances(features[idx,:].reshape(1,-1), features, metric='hamming')

In [None]:
print(hdistances)

In [None]:
hindices=np.argsort(hdistances)[0][:9]

In [None]:
Display_Images([file_map[i] for i in hindices],width="100%")

In [None]:
np.amax(features)

Create a binary code explicitly

In [None]:
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()
print(scaler.fit(features))
scaled_features=scaler.transform(features)

In [None]:
binary_features = (scaled_features<0.5).astype(int)
print(binary_features)

In [None]:
scaled_query=scaler.transform(features[idx,:].reshape(1,-1))
binary_query=(scaled_query<0.5).astype(int)
print(binary_query)

In [None]:
hdistances2 = pairwise_distances(binary_query, binary_features, metric='hamming')

In [None]:
print(hdistances2)

In [None]:
hindices=np.argsort(hdistances)[0][:9]

In [None]:
Display_Images([file_map[i] for i in hindices],width="100%")

# Semantic Hashing
Now let us try with the deep fashion dataset http://mmlab.ie.cuhk.edu.hk/projects/DeepFashion.html  
We will add a layer for 64-bit semantic hashing, i.e. to characterize each image with a 64-bit number.  
There is no "binary" activation function, so we'll approximate with tanh and then do some postprocessing.  
64 bit is current limitation of MySQL bit() type on mysql5.7, which we want to use in azure to offload search.


## Files Preprocessing
We move the data into directories reflecting the 46 categories, then split into train and test.  
Pretty boring - not much to see

In [None]:
#prepare directory structures
# path to training dataset
img_path='./fashion_data/img'
train_path='./fashion_data/imgtrain'

# Copy images into fundamental categories structure
import os
source = img_path
dest = train_path
dirs = os.listdir(source)
import shutil
import numpy as np
for d in dirs:
    files=os.listdir(os.path.join(source,d))
    #print (len(files))
    #if len(files)>50:
        
    newdir = d.rsplit('_',1)[1]
    print (d.rsplit('_',1)[1])
    newdest=dest+'/'+newdir
    if not os.path.exists(newdest):
                os.mkdir(newdest)   

    for f in files:
        #try:
            full_file_name = os.path.join(d, f)

            target_file_name=os.path.join(newdest,f)
            if os.path.exists(target_file_name):
                target_file_name = os.path.splitext(target_file_name)[0]+'_'+time.strftime("%Y%m%d-%H%M%S")+os.path.splitext(target_file_name)[1]

            print(source+'/'+full_file_name)
            print(target_file_name)

            shutil.copy(source+'/'+full_file_name, target_file_name)
        #except:
        #    pass

In [None]:
#move some images to test directory
# path to training dataset
from math import floor
import random
import shutil

train_path='./fashion_data/imgtrain'
test_path='./fashion_data/imgtest'

#move some images for testing
source = train_path
dest = test_path
dirs = os.listdir(source)
count=0
for d in dirs:
    print(d)
    count +=1
    print(count)
    if ('.DS_Store' not in d):
        files=os.listdir(os.path.join(source,d))
        #select 10% random items        
        for f in random.sample(files,floor(len(files)*0.1)):
            #try:

                full_file_name = os.path.join(d, f)
                print (full_file_name)
                if not os.path.exists(dest+'/'+d):
                    os.mkdir(dest+'/'+d)
                print(source+'/'+full_file_name)
                print(dest+'/'+full_file_name)
                #fore some reason move does not work - it causes a filename does not exist...
                shutil.move(source+'/'+full_file_name, dest+'/'+full_file_name)                
            #except:
            #    pass

## Create Datasets for Model
We use the ImageDataGenerator class to create a training, test and validation dataset by iterating over our directory structure. Each directory name is a label.  
We also take advantage of some of the image processing features to introduce some "randomness" to make the classification more robust.

In [None]:
from keras.preprocessing.image import ImageDataGenerator
train_path='./fashion_data/img/train'
test_path='./fashion_data/img/val'

train_datagen = ImageDataGenerator(
    rotation_range=30.,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
    #samplewise_center=True,
    #samplewise_std_normalization=True
        #rescale=1./255
)

test_datagen = ImageDataGenerator(
    #samplewise_center=True,
    #samplewise_std_normalization=True
    #rescale=1./255
)

train_generator = train_datagen.flow_from_directory(
        train_path,
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical')

validation_generator = test_datagen.flow_from_directory(
        test_path,
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical')

In [None]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping,ReduceLROnPlateau
from keras.layers import Activation, GlobalAveragePooling2D
from keras.optimizers import SGD, Adam, rmsprop

In [None]:
# Now let us try with deep fashion dataset and add a layer for 64-bit semantic hashing
# 64 bit is current limitation of MySQL bit() type on mysql5.7, which we want to use in azure to offload search
#base_model = InceptionV3(weights='imagenet', include_top=False)
#base_model.summary()
#image_size = (299, 299)

# add a global spatial average pooling layer
#x = base_model.output
#x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
#x = Dense(1024, activation='relu')(x)
base_model = NASNetMobile(weights='imagenet', include_top=False, pooling='avg', input_shape=(224,224,3))
#encoder=Flatten()(model.output)
#x=base_model.get_layer(base_model.layers[-2].name).output
#for layer in base_model.layers:
#    layer.trainable=False
x=base_model.output
#x=Flatten()(x)
#x=Dense(256,activation='relu',name='encoder')(x)
#x=Dropout(0.3)(x)
#x=Dense(256,activation='relu')(x)
x=Dense(64, name='hash')(x)
#x=BatchNormalization()(x)
x=Activation('hard_sigmoid',name='hash-out')(x)
#x=Dense(2048, name='decoder', activation='relu')(x)
predictions=Dense(46, activation='softmax')(x)

#for layer in base_model.layers[:-12]:
#    layer.trainable=False

#create graph of your new model
G=1
if G>1:
    with tf.device("/cpu:0"):
        model = Model(input = base_model.input, output = predictions)
    print("Training with {} GPUs".format(G))
    model=multi_gpu_model(model, gpus=G)
else:
    model = Model(input = base_model.input, output = predictions)

#compile the model
#sgd=SGD(lr=0.0001, momentum=0.5, nesterov=False)
adam=Adam(lr=0.001)
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()    


In [None]:
base_model.summary()

In [None]:
# as of version 1.7 tensorboard consumes gpu. we don't want that
tb=TensorBoard()
tb.set_model(model)
tb.log_dir

In [None]:
# bag of tricks to control the gradient descent
# reduce the learning rate on plateaus
lr_reducer = ReduceLROnPlateau(monitor='val_acc',
                               patience=5,
                               factor=0.2,
                               cooldown=1,
                               verbose=1)
#tensorboard = TensorBoard(log_dir='./logs')
# stop if valuation accuracy plateaus 
early_stopper = EarlyStopping(monitor='val_acc',
                              patience=11,
                              verbose=1)
# save the model at every improvement
checkpoint = ModelCheckpoint("Resnet50_encoder2048_hash64_5slow.h5", 
                             monitor='val_acc', verbose=1, 
                             save_best_only=True, save_weights_only=False, 
                             mode='auto', period=1)

In [None]:
print(len(train_generator), len(validation_generator))

## Training
For time's sake, we can reduce the number of steps per epoch, but that is going to cost us in valuation accuracy as we're going to leave a subset of pictures "unseen"

In [None]:
history=model.fit_generator(
    train_generator,
    steps_per_epoch=2200,
    epochs=200,
    validation_data=validation_generator,
    validation_steps=420,
    callbacks=[ #lr_reducer,
               early_stopper, 
               checkpoint
               #tb
              ],
    workers=4,
    shuffle=True
)

In [None]:
model.save('resnet50_hash64.h5')

## Test 
We test on a different subset and compute metrics

In [None]:
test_datagen = ImageDataGenerator()
test_generator = test_datagen.flow_from_directory(
        "./fashion_data/img/test",
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical')

scores = model.evaluate_generator(test_generator, verbose=1)
print (model.metrics_names)
print (scores)



## Featurize
We calculate the hash codes of our catalog by running the pictures through the tuned network

In [None]:
hash_model=Model(input=model.input, output=model.get_layer('hash-out').output)


In [None]:
hash_model.predict_generator(test_generator, verbose=1)

In [None]:
# path to training dataset
train_path='./fashion_data/img/test'
train_labels = os.listdir(train_path)
image_size = (224, 224)
print ("[INFO] encoding image hashes...")

def binarize (a_list):
    b=''.join(str(e) for e in a_list)
    return int(b,2)

# variables to hold features and labels

features = []
labels   = []
file_map=[]
table=pd.DataFrame(columns=['id', 'path', 'label', 'code'])
                            
# loop over all the labels in the folder
count = 1
for i, label in enumerate(train_labels):
    cur_path = train_path + "/" + label 
    print(cur_path)
    for image_path in glob.glob('{}/**/*.jpg'.format(cur_path), recursive=True):
        #try:
            img = image.load_img(image_path, target_size=image_size)
            x = image.img_to_array(img)
            x = np.expand_dims(x, axis=0)
            x = preprocess_input(x)
            #generate hash using hash model
            hash_out = hash_model.predict(x)
            # a very crude way to generate a binary number, but effective
            bin_out = np.where(hash_out.flatten()>0.5,1,0)           
            features.append(bin_out)  
            labels.append(label)
            file_map.append(image_path)
            num_out=binarize(bin_out)
            print (image_path, num_out)
            table=table.append(pd.DataFrame(data=[[count, image_path, label, num_out]], columns=['id', 'path', 'label', 'code']), ignore_index=True)
            print ("[INFO] processed - " + str(count))
            count += 1
        #except:
        #    pass
    print ("[INFO] completed label - " + label)
    
    

In [None]:
print(table)

In [None]:
import mysql.connector
from mysql.connector import errorcode

# Obtain connection string information from the portal
config = {
  'host':'gmmysql.mysql.database.azure.com',
  'user':'gmarchetti@gmmysql',
  'password':'ScienceGuy1!',
  'database':'vsdb'
}


import sqlalchemy
database_username = 'gmarchetti@gmmysql'
database_password = 'ScienceGuy1!'
database_ip       = 'gmmysql.mysql.database.azure.com'
database_name     = 'vsdb'
database_connection = sqlalchemy.create_engine('mysql+mysqlconnector://{0}:{1}@{2}/{3}'.
                                               format(database_username, database_password, 
                                                      database_ip, database_name))

In [None]:
table.to_sql(con=database_connection, name='imagedata', if_exists='replace')    
    

In [None]:
#query to select top 9 with hamming distance from given hash

from sqlalchemy import text
database_connection = sqlalchemy.create_engine('mysql+mysqlconnector://{0}:{1}@{2}/{3}'.
                                               format(database_username, database_password, 
                                                      database_ip, database_name))

sql = text('SELECT id, path, label, BIT_COUNT(105117672137245588 ^ code)  as hd FROM imagedata ORDER BY hd ASC limit 9;')
result = database_connection.engine.execute(sql)
print(result)

file_map=[]
for row in result:
    print (row)
    file_map.append(row[1])



We picked a blazer. The catalog contains several categories. So far, the algorithm has at least identified that it is a blazer. However, you can see that several have distance 0. Our 64-bit binarization algorithm loses quite a bit of resolution.

In [None]:
Display_Images(file_map,width="100%")

Now only tanks, just to check

In [None]:
#query to select top 9 with hamming distance from given hash

from sqlalchemy import text
database_connection = sqlalchemy.create_engine('mysql+mysqlconnector://{0}:{1}@{2}/{3}'.
                                               format(database_username, database_password, 
                                                      database_ip, database_name))

sql = text('SELECT id, path, label, BIT_COUNT(105117109492337556 ^ code)  as hd FROM imagedata WHERE label=\'Tank\' ORDER BY hd ASC limit 9 ;')
result = database_connection.engine.execute(sql)
print(result)

file_map=[]
for row in result:
    print (row)
    file_map.append(row[1])


In [None]:
Display_Images(file_map,width="100%")


Now train  with 128 bit resolution

In [None]:
from keras.preprocessing.image import ImageDataGenerator
train_path='./fashion_data/img/train'
test_path='./fashion_data/img/val'

train_datagen = ImageDataGenerator(
    rotation_range=30.,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
    #samplewise_center=True,
    #samplewise_std_normalization=True
        #rescale=1./255
)

test_datagen = ImageDataGenerator(
    #samplewise_center=True,
    #samplewise_std_normalization=True
    #rescale=1./255
)

train_generator = train_datagen.flow_from_directory(
        train_path,
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical')

validation_generator = test_datagen.flow_from_directory(
        test_path,
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical')

In [None]:
# Now let us try with deep fashion dataset and add a layer for 64-bit semantic hashing
# 64 bit is current limitation of MySQL bit() type on mysql5.7, which we want to use in azure to offload search
#base_model = InceptionV3(weights='imagenet', include_top=False)
#base_model.summary()
#image_size = (299, 299)

# add a global spatial average pooling layer
#x = base_model.output
#x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
#x = Dense(1024, activation='relu')(x)
base_model = ResNet50(weights='imagenet', include_top=True, input_shape=(224,224,3))
#encoder=Flatten()(model.output)
x=base_model.get_layer(base_model.layers[-2].name).output
#x=Flatten()(x)
#x=Dense(256,activation='relu',name='encoder')(x)
#x=Dropout(0.3)(x)
#x=Dense(256,activation='relu')(x)
x=Dense(1024, name='hash')(x)
#x=BatchNormalization()(x)
x=Activation('hard_sigmoid',name='hash-out')(x)
#x=Dense(2048, name='decoder', activation='relu')(x)
predictions=Dense(46, activation='softmax')(x)

#for layer in base_model.layers[:-12]:
#    layer.trainable=False

#create graph of your new model
G=1
if G>1:
    with tf.device("/cpu:0"):
        model2 = Model(input = base_model.input, output = predictions)
    print("Training with {} GPUs".format(G))
    model2=multi_gpu_model(model, gpus=G)
else:
    model2 = Model(input = base_model.input, output = predictions)

#compile the model
#sgd=SGD(lr=0.0001, momentum=0.5, nesterov=False)
adam=Adam(lr=0.001)
model2.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

model2.summary()    


In [None]:
# bag of tricks to control the gradient descent
# reduce the learning rate on plateaus
lr_reducer = ReduceLROnPlateau(monitor='val_acc',
                               patience=5,
                               factor=0.2,
                               cooldown=1,
                               verbose=1)
#tensorboard = TensorBoard(log_dir='./logs')
# stop if valuation accuracy plateaus 
early_stopper = EarlyStopping(monitor='val_acc',
                              patience=11,
                              verbose=1)
# save the model at every improvement
checkpoint = ModelCheckpoint("Resnet50_encoder2048_hash1024_adam.h5", 
                             monitor='val_acc', verbose=1, 
                             save_best_only=True, save_weights_only=False, 
                             mode='auto', period=1)

In [None]:
print(len(train_generator), len(validation_generator))

In [None]:
history=model2.fit_generator(
    train_generator,
    steps_per_epoch=2200,
    epochs=200,
    validation_data=validation_generator,
    validation_steps=420,
    callbacks=[ #lr_reducer,
               early_stopper, 
               checkpoint
               #tb
              ],
    workers=12,
    shuffle=True
)