## Import Libraries

In [None]:
!pip install anvil-uplink
import concurrent.futures
import os
import time
from getpass import getpass
from pathlib import Path

import anvil.media
import anvil.server
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from google.colab import drive
from keras.models import Model
from keras.preprocessing.image import load_img
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential

Collecting anvil-uplink
  Downloading anvil_uplink-0.3.41-py2.py3-none-any.whl (64 kB)
[?25l[K     |█████                           | 10 kB 19.7 MB/s eta 0:00:01[K     |██████████▏                     | 20 kB 9.5 MB/s eta 0:00:01[K     |███████████████▎                | 30 kB 7.7 MB/s eta 0:00:01[K     |████████████████████▎           | 40 kB 7.2 MB/s eta 0:00:01[K     |█████████████████████████▍      | 51 kB 4.2 MB/s eta 0:00:01[K     |██████████████████████████████▌ | 61 kB 4.5 MB/s eta 0:00:01[K     |████████████████████████████████| 64 kB 1.9 MB/s 
Collecting ws4py
  Downloading ws4py-0.5.1.tar.gz (51 kB)
[?25l[K     |██████▍                         | 10 kB 28.0 MB/s eta 0:00:01[K     |████████████▊                   | 20 kB 27.1 MB/s eta 0:00:01[K     |███████████████████▏            | 30 kB 31.8 MB/s eta 0:00:01[K     |█████████████████████████▌      | 40 kB 29.1 MB/s eta 0:00:01[K     |███████████████████████████████▉| 51 kB 32.5 MB/s eta 0:00:01[K   

# Load data from Google Drive

In [None]:
# Mount Google Drive at /content/drive. The cells below read the zipped data and the saved
# models from this mount point; force_remount=True asks Colab to mount again even if a
# mount already exists.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Quietly (-q) extract the prebuilt index archive from Drive into /content.
# The "Load indexes" cell reads from /content/indexes/..., presumably created by this archive.
!unzip -q "/content/drive/My Drive/CV_Birds/FinalIndexes.zip" -d "/content"

In [None]:
# Quietly (-q) extract the bird dataset archive from Drive into /content.
# The "Load annots" cell later scans /content/Dataset/train.
!unzip -q "/content/drive/My Drive/CV_Birds/Dataset.zip" -d "/content"

In [None]:
# Quietly (-q) extract the mirflickr archive into its own folder, /content/mirflickr25k,
# which the "Load annots" cell scans alongside the bird training set.
!unzip -q "/content/drive/My Drive/CV_Birds/mirflickr.zip" -d "/content/mirflickr25k"

# Set parameters

In [None]:
# Folder on the mounted Drive that holds the saved Keras models loaded in "Load models".
GLOBAL_MODELS_FOLDER = "/content/drive/My Drive/CV_Birds/models"
# Size (in pixels) every query image is resized to before it is fed to the models in search().
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224

## Utility functions

### Create annot and load descriptors

In [None]:
def create_annot(path):
  """Build the annotation table for one image folder.

  Args:
    path: root folder laid out as ``<path>/<identity>/<image>.jpg``.

  Returns:
    A DataFrame with one row per ``.jpg`` file, sorted by path, with columns
    ``identity`` (name of the image's parent directory) and ``image_path``
    (a ``pathlib.Path``).
  """
  image_list = sorted(Path(path).glob('*/*.jpg'))
  # the identity name is in the path (the name of the parent directory)
  names_list = [image.parent.name for image in image_list]
  return pd.DataFrame({'identity': names_list, 'image_path': image_list})

def concatenate_annots(list_of_paths):
  """Build one annotation table covering several image folders.

  Each folder is scanned with ``create_annot`` on a thread pool; the tables are
  then stacked in the order of ``list_of_paths`` and re-indexed 0..n-1.

  Args:
    list_of_paths: iterable of folder paths accepted by ``create_annot``.

  Returns:
    The concatenated DataFrame, or an empty DataFrame when no path is given.
  """
  with concurrent.futures.ThreadPoolExecutor() as executor:
    # map() yields results in input order, so row order stays deterministic.
    annots = list(executor.map(create_annot, list_of_paths))
  if not annots:
    # pd.concat raises on an empty list; keep returning an empty frame instead.
    return pd.DataFrame()
  # Single concat: DataFrame.append no longer exists in pandas >= 2.0 and
  # re-copied the whole accumulated frame on every iteration.
  return pd.concat(annots, ignore_index=True)

In [None]:
def load_descriptors(path):
  """Load the NumPy data saved at `path` (read through a binary file handle)."""
  with open(path, 'rb') as file:
    return np.load(file)

def concatenate_descriptors(list_of_paths):
  """Load several descriptor files and stack them along the first axis.

  Files are read with ``load_descriptors`` on a thread pool and joined in the
  order of ``list_of_paths``.

  Args:
    list_of_paths: iterable of paths accepted by ``load_descriptors``.

  Returns:
    ``None`` when no path is given, the loaded array itself for a single path,
    otherwise the concatenation of all loaded arrays.
  """
  with concurrent.futures.ThreadPoolExecutor() as executor:
    # map() yields results in input order, so descriptor rows keep the file order.
    loaded = list(executor.map(load_descriptors, list_of_paths))
  if not loaded:
    return None
  if len(loaded) == 1:
    return loaded[0]
  # One concatenate instead of growing the array pairwise, which re-copied
  # everything accumulated so far for each additional file.
  return np.concatenate(loaded)

### Compute results

In [None]:
def weighted_KNN(identities, distances, distance_measure):
  """Pick the winning identity among the retrieved neighbours by weighted vote.

  Each neighbour votes for its identity with a weight derived from its distance:
  ``1 / (distance + 1e-7)`` when ``distance_measure`` is ``"Euclidean"`` and
  ``1 - distance`` for any other value. Votes are summed per identity.

  Args:
    identities: identity label of each neighbour.
    distances: distance of each neighbour, aligned with ``identities``.
    distance_measure: name of the metric; only ``"Euclidean"`` is special-cased.

  Returns:
    The identity with the largest summed score (on ties, the first one in sorted
    identity order), or ``None`` when there are no neighbours.
  """
  if len(identities) == 0:
    # Nothing to vote on; previously this surfaced as an opaque KeyError.
    return None
  if distance_measure == "Euclidean":
    # The small epsilon keeps an exact match (distance 0) from dividing by zero.
    scores = [1 / (distance + 1e-7) for distance in distances]
  else:
    scores = [1 - distance for distance in distances]
  votes = pd.DataFrame({'identity': identities, 'score': scores})
  # groupby sorts the identities, and idxmax returns the first maximum, so ties
  # resolve to the alphabetically first identity.
  totals = votes.groupby('identity')['score'].sum()
  return totals.idxmax()

# Load models

In [None]:
# --- "Feature Extraction" pipeline ---
# Load the saved network stored under ResNet152v2/ and cut it at the layer named 'dense',
# so the truncated model outputs that layer's activations.
model = tf.keras.models.load_model(GLOBAL_MODELS_FOLDER + '/ResNet152v2/OneDense512_Dropout_feature_extraction.h5')
feature_extractor = keras.Model(inputs=model.inputs, outputs=model.get_layer('dense').output)
# Load the matching autoencoder and keep only the part up to the layer named 'Encoder128'.
autoencoder = tf.keras.models.load_model(GLOBAL_MODELS_FOLDER + '/AutoEncoder/autoencoder128_feature_extraction.keras')
encoder = keras.Model(inputs=autoencoder.inputs, outputs=autoencoder.get_layer('Encoder128').output)
# Chain both parts: image -> 'dense' activations -> 'Encoder128' code.
# search() uses this model when the request asks for "Feature Extraction".
feature_extraction_model = tf.keras.models.Sequential([
  feature_extractor,
  encoder                                                    
])

# --- Fine-tuning pipeline ---
# Same construction with the fine-tuned checkpoint and its own autoencoder.
# NOTE: this rebinds the temporaries `model`, `feature_extractor`, `autoencoder` and `encoder`;
# feature_extraction_model above keeps its own references, but after this cell those four
# names refer to the fine-tuning objects only.
model = tf.keras.models.load_model(GLOBAL_MODELS_FOLDER + '/ResNet152v2/OneDense512_Dropout_fine_tuning.h5')
feature_extractor = keras.Model(inputs=model.inputs, outputs=model.get_layer('dense').output)
autoencoder = tf.keras.models.load_model(GLOBAL_MODELS_FOLDER + '/AutoEncoder/autoencoder512To128.keras')
encoder = keras.Model(inputs=autoencoder.inputs, outputs=autoencoder.get_layer('Encoder128').output)
# search() uses this model for every request whose model is not "Feature Extraction".
fine_tuning_model = tf.keras.models.Sequential([
  feature_extractor,
  encoder                                                
])

## Load annots

In [None]:
# Build the annotation table (identity + image path) for the bird training set followed by
# the mirflickr images. search() looks rows up by the ids returned from the indexes, so the
# row order presumably has to match the order used when the indexes were built -- TODO confirm.
annot = concatenate_annots(['/content/Dataset/train', '/content/mirflickr25k'])
# Display the table as the cell output.
annot

Unnamed: 0,identity,image_path
0,AFRICAN CROWNED CRANE,/content/Dataset/train/AFRICAN CROWNED CRANE/0...
1,AFRICAN CROWNED CRANE,/content/Dataset/train/AFRICAN CROWNED CRANE/0...
2,AFRICAN CROWNED CRANE,/content/Dataset/train/AFRICAN CROWNED CRANE/0...
3,AFRICAN CROWNED CRANE,/content/Dataset/train/AFRICAN CROWNED CRANE/0...
4,AFRICAN CROWNED CRANE,/content/Dataset/train/AFRICAN CROWNED CRANE/0...
...,...,...
72327,mirflickr,/content/mirflickr25k/mirflickr/im9995.jpg
72328,mirflickr,/content/mirflickr25k/mirflickr/im9996.jpg
72329,mirflickr,/content/mirflickr25k/mirflickr/im9997.jpg
72330,mirflickr,/content/mirflickr25k/mirflickr/im9998.jpg


## Load indexes

In [None]:
# Switch to the PP-Index notebook folder and execute PPIndex.ipynb inside this kernel.
# The next cell uses PrefixTree and PrefixForest, which are presumably defined by that notebook.
# NOTE: %cd changes the working directory for every cell that runs after this one.
%cd "/content/drive/MyDrive/CV_Birds/Notebooks/PP-Index"
%run PPIndex.ipynb

/content/drive/.shortcut-targets-by-id/1rI5YNBuaSlCB__w522WEkHjw-nFuvIo0/CV_Birds/Notebooks/PP-Index


In [None]:
# One index object per (model, structure, metric) combination; choose_index() picks among
# these eight module-level names. Naming: fe = feature extraction, ft = fine tuning.
# Each constructor is pointed at the folder extracted from FinalIndexes.zip and at the name
# of its structure file; the recorded output reports every one as "retrieved from disk".
# NOTE(review): the meaning of the empty first argument is defined in PPIndex.ipynb -- confirm there.
fe_tree_cosine = PrefixTree("", base_directory="/content/indexes/feature_extraction/tree/cosine", tree_file="tree_structure")
fe_tree_euclidean = PrefixTree("", base_directory="/content/indexes/feature_extraction/tree/euclidean", tree_file="tree_structure")
fe_forest_cosine = PrefixForest("", base_directory="/content/indexes/feature_extraction/forest/cosine", forest_file="forest_structure")
fe_forest_euclidean = PrefixForest("", base_directory="/content/indexes/feature_extraction/forest/euclidean", forest_file="forest_structure")

# Same four structures built on the fine-tuned model's descriptors.
ft_tree_cosine = PrefixTree("", base_directory="/content/indexes/fine_tuning/tree/cosine", tree_file="tree_structure")
ft_tree_euclidean = PrefixTree("", base_directory="/content/indexes/fine_tuning/tree/euclidean", tree_file="tree_structure")
ft_forest_cosine = PrefixForest("", base_directory="/content/indexes/fine_tuning/forest/cosine", forest_file="forest_structure")
ft_forest_euclidean = PrefixForest("", base_directory="/content/indexes/fine_tuning/forest/euclidean", forest_file="forest_structure")

Tree retrieved from disk
Tree retrieved from disk
Forest retrieved from disk
Forest retrieved from disk
Tree retrieved from disk
Tree retrieved from disk
Forest retrieved from disk
Forest retrieved from disk


# Start server

In [None]:
def choose_index(model, distance_measure, forest):
  """Return the preloaded index that matches the request options.

  Args:
    model: "Feature Extraction" selects the fe_* indexes; any other value selects
      the ft_* (fine tuning) indexes.
    distance_measure: 'Cosine Similarity' selects the cosine index; any other
      value selects the euclidean one.
    forest: truthy for the prefix forest, falsy for the single prefix tree.

  Returns:
    One of the eight module-level index objects.
  """
  use_cosine = distance_measure == 'Cosine Similarity'

  if model == "Feature Extraction":
    if forest:
      return fe_forest_cosine if use_cosine else fe_forest_euclidean
    return fe_tree_cosine if use_cosine else fe_tree_euclidean

  # every other model name falls back to the fine-tuning indexes
  if forest:
    return ft_forest_cosine if use_cosine else ft_forest_euclidean
  return ft_tree_cosine if use_cosine else ft_tree_euclidean

In [None]:
def search_knn(index, features, k, forest, query_pert):
  """Run a k-nearest-neighbour query on `index`, optionally with query perturbation.

  Args:
    index: index object to query (a prefix tree or a prefix forest).
    features: descriptor of the query image.
    k: number of neighbours to request.
    forest: truthy when `index` is a forest, falsy when it is a single tree.
    query_pert: truthy to enable query perturbation.

  Returns:
    Whatever the underlying index method returns.
  """
  # Without perturbation, trees and forests are queried the same way.
  if not query_pert:
    return index.find_nearest_neighbors(features, k)
  # With perturbation, a forest takes it as a keyword (fixed at 3) ...
  if forest:
    return index.find_nearest_neighbors(features, k, perturbations=3)
  # ... while a tree exposes a dedicated method.
  return index.find_nearest_neighbors_with_query_perturbation(features, k)

In [None]:
# start server
# The uplink key is a secret and must not be stored in the notebook: read it from the
# ANVIL_UPLINK_KEY environment variable, or prompt for it (input hidden) when it is not set.
# The key that was previously committed here should be rotated in the Anvil app settings.
uplink_key = os.environ.get("ANVIL_UPLINK_KEY") or getpass("Anvil uplink key: ")
anvil.server.connect(uplink_key)

Connecting to wss://anvil.works/uplink
Anvil websocket open
Connected to "Default environment" as SERVER


In [None]:
@anvil.server.callable
def search(query_image, model, k, distance_measure, forest, query_pert, show_distances = False):
  """Handle one search request coming in through the Anvil uplink.

  Extracts a descriptor from the query image, runs a k-nn search on the index
  matching the request options, and returns the retrieved images together with
  the identity predicted by weighted voting. Timings of each step are printed.

  Args:
    query_image: Anvil media object holding the query picture.
    model: "Feature Extraction" to use feature_extraction_model; any other value
      uses fine_tuning_model.
    k: number of neighbours to retrieve.
    distance_measure: metric name, forwarded to choose_index and weighted_KNN.
    forest: truthy to search the prefix forest instead of the single tree.
    query_pert: truthy to enable query perturbation.
    show_distances: when truthy, the neighbour distances are appended to the result.

  Returns:
    (prediction, images, identities), plus distances as a fourth item when
    show_distances is truthy.
  """
  request_started = time.time()
  print("****************** New search request ******************")
  print("* Model:", model, "* Metric:", distance_measure, "* k:", str(k))
  print("* Forest:", str(forest), "* Query Perturbation:", str(query_pert))

  # Step 1: turn the uploaded picture into a descriptor
  step_started = time.time()
  with anvil.media.TempFile(query_image) as f:
    pil_image = load_img(f, target_size=(IMAGE_HEIGHT, IMAGE_WIDTH)) # A PIL Image instance
    # add the batch dimension expected by predict()
    batch = np.expand_dims(tf.keras.preprocessing.image.img_to_array(pil_image), axis=0)
    descriptor_model = feature_extraction_model if model == "Feature Extraction" else fine_tuning_model
    features = descriptor_model.predict(batch).squeeze()
  print("1) Extracted query features:  %.3f seconds" % (time.time() - step_started))

  # Step 2: query the index that matches the request
  step_started = time.time()
  ids, distances = search_knn(choose_index(model, distance_measure, forest), features, k, forest, query_pert)
  print("2) k-nn completed:  %.3f seconds" % (time.time() - step_started))

  # Step 3: look up each neighbour in the annotation table
  step_started = time.time()
  hits = [(str(annot['image_path'][i]), annot['identity'][i]) for i in ids]
  images = [anvil.media.from_file(image_path, "image/jpeg") for image_path, _ in hits]
  identities = [identity for _, identity in hits]
  print("3) Images retrieved:  %.3f seconds" % (time.time() - step_started))

  print("--- Search request completed:  %.3f seconds ---\n" % (time.time() - request_started))
  prediction = weighted_KNN(identities, distances, distance_measure)
  if show_distances:
    return prediction, images, identities, distances
  return prediction, images, identities

# Keep this cell running so the kernel stays connected and keeps serving search() calls;
# the log lines printed by each request appear below this cell.
anvil.server.wait_forever()

****************** New search request ******************
* Model: Fine Tuning * Metric: Cosine Similarity * k: 10
* Forest: True * Query Perturbation: True
1) Extracted query features:  11.334 seconds
2) k-nn completed:  0.424 seconds
3) Images retrieved:  0.053 seconds
--- Search request completed:  11.812 seconds ---

****************** New search request ******************
* Model: Fine Tuning * Metric: Cosine Similarity * k: 38
* Forest: True * Query Perturbation: True
1) Extracted query features:  0.153 seconds
2) k-nn completed:  0.772 seconds
3) Images retrieved:  0.045 seconds
--- Search request completed:  0.971 seconds ---

