# AutoEncoder performance analysis
To have a baseline against which to compare the results of our index, we run the same experiments in a brute-force fashion, i.e. by scanning every database descriptor without using any index.

# Import libraries

In [None]:
!pip install scikit-learn-extra
from google.colab import drive
from pathlib import Path
import pandas as pd
import numpy as np
import concurrent.futures
import sklearn_extra
from PIL import Image
import sklearn 
import sklearn.metrics
from tqdm.auto import tqdm
import time
import csv
import os
import heapq

Collecting scikit-learn-extra
  Downloading scikit_learn_extra-0.2.0-cp37-cp37m-manylinux2010_x86_64.whl (1.7 MB)
[?25l[K     |▏                               | 10 kB 18.5 MB/s eta 0:00:01[K     |▍                               | 20 kB 24.0 MB/s eta 0:00:01[K     |▋                               | 30 kB 28.0 MB/s eta 0:00:01[K     |▊                               | 40 kB 31.1 MB/s eta 0:00:01[K     |█                               | 51 kB 34.4 MB/s eta 0:00:01[K     |█▏                              | 61 kB 30.9 MB/s eta 0:00:01[K     |█▍                              | 71 kB 26.9 MB/s eta 0:00:01[K     |█▌                              | 81 kB 27.4 MB/s eta 0:00:01[K     |█▊                              | 92 kB 29.0 MB/s eta 0:00:01[K     |██                              | 102 kB 29.7 MB/s eta 0:00:01[K     |██                              | 112 kB 29.7 MB/s eta 0:00:01[K     |██▎                             | 122 kB 29.7 MB/s eta 0:00:01[K     |██▌            

# Connect to Google Drive

In [None]:
# Mount Google Drive under /content/drive; every dataset, descriptor and result path below lives there.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


# Set parameters

In [None]:
# Number of results retrieved per query (the "n" of AP@n / mAP@n); read as a global by compute_aps.
n = 250
# Folders that receive the per-query CSV files written by save_results.
PERFORMANCE_FOLDER_FINE_TUNING = '/content/drive/MyDrive/CV_Birds/performance/fine_tuning/no_index/AutoEncoder'
PERFORMANCE_FOLDER_FEATURE_EXTRACTION = '/content/drive/MyDrive/CV_Birds/performance/feature_extraction/no_index/AutoEncoder'

# Utility functions

In [None]:
def create_annot(path):
  """Build the annotation table for the images stored under ``path``.

  Every file matching ``<path>/<sub-directory>/*.jpg`` becomes one row.

  Returns:
    A DataFrame with two columns: 'identity' (the name of the directory
    that contains the image) and 'image_path' (the image's Path object).
    Rows follow the order in which glob yields the files.
  """
  jpg_paths = []
  identities = []
  for jpg_path in Path(path).glob('*/*.jpg'):
    jpg_paths.append(jpg_path)
    # the identity is encoded in the path: it is the parent directory name
    identities.append(jpg_path.parent.name)
  return pd.DataFrame({'identity': identities, 'image_path': jpg_paths})

def concatenate_annots(list_of_paths):
  concat_annot = pd.DataFrame()
  with concurrent.futures.ThreadPoolExecutor() as executor:
    annots = [executor.submit(create_annot, path) for path in list_of_paths]
    for annot in annots:
      new_annot = annot.result()
      concat_annot = concat_annot.append(new_annot, ignore_index = True)
    return concat_annot

def load_descriptors(path):
  """Open ``path`` in binary mode and return what ``np.load`` reads from it."""
  with open(path, 'rb') as handle:
    descriptors = np.load(handle)
  return descriptors

def concatenate_descriptors(list_of_paths):
  concat_descriptors = None
  with concurrent.futures.ThreadPoolExecutor() as executor:
    descriptors = [executor.submit(load_descriptors, path) for path in list_of_paths]
    for descriptor in descriptors:
      new_descriptor = descriptor.result()
      if concat_descriptors is None:
        concat_descriptors = new_descriptor
      else:
        concat_descriptors = np.concatenate([concat_descriptors, new_descriptor])
    return concat_descriptors

def cosine_distance(o1, o2):
  """Return 1 - cosine_similarity(o1, o2), element by element.

  The result has the shape of the similarity matrix computed by scikit-learn.
  """
  sim = sklearn.metrics.pairwise.cosine_similarity(o1, o2)
  return np.ones(sim.shape) - sim

def euclidean_distance(o1, o2):
  """Return scikit-learn's pairwise euclidean distances between o1 and o2."""
  pairwise = sklearn.metrics.pairwise_distances
  return pairwise(o1, o2, metric='euclidean')

def k_nn_query(query_index, k, distance_metric = 'euclidean'):
  """Brute-force k-nearest-neighbour search of one query against the database.

  Reads the notebook globals ``query_descriptors``, ``db_descriptors`` and
  ``db_annot``; only the first ``len(db_annot)`` database descriptors are
  searched.

  Args:
    query_index: row of ``query_descriptors`` to use as the query.
    k: number of neighbours to return.
    distance_metric: 'euclidean' or 'cosine'.

  Returns:
    ``(best_indexes, rank_distances)``: two tuples of at most ``k`` elements
    holding the database row indexes and their distances, sorted by increasing
    distance (ties broken by increasing index). Both tuples are empty when
    ``k <= 0`` or the database is empty. ``(None, None)`` is returned for an
    unsupported ``distance_metric``.

  Raises:
    IndexError: if ``db_descriptors`` has fewer rows than ``db_annot``.
  """
  # validate the metric once, before doing any work
  if distance_metric == 'euclidean':
    distance_fn = euclidean_distance
  elif distance_metric == 'cosine':
    distance_fn = cosine_distance
  else:
    return None, None

  db_size = len(db_annot)
  if len(db_descriptors) < db_size:
    raise IndexError("db_descriptors has fewer rows than db_annot")
  if k <= 0 or db_size == 0:
    return (), ()

  query_descriptor = query_descriptors[query_index].reshape(1, -1)
  # one pairwise call over the whole database instead of one call per row:
  # still an exhaustive scan, without the per-call overhead
  distances = distance_fn(query_descriptor, db_descriptors[:db_size]).reshape(-1)

  # a stable sort keeps equal distances in index order, which is how the
  # (distance, index) tuples are ordered
  order = np.argsort(distances, kind='stable')[:k]
  best_indexes = tuple(order.tolist())
  rank_distances = tuple(distances[order])
  return best_indexes, rank_distances

def compute_ap_at_n(query_index, n, distance_metric = 'euclidean'):
  """Compute the average precision at ``n`` of one query and time the search.

  The ``n`` nearest database items are retrieved with k_nn_query. A result is
  relevant when its 'identity' in ``db_annot`` equals the query's 'identity'
  in ``query_annot``. The precision at each relevant rank is summed and
  divided by GTP, the number of database rows sharing the query identity.

  Args:
    query_index: label of the query row in ``query_annot``.
    n: number of results to retrieve.
    distance_metric: metric name forwarded to k_nn_query.

  Returns:
    ``(average_precision, query_time)``, where ``query_time`` is the duration
    in seconds of the k_nn_query call only. The average precision is 0.0 when
    the database holds no item with the query identity.

  Raises:
    ValueError: if k_nn_query rejects ``distance_metric``.
  """
  # perf_counter is monotonic and intended for measuring intervals
  start_time = time.perf_counter()
  retrieved_indexes, _ = k_nn_query(query_index, n, distance_metric)
  query_time = time.perf_counter() - start_time
  if retrieved_indexes is None:
    raise ValueError(f"Unsupported distance metric: {distance_metric!r}")

  query_identity = query_annot['identity'][query_index]
  db_identities = db_annot['identity']
  GTP = int((db_identities == query_identity).sum())
  if GTP == 0:
    # nothing relevant exists in the database: avoid dividing by zero
    return 0.0, query_time

  relevant = 0
  precision_summation = 0.0
  for rank, db_index in enumerate(retrieved_indexes, start=1):
    if db_identities[db_index] == query_identity: # relevant result
      relevant += 1
      precision_summation += relevant / rank
  return precision_summation / GTP, query_time

def compute_aps (queries_indexes, distance_metric = 'euclidean'):
  """Run compute_ap_at_n for every query in ``queries_indexes``.

  The number of retrieved results is the notebook global ``n``; one progress
  line is printed per query.

  Returns:
    Three parallel lists: the query classes (identities from
    ``query_annot``), the average precisions and the query times.
  """
  classes, aps_at_n, query_times = [], [], []
  for position, query_index in enumerate(queries_indexes):
    query_class = query_annot['identity'][query_index]
    print("Query number ", position, ", CLASS: ", query_class)
    average_precision, elapsed = compute_ap_at_n(query_index, n, distance_metric = distance_metric)
    classes.append(query_class)
    aps_at_n.append(average_precision)
    query_times.append(elapsed)
  return classes, aps_at_n, query_times

def save_results(dir, file_name, results):
  """Write per-query results to ``<dir>/<file_name>.csv``.

  Args:
    dir: output folder; created (with parents) when it does not exist yet.
    file_name: name of the CSV file without the ".csv" extension.
    results: iterable of rows, each holding (class, AP, query time).
  """
  if dir:
    # create the folder up front so a missing directory cannot discard the
    # results of a long evaluation run
    os.makedirs(dir, exist_ok=True)
  # newline='' is what the csv module requires of files it writes to; without
  # it some platforms translate line endings and produce blank rows
  with open(os.path.join(dir, file_name + ".csv"), 'w', newline='') as f:
    writer = csv.writer(f)
    # write the header
    writer.writerow(["CLASS", "AP", "QUERY TIME"])
    # write the data
    writer.writerows(results)

# Test performance
We will now test the performance of the AutoEncoder descriptors with a brute-force approach.

In [None]:
# Annotation table (identity + image path) of the query images, taken from the test folder.
query_annot = create_annot('/content/drive/MyDrive/CV_Birds/test')
query_annot

Unnamed: 0,identity,image_path
0,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/test/AFRICAN C...
1,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/test/AFRICAN C...
2,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/test/AFRICAN C...
3,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/test/AFRICAN C...
4,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/test/AFRICAN C...
...,...,...
1620,YELLOW HEADED BLACKBIRD,/content/drive/MyDrive/CV_Birds/test/YELLOW HE...
1621,YELLOW HEADED BLACKBIRD,/content/drive/MyDrive/CV_Birds/test/YELLOW HE...
1622,YELLOW HEADED BLACKBIRD,/content/drive/MyDrive/CV_Birds/test/YELLOW HE...
1623,YELLOW HEADED BLACKBIRD,/content/drive/MyDrive/CV_Birds/test/YELLOW HE...


To run our tests we select only the first image of each species within the test set. Please note that within the test set we have 5 images per species.

In [None]:
# Positions 0, 5, 10, ...: one query every 5 rows, over 325 species with 5 test images each.
queries_indexes = list(range(0, 325*5, 5))
print(query_annot['identity'][queries_indexes])

0             AFRICAN CROWNED CRANE
5                 AFRICAN FIREFINCH
10                        ALBATROSS
15             ALEXANDRINE PARAKEET
20                  AMERICAN AVOCET
                   ...             
1600       WILSONS BIRD OF PARADISE
1605                      WOOD DUCK
1610    YELLOW BELLIED FLOWERPECKER
1615                 YELLOW CACIQUE
1620        YELLOW HEADED BLACKBIRD
Name: identity, Length: 325, dtype: object


## Fine Tuning descriptors

### One dense layer of 256 neurons and dropout

In [None]:
# Database annotations: the train folder followed by the mirflickr25k folder.
db_annot = concatenate_annots([
    '/content/drive/MyDrive/CV_Birds/train',
    '/content/drive/MyDrive/CV_Birds/mirflickr25k',
])
db_annot

Unnamed: 0,identity,image_path
0,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
1,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
2,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
3,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
4,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
...,...,...
72327,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...
72328,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...
72329,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...
72330,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...


In [None]:
# Database descriptors: training file first, then distractor file, matching the folder order used for db_annot.
db_descriptors = concatenate_descriptors([
    '/content/drive/MyDrive/CV_Birds/features/training/AutoEncoder/512to256withPace64.npy',
    '/content/drive/MyDrive/CV_Birds/features/distractor/AutoEncoder/512to256withPace64.npy',
])
db_descriptors.shape

(72332, 256)

In [None]:
# Query descriptors of the test images.
query_descriptors = load_descriptors(
    '/content/drive/MyDrive/CV_Birds/features/test/AutoEncoder/512to256withPace64.npy'
)
query_descriptors.shape

(1625, 256)

#### Cosine distance

In [None]:
# Evaluate every selected query with the cosine distance, then persist one CSV row per query.
classes, aps_at_n, query_times = compute_aps(queries_indexes, distance_metric='cosine')
save_results(
    PERFORMANCE_FOLDER_FINE_TUNING,
    'AutoEncoder256E_cosine',
    zip(classes, aps_at_n, query_times),
)

In [None]:
# Average the per-query average precisions and search times of the run above.
mAP_at_n = np.asarray(aps_at_n).mean()
average_query_time = np.asarray(query_times).mean()
print(f'Mean Average Precision at {n}: {mAP_at_n}')
print(f'Average Query Time: {average_query_time}')

Mean Average Precision at 250: 0.7637816641777381
Average Query Time: 21.258438777189987


In [None]:
# Read the saved CSV back to inspect the per-class results.
pd.read_csv(os.path.join(PERFORMANCE_FOLDER_FINE_TUNING, 'AutoEncoder256E_cosine.csv'))

Unnamed: 0,CLASS,AP,QUERY TIME
0,AFRICAN CROWNED CRANE,0.960418,21.550630
1,AFRICAN FIREFINCH,0.817694,21.480790
2,ALBATROSS,0.863120,21.506847
3,ALEXANDRINE PARAKEET,0.877293,22.613635
4,AMERICAN AVOCET,0.986850,20.929028
...,...,...,...
320,WILSONS BIRD OF PARADISE,0.891742,21.281807
321,WOOD DUCK,0.845139,21.107791
322,YELLOW BELLIED FLOWERPECKER,0.408662,21.102937
323,YELLOW CACIQUE,0.959303,21.415152


### One dense layer of 128 neurons and dropout

In [None]:
# Database annotations: the train folder followed by the mirflickr25k folder.
db_annot = concatenate_annots([
    '/content/drive/MyDrive/CV_Birds/train',
    '/content/drive/MyDrive/CV_Birds/mirflickr25k',
])
db_annot

Unnamed: 0,identity,image_path
0,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
1,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
2,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
3,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
4,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
...,...,...
72327,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...
72328,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...
72329,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...
72330,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...


In [None]:
# Database descriptors: training file first, then distractor file, matching the folder order used for db_annot.
db_descriptors = concatenate_descriptors([
    '/content/drive/MyDrive/CV_Birds/features/training/AutoEncoder/512to128withPace64.npy',
    '/content/drive/MyDrive/CV_Birds/features/distractor/AutoEncoder/512to128withPace64.npy',
])
db_descriptors.shape

(72332, 128)

In [None]:
# Query descriptors of the test images.
query_descriptors = load_descriptors(
    '/content/drive/MyDrive/CV_Birds/features/test/AutoEncoder/512to128withPace64.npy'
)
query_descriptors.shape

(1625, 128)

#### Cosine distance

In [None]:
# Evaluate every selected query with the cosine distance, then persist one CSV row per query.
classes, aps_at_n, query_times = compute_aps(queries_indexes, distance_metric='cosine')
save_results(
    PERFORMANCE_FOLDER_FINE_TUNING,
    'AutoEncoder128E_cosine',
    zip(classes, aps_at_n, query_times),
)

In [None]:
# Average the per-query average precisions and search times of the run above.
mAP_at_n = np.asarray(aps_at_n).mean()
average_query_time = np.asarray(query_times).mean()
print(f'Mean Average Precision at {n}: {mAP_at_n}')
print(f'Average Query Time: {average_query_time}')

Mean Average Precision at 250: 0.7503732237470497
Average Query Time: 19.95543401498061


In [None]:
# Read the saved CSV back to inspect the per-class results.
pd.read_csv(os.path.join(PERFORMANCE_FOLDER_FINE_TUNING, 'AutoEncoder128E_cosine.csv'))

Unnamed: 0,CLASS,AP,QUERY TIME
0,AFRICAN CROWNED CRANE,0.966160,19.946817
1,AFRICAN FIREFINCH,0.823357,20.355816
2,ALBATROSS,0.858209,20.011400
3,ALEXANDRINE PARAKEET,0.864935,20.244975
4,AMERICAN AVOCET,0.986521,20.662600
...,...,...,...
320,WILSONS BIRD OF PARADISE,0.810169,19.650017
321,WOOD DUCK,0.830232,20.503800
322,YELLOW BELLIED FLOWERPECKER,0.463365,19.842379
323,YELLOW CACIQUE,0.933791,20.266129


## Feature Extraction descriptors

### One dense layer of 256 neurons and dropout

In [None]:
# Database annotations: the train folder followed by the mirflickr25k folder.
db_annot = concatenate_annots([
    '/content/drive/MyDrive/CV_Birds/train',
    '/content/drive/MyDrive/CV_Birds/mirflickr25k',
])
db_annot

Unnamed: 0,identity,image_path
0,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
1,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
2,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
3,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
4,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
...,...,...
72327,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...
72328,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...
72329,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...
72330,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...


In [None]:
# Database descriptors: training file first, then distractor file, matching the folder order used for db_annot.
db_descriptors = concatenate_descriptors([
    '/content/drive/MyDrive/CV_Birds/features/training/AutoEncoder/512to256withPace64_feature_extraction.npy',
    '/content/drive/MyDrive/CV_Birds/features/distractor/AutoEncoder/512to256withPace64_feature_extraction.npy',
])
db_descriptors.shape

(72332, 256)

In [None]:
# Query descriptors of the test images.
query_descriptors = load_descriptors(
    '/content/drive/MyDrive/CV_Birds/features/test/AutoEncoder/512to256withPace64_feature_extraction.npy'
)
query_descriptors.shape

(1625, 256)

#### Cosine distance

In [None]:
# Evaluate every selected query with the cosine distance, then persist one CSV row per query.
classes, aps_at_n, query_times = compute_aps(queries_indexes, distance_metric='cosine')
save_results(
    PERFORMANCE_FOLDER_FEATURE_EXTRACTION,
    'AutoEncoder256_feature_extraction_cosine',
    zip(classes, aps_at_n, query_times),
)

In [None]:
# NOTE(review): same call as the last line of the previous cell; it rewrites the same CSV
# from the same in-memory lists, so this cell looks redundant - confirm before removing.
save_results(PERFORMANCE_FOLDER_FEATURE_EXTRACTION, 'AutoEncoder256_feature_extraction_cosine', zip(classes, aps_at_n, query_times))

In [None]:
# Average the per-query average precisions and search times of the run above.
mAP_at_n = np.asarray(aps_at_n).mean()
average_query_time = np.asarray(query_times).mean()
print(f'Mean Average Precision at {n}: {mAP_at_n}')
print(f'Average Query Time: {average_query_time}')

Mean Average Precision at 250: 0.5952245808133548
Average Query Time: 22.04446351344769


In [None]:
# Read the saved CSV back to inspect the per-class results.
pd.read_csv(os.path.join(PERFORMANCE_FOLDER_FEATURE_EXTRACTION, 'AutoEncoder256_feature_extraction_cosine.csv'))

Unnamed: 0,CLASS,AP,QUERY TIME
0,AFRICAN CROWNED CRANE,0.896830,22.429219
1,AFRICAN FIREFINCH,0.628593,22.419486
2,ALBATROSS,0.610685,22.614145
3,ALEXANDRINE PARAKEET,0.643777,22.660943
4,AMERICAN AVOCET,0.981661,24.058343
...,...,...,...
320,WILSONS BIRD OF PARADISE,0.837743,19.921329
321,WOOD DUCK,0.800288,19.850944
322,YELLOW BELLIED FLOWERPECKER,0.115405,19.108148
323,YELLOW CACIQUE,0.823831,19.084859


### One dense layer of 128 neurons and dropout

In [None]:
# Database annotations: the train folder followed by the mirflickr25k folder.
db_annot = concatenate_annots([
    '/content/drive/MyDrive/CV_Birds/train',
    '/content/drive/MyDrive/CV_Birds/mirflickr25k',
])
db_annot

Unnamed: 0,identity,image_path
0,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
1,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
2,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
3,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
4,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
...,...,...
72327,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...
72328,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...
72329,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...
72330,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...


In [None]:
db_descriptors = concatenate_descriptors(['/content/drive/MyDrive/CV_Birds/features/training/AutoEncoder/512to128withPace64.npy', '/content/drive/MyDrive/CV_Birds/features/distractor/AutoEncoder/512to128withPace64.npy'])
db_descriptors.shape

(72332, 128)

In [None]:
query_descriptors = load_descriptors('/content/drive/MyDrive/CV_Birds/features/test/AutoEncoder/512to128withPace64.npy')
query_descriptors.shape

(1625, 128)

#### Cosine distance

In [None]:
# Evaluate every selected query with the cosine distance, then persist one CSV row per query.
classes, aps_at_n, query_times = compute_aps(queries_indexes, distance_metric='cosine')
save_results(
    PERFORMANCE_FOLDER_FEATURE_EXTRACTION,
    'AutoEncoder128_feature_extraction_cosine',
    zip(classes, aps_at_n, query_times),
)

In [None]:
# Average the per-query average precisions and search times of the run above.
mAP_at_n = np.asarray(aps_at_n).mean()
average_query_time = np.asarray(query_times).mean()
print(f'Mean Average Precision at {n}: {mAP_at_n}')
print(f'Average Query Time: {average_query_time}')

Mean Average Precision at 250: 0.5599754442545181
Average Query Time: 20.984311929115883


In [None]:
# Read the saved CSV back to inspect the per-class results.
pd.read_csv(os.path.join(PERFORMANCE_FOLDER_FEATURE_EXTRACTION, 'AutoEncoder128_feature_extraction_cosine.csv'))

Unnamed: 0,CLASS,AP,QUERY TIME
0,AFRICAN CROWNED CRANE,0.839976,21.593090
1,AFRICAN FIREFINCH,0.529318,21.449335
2,ALBATROSS,0.520869,20.868598
3,ALEXANDRINE PARAKEET,0.654019,21.346611
4,AMERICAN AVOCET,0.979440,21.071121
...,...,...,...
320,WILSONS BIRD OF PARADISE,0.788551,20.522257
321,WOOD DUCK,0.780075,20.506830
322,YELLOW BELLIED FLOWERPECKER,0.102526,20.845428
323,YELLOW CACIQUE,0.803484,21.381165
