## Import libraries

In [39]:
from google.colab import drive
from pathlib import Path
from matplotlib import pyplot as plt 
import pandas as pd
import numpy as np
import time
import os
import csv
import math
import concurrent.futures
from PIL import Image, ImageDraw, ImageFont

## Utility functions

### Create annot and load descriptors

In [2]:
def create_annot(path):
  """Build an annotation table for every '<identity>/<image>.jpg' found under `path`.

  Returns a DataFrame with two columns: 'identity' (name of the image's parent
  directory) and 'image_path' (the Path of the image). Rows follow the order
  in which the glob yields the files.
  """
  identities = []
  image_paths = []
  for image_path in Path(path).glob('*/*.jpg'):
    # the parent directory name is the identity label of the image
    identities.append(image_path.parent.name)
    image_paths.append(image_path)
  return pd.DataFrame({'identity': identities, 'image_path': image_paths})

def concatenate_annots(list_of_paths):
  """Build the annotation table of every directory in `list_of_paths` and stack them.

  The per-directory tables are created concurrently with `create_annot`, then
  joined in the order of `list_of_paths` with a fresh 0..N-1 index.
  Returns an empty DataFrame when `list_of_paths` is empty.
  """
  with concurrent.futures.ThreadPoolExecutor() as executor:
    pending = [executor.submit(create_annot, path) for path in list_of_paths]
    # read the futures in submission order so the rows keep the order of list_of_paths
    frames = [future.result() for future in pending]
  if not frames:
    # pd.concat raises on an empty list; keep returning an empty table instead
    return pd.DataFrame()
  # single concat: DataFrame.append was removed in pandas 2.0 and copied the
  # accumulated frame on every iteration
  return pd.concat(frames, ignore_index=True)

In [3]:
def load_descriptors(path):
  """Read the array stored in the file at `path` with `np.load` and return it."""
  with open(path, 'rb') as handle:
    descriptors = np.load(handle)
  return descriptors

def concatenate_descriptors(list_of_paths):
  """Load the descriptor array of every file in `list_of_paths` and stack them.

  Files are loaded concurrently with `load_descriptors`; the arrays are joined
  along the first axis in the order of `list_of_paths`.
  Returns None when `list_of_paths` is empty.
  """
  with concurrent.futures.ThreadPoolExecutor() as executor:
    pending = [executor.submit(load_descriptors, path) for path in list_of_paths]
    # read the futures in submission order so the rows keep the order of list_of_paths
    arrays = [future.result() for future in pending]
  if not arrays:
    return None
  if len(arrays) == 1:
    # nothing to join: hand back the loaded array itself
    return arrays[0]
  # one concatenate instead of re-copying the accumulated array for every file
  return np.concatenate(arrays)

### Create pivots

In [4]:
def generate_pivots(descriptors, n, strategy="rnd"):
  """Select `n` pivots from `descriptors`.

  Parameters:
    descriptors: array of descriptors, indexable with an array of row indices.
    n: number of pivots to return.
    strategy: "kMED" returns the cluster centres of a k-medoids fit with `n`
      clusters; "rnd" returns randomly chosen rows. Any other value prints a
      notice and falls back to random rows.

  Returns:
    The selected pivots (rows of `descriptors`, or the k-medoids centres).
  """
  if strategy == "kMED":
    # NOTE(review): sklearn_extra is not imported by this notebook's import cell;
    # this branch needs it to be in scope at call time — confirm where it comes from.
    kmedoids = sklearn_extra.cluster.KMedoids(n_clusters=n).fit(descriptors)
    return kmedoids.cluster_centers_
  if strategy != "rnd":
    print(strategy, "was not implemented. Random pivots were returned")
  population = len(descriptors)
  # Draw without replacement so the same descriptor is never returned twice;
  # replacement is only used when more pivots are requested than rows exist.
  pivots_id = np.random.choice(population, size=n, replace=n > population)
  return descriptors[pivots_id]

def generate_list_of_pivots(descriptors, t, n, strategy="rnd"):
  """Run `generate_pivots(descriptors, n, strategy)` `t` times on a thread pool.

  Returns the list of the `t` results, collected in the order in which the
  tasks complete.
  """
  with concurrent.futures.ThreadPoolExecutor() as pool:
    jobs = []
    for _ in range(t):
      jobs.append(pool.submit(generate_pivots, descriptors, n, strategy))
    return [job.result() for job in concurrent.futures.as_completed(jobs)]

### Save test results

In [5]:
def save_results(dir, file_name, results):
  """Write `results` to '<dir>/<file_name>.csv'.

  The file starts with the header row CLASS, AP, QUERY TIME; every item of
  `results` is then written as one row. An existing file is overwritten.
  """
  # newline='' lets the csv module control line endings itself; without it,
  # platforms that translate '\n' produce an empty line after every row
  with open(os.path.join(dir, file_name + ".csv"), 'w', newline='') as f:
    writer = csv.writer(f)
    # write the header
    writer.writerow(["CLASS", "AP", "QUERY TIME"])
    # write the data
    writer.writerows(results)

## Test Performance

In [6]:
# Mount Google Drive at /content/drive; force_remount=True is passed so the
# mount is requested again even when the cell is re-run.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [7]:
# Extract FinalIndexes.zip from Drive into /content (-q: quiet, -d: destination directory).
!unzip -q "/content/drive/My Drive/CV_Birds/FinalIndexes.zip" -d "/content"

### Create annot and load descriptors for the database

In [25]:
# Database annotations: the 'train' folder followed by the 'mirflickr25k' folder,
# stacked into one table with a single running index.
db_annot = concatenate_annots(['/content/drive/MyDrive/CV_Birds/train', '/content/drive/MyDrive/CV_Birds/mirflickr25k'])
db_annot

Unnamed: 0,identity,image_path
0,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
1,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
2,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
3,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
4,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/train/AFRICAN ...
...,...,...
72327,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...
72328,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...
72329,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...
72330,mirflickr,/content/drive/MyDrive/CV_Birds/mirflickr25k/m...


In [28]:
# Database descriptors: the 'training' feature file followed by the 'distractor' one.
# NOTE(review): row i is presumably the descriptor of row i in db_annot (ids returned
# by the index are later used to look up db_annot) — confirm the two orders match.
db_descriptors = concatenate_descriptors(['/content/drive/MyDrive/CV_Birds/features/training/AutoEncoder/512to128withPace64.npy','/content/drive/MyDrive/CV_Birds/features/distractor/AutoEncoder/512to128withPace64.npy'])
db_descriptors.shape

(72332, 128)

### Create annot and load descriptors for the test set

In [137]:
# Annotations (identity and image path) of the images in the 'test' folder, used as queries.
query_annot = create_annot('/content/drive/MyDrive/CV_Birds/test')
query_annot

Unnamed: 0,identity,image_path
0,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/test/AFRICAN C...
1,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/test/AFRICAN C...
2,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/test/AFRICAN C...
3,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/test/AFRICAN C...
4,AFRICAN CROWNED CRANE,/content/drive/MyDrive/CV_Birds/test/AFRICAN C...
...,...,...
1620,YELLOW HEADED BLACKBIRD,/content/drive/MyDrive/CV_Birds/test/YELLOW HE...
1621,YELLOW HEADED BLACKBIRD,/content/drive/MyDrive/CV_Birds/test/YELLOW HE...
1622,YELLOW HEADED BLACKBIRD,/content/drive/MyDrive/CV_Birds/test/YELLOW HE...
1623,YELLOW HEADED BLACKBIRD,/content/drive/MyDrive/CV_Birds/test/YELLOW HE...


In [29]:
# Descriptors of the test images, later indexed by query_index.
# NOTE(review): row i is presumably the descriptor of row i in query_annot — confirm.
query_descriptors = load_descriptors('/content/drive/MyDrive/CV_Birds/features/test/AutoEncoder/512to128withPace64.npy')
query_descriptors.shape

(1625, 128)

### Create PP-Index

In [30]:
def get_descriptor_from_id(id_object):
  """Return the entry of the global `db_descriptors` stored at `id_object`."""
  descriptor = db_descriptors[id_object]
  return descriptor

In [31]:
# Change into the PP-Index notebook folder and execute PPIndex.ipynb in this session.
# NOTE(review): PrefixForest, used in the next cell, is presumably defined by that notebook — confirm.
%cd "/content/drive/MyDrive/CV_Birds/Notebooks/PP-Index"
%run PPIndex.ipynb

/content/drive/.shortcut-targets-by-id/1rI5YNBuaSlCB__w522WEkHjw-nFuvIo0/CV_Birds/Notebooks/PP-Index


In [32]:
# Cosine forest: PrefixForest configured with the cosine distance and prefix length 3,
# pointing at the index files extracted under /content/indexes.
# NOTE(review): the empty first argument presumably works because the forest is read
# back from disk (see the cell output) — confirm against PPIndex.ipynb.
cosine_forest = PrefixForest("", length=3, distance_metric='cosine', base_directory="/content/indexes/fine_tuning/forest/cosine", forest_file='forest_structure')

Forest retrieved from disk


### Compute AP

In [None]:
# Per-species image counts in the database, ignoring the 'mirflickr' distractor images.
birds_db = db_annot.loc[db_annot['identity'] != 'mirflickr']
counts = birds_db.groupby('identity').count()
# Reduce the single count column explicitly: calling int()/float() on a
# one-element Series is deprecated in recent pandas releases.
images_per_species = counts['image_path']
print("Minimum number of images per species:", int(images_per_species.min()))
print("Maximum number of images per species:", int(images_per_species.max()))
# mean() divides by the number of species actually present instead of a hard-coded 325
print("Average number of images:", float(images_per_species.mean()))

Minimum number of images per species: 116
Maximum number of images per species: 249
Average number of images: 145.63692307692307


Since at most we have 249 images per species, we use $n=250$.

In [16]:
# Number of results requested from the index for each query (the n of AP@n).
n = 250

The formula for Average Precision is the following:

> $AP@n=\frac{1}{GTP}\sum_{k=1}^{n}P@k\times rel@k$

where $GTP$ refers to the total number of ground truth positives, $n$ refers to the total number of images we are interested in, $P@k$ refers to the precision@k and $rel@k$ is a relevance function. 

The relevance function is an indicator function that equals 1 if the document at rank $k$ is relevant and 0 otherwise.

In [57]:
def compute_ap(query_index, retrieved_ids):
  """Average Precision of a ranked result list for one query.

  Reads the globals `query_annot` (identity of the query) and `db_annot`
  (identity of every database image).

  Parameters:
    query_index: label of the query row in `query_annot`.
    retrieved_ids: labels of `db_annot` rows, ordered from best to worst match.

  Returns:
    (query_identity, ap), where ap is the sum of precision@k over the ranks k
    holding a relevant result, divided by GTP (the number of database images
    sharing the query identity). ap is 0.0 when the database holds no image
    of that identity.
  """
  query_identity = query_annot['identity'][query_index]
  # fetch the identity column once instead of on every loop iteration
  db_identities = db_annot['identity']
  GTP = int((db_identities == query_identity).sum())
  if GTP == 0:
    # no ground truth positives: nothing can be relevant, avoid dividing by zero
    return (query_identity, 0.0)
  relevant = 0
  precision_summation = 0
  for rank, retrieved_id in enumerate(retrieved_ids, start=1):
    if db_identities[retrieved_id] == query_identity: # relevant result
      relevant += 1
      precision_summation += relevant / rank
  return (query_identity, precision_summation / GTP)

In [98]:
def plot_images(indexes, limit):
  """Show the first `limit` database images listed in `indexes` on a 5-column grid.

  Reads the global `db_annot` for the image path and identity of each id.
  Every panel is titled with its rank in `indexes` and the image's identity.
  """
  columns = 5
  rows = math.ceil(limit/columns)
  fig = plt.figure(figsize=(columns*4,rows*4))
  for i, id in enumerate(indexes[:limit]):
    # read the pixels inside a context manager so the file handle is closed
    # right away instead of staying open for every plotted image
    with Image.open(db_annot['image_path'][id]) as image_file:
      img = np.squeeze(np.asarray(image_file))
    # draw on the explicit Axes rather than relying on pyplot's "current axes"
    ax = fig.add_subplot(rows, columns, i+1)
    identity = db_annot['identity'][id]
    title = str(i) + ") Species: " + identity
    ax.set_title(title)
    ax.imshow(img)
    ax.axis('off')

  fig.tight_layout()
  plt.show()

#### Bobolink

In [133]:
def run_query(query_index, k, perturbations=3, shown=20):
  """Query the cosine forest with one test image and display the outcome.

  Uses the globals `cosine_forest`, `query_descriptors` and `query_annot`.
  Prints the AP of the result list, displays the query image and plots the
  first `shown` retrieved images.

  Parameters:
    query_index: row of the test image in `query_descriptors` / `query_annot`.
    k: number of results requested from the index.
    perturbations: forwarded to `find_nearest_neighbors`.
    shown: how many retrieved images to plot.

  Returns:
    (ids, distances, identity, ap, img): the retrieved ids as a list, the
    distances returned by the index, the query identity, its AP and the
    opened query image.
  """
  ids, distances = cosine_forest.find_nearest_neighbors(query_descriptors[query_index], k, perturbations=perturbations)
  ids = ids.tolist()
  identity, ap = compute_ap(query_index, ids)
  img = Image.open(query_annot['image_path'][query_index])
  print("AP:", ap)
  display(img)
  plot_images(ids, shown)
  return ids, distances, identity, ap, img

query_index = 255
ids, distances, identity, ap, img = run_query(query_index, n)

Output hidden; open in https://colab.research.google.com to view.

In [135]:
# Query the cosine forest with test image 256: request n results, compute the AP
# of the returned ids, then show the query image and the first 20 retrieved images.
query_index = 256
ids, distances = cosine_forest.find_nearest_neighbors(query_descriptors[query_index], n, perturbations=3)
ids = ids.tolist()
identity, ap = compute_ap(query_index, ids)
img = Image.open(query_annot['image_path'][query_index])
print("AP:", ap)
display(img)
plot_images(ids, 20)

Output hidden; open in https://colab.research.google.com to view.

#### Lilac Roller

In [128]:
# Query the cosine forest with test image 905: request n results, compute the AP
# of the returned ids, then show the query image and the first 20 retrieved images.
query_index = 905
ids, distances = cosine_forest.find_nearest_neighbors(query_descriptors[query_index], n, perturbations=3)
ids = ids.tolist()
identity, ap = compute_ap(query_index, ids)
img = Image.open(query_annot['image_path'][query_index])
print("AP:", ap)
display(img)
plot_images(ids, 20)

Output hidden; open in https://colab.research.google.com to view.

In [131]:
# Query the cosine forest with test image 906: request n results, compute the AP
# of the returned ids, then show the query image and the first 20 retrieved images.
query_index = 906
ids, distances = cosine_forest.find_nearest_neighbors(query_descriptors[query_index], n, perturbations=3)
ids = ids.tolist()
identity, ap = compute_ap(query_index, ids)
img = Image.open(query_annot['image_path'][query_index])
print("AP:", ap)
display(img)
plot_images(ids, 20)

Output hidden; open in https://colab.research.google.com to view.

#### Eastern Towhee

In [109]:
# Query the cosine forest with test image 535: request n results, compute the AP
# of the returned ids, then show the query image and the first 20 retrieved images.
query_index = 535
ids, distances = cosine_forest.find_nearest_neighbors(query_descriptors[query_index], n, perturbations=3)
ids = ids.tolist()
identity, ap = compute_ap(query_index, ids)
img = Image.open(query_annot['image_path'][query_index])
print("AP:", ap)
display(img)
plot_images(ids, 20)

Output hidden; open in https://colab.research.google.com to view.

In [110]:
# Query the cosine forest with test image 536: request n results, compute the AP
# of the returned ids, then show the query image and the first 20 retrieved images.
query_index = 536
ids, distances = cosine_forest.find_nearest_neighbors(query_descriptors[query_index], n, perturbations=3)
ids = ids.tolist()
identity, ap = compute_ap(query_index, ids)
img = Image.open(query_annot['image_path'][query_index])
print("AP:", ap)
display(img)
plot_images(ids, 20)

Output hidden; open in https://colab.research.google.com to view.