In [13]:
from google.colab import drive
drive.mount('/content/drive')

# generated from similarity.ipynb
!cp drive/My\ Drive/bigdata/similarity/results/nn-true-answers.npz .
# generated from similarity.ipynb
!cp drive/My\ Drive/bigdata/similarity/results/nn-qs-answers.npz .

# generated from features.ipynb
!cp drive/My\ Drive/bigdata/similarity/true-classes.npz . 
# generated from features.ipynb
!cp drive/My\ Drive/bigdata/similarity/resnet50-pretrained-np-dump.npz .
# generated from qs.cpp (local)
!cp drive/My\ Drive/bigdata/similarity/resnet50-qs-1282015-dump.npz.zip .
!unzip resnet50-qs-1282015-dump.npz.zip

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Archive:  resnet50-qs-1282015-dump.npz.zip
replace resnet50-qs-1282015-dump.npz? [y]es, [n]o, [A]ll, [N]one, [r]ename: n


In [0]:
import numpy as np

# ---- Global initialisation ----
# Every container starts as its own fresh empty list, so re-running the cell
# resets any state accumulated by a previous run.

# Ground-truth class labels: one list for the training set, one for the queries.
true_training_categories, true_query_categories = [], []

# Placeholders for the original (unsketched) feature vectors.
true_training_features, true_query_features = [], []

# Placeholders for the sketched feature vectors.
sketched_training_features, sketched_query_features = [], []

# Placeholders for nearest-neighbour answers.
true_answers, sketched_answers = [], []

# Length of one feature vector; used to cut the flat sketched dump into vectors.
DIM = 2048

# ---- Load query outputs (one row per query) ----
plain_answers_file = np.load("nn-true-answers.npz")
sketched_answers_file = np.load("nn-qs-answers.npz")

# answers from original features
plain_answers = plain_answers_file['true_answers']
# answers from sketched features
sketched_answers = sketched_answers_file['sketched_answers']

# Previously saved summary statistics. stats.npz is not among the files the
# setup cell copies from Drive, so on a fresh runtime it may be absent. Treat
# it as optional rather than aborting the whole cell: `accuracy` and
# `distortion` are recomputed from scratch by the evaluation cell.
try:
    stats_file = np.load("stats.npz")
except FileNotFoundError:
    stats_file = None
    accuracy = None
    distortion = None
else:
    accuracy = stats_file['accuracy']
    distortion = stats_file['distortion']

# ---- Ground-truth labels ----
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects,
# which can execute code; only use it on files you produced yourself.
true_file = np.load("true-classes.npz", allow_pickle=True)

# Both arrays carry an empty placeholder at index 0 so that classes can be
# addressed by their 1-based id (1..257).
train_ex = true_file['tr_paths']
query_ex = true_file['qr_paths']

# Flatten the per-class groups into one label list per split, in class order.
# Element [1] of each entry is converted to int and stored as the label
# (presumably each entry is a (path, class) pair -- TODO confirm).
for class_id in range(1, 258):
    true_training_categories.extend(int(entry[1]) for entry in train_ex[class_id])
    true_query_categories.extend(int(entry[1]) for entry in query_ex[class_id])

## Original embeddings: from the colab file, used as is, no change required.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
# only use it on trusted files (it may not be needed for these dumps -- verify).
true_features_file = np.load("resnet50-pretrained-np-dump.npz", allow_pickle=True)
true_training_features = true_features_file['emb_training']
true_query_features = true_features_file['emb_query']

## Sketched embeddings: from the cpp code, stored as one flat sequence that
## has to be cut into vectors and then split into training and query parts.
sketched_features_file = np.load("resnet50-qs-1282015-dump.npz", allow_pickle=True)
all_sketched_features = sketched_features_file['qstq']

# Number of complete DIM-long vectors in the flat dump. Integer division by
# DIM (not a hard-coded 2048) keeps the chunk count consistent with the chunk
# size used in the slices below; a trailing partial vector, if any, is dropped.
num_sketched_vectors = len(all_sketched_features) // DIM
all_sketched_features_N = [
    all_sketched_features[i * DIM: (i + 1) * DIM]
    for i in range(num_sketched_vectors)
]

NUM_QUERY = len(true_query_features)
NUM_TRAIN = len(true_training_features)

# The first NUM_TRAIN vectors are taken as training points and the remainder
# as queries (assumes the dump is ordered training-then-query -- TODO confirm).
sketched_training_features = all_sketched_features_N[:NUM_TRAIN]
sketched_query_features = all_sketched_features_N[NUM_TRAIN:]

In [16]:
from numpy import linalg as LA


def _mean_or_nan(total, count):
    """Return ``total / count``, or NaN when ``count`` is zero."""
    return total / count if count else float("nan")


def _label_match_rate(answers):
    """Return the fraction of answer rows whose entries 1 and 0 are equal.

    Entries 0 and 1 of a row are presumably the query's true class and the
    class of the returned neighbour (which is which is not visible here --
    TODO confirm). An empty ``answers`` yields NaN instead of raising
    ZeroDivisionError.
    """
    matches = sum(1 for row in answers if row[1] == row[0])
    return _mean_or_nan(matches, len(answers))


def _compare_with_exact_search(query_features, training_features,
                               training_categories, exact_answers,
                               approx_answers, num_queries, zero_tol=1e-3):
    """Compare approximate nearest-neighbour answers with the exact ones.

    Entry 2 of each answer row is used as an index into ``training_features``
    and ``training_categories``. For each of the first ``num_queries`` queries
    the ratio (distance to approximate neighbour) / (distance to exact
    neighbour) is accumulated, and the function counts how often both searches
    return the same training point and how often the two returned points
    share a category.

    Args:
        query_features: indexable collection of query vectors.
        training_features: indexable collection of training vectors.
        training_categories: category of each training point.
        exact_answers: answer rows obtained from the original features.
        approx_answers: answer rows obtained from the sketched features.
        num_queries: number of leading queries to evaluate.
        zero_tol: exact-neighbour distances below this are treated as zero.

    Returns:
        Tuple ``(distortion_sum, exact_hits, class_hits)``.
    """
    distortion_sum = 0.0
    exact_hits = 0
    class_hits = 0
    for q in range(num_queries):
        exact_idx = exact_answers[q][2]
        exact_dist = LA.norm(query_features[q] - training_features[exact_idx])

        if exact_dist < zero_tol:
            # The exact neighbour (nearly) coincides with the query, so the
            # distance ratio is undefined. Count the query as distortion 1.0
            # and as a hit.
            # NOTE(review): the approximate answer is not inspected here, so
            # it is credited as correct even if it returned another point.
            distortion_sum += 1.0
            exact_hits += 1
            class_hits += 1
            continue

        approx_idx = approx_answers[q][2]
        approx_dist = LA.norm(training_features[approx_idx] - query_features[q])
        distortion_sum += approx_dist / exact_dist

        if exact_idx == approx_idx:
            # Same nearest neighbour, hence also the same class.
            exact_hits += 1
            class_hits += 1
        elif training_categories[exact_idx] == training_categories[approx_idx]:
            # Different neighbour, but it belongs to the same class.
            class_hits += 1
    return distortion_sum, exact_hits, class_hits


## performance of plain vs ground truth
print("Acc of Plain Embeddings:", _label_match_rate(plain_answers))

## performance of QS vs ground truth
print("Acc of Quad Sketch:", _label_match_rate(sketched_answers))

# Computing distortion
num_eval_queries = len(sketched_query_features)
distortion_sum, exact_counter, class_counter = _compare_with_exact_search(
    true_query_features,
    true_training_features,
    true_training_categories,
    plain_answers,
    sketched_answers,
    num_eval_queries,
)

##
distortion = _mean_or_nan(distortion_sum, num_eval_queries)
accuracy = _mean_or_nan(exact_counter, num_eval_queries)
class_accuracy = _mean_or_nan(class_counter, num_eval_queries)
print("Avg Distortion: ", distortion)
print("Exact Accuracy:", accuracy)
print("Class Accuracy:", class_accuracy)

Acc of Plain Embeddings: 0.7327033412082349
Acc of Quad Sketch: 0.7283158960512993
Avg Distortion:  1.0016585672943727
Exact Accuracy: 0.847789402632467
Class Accuracy: 0.9375632804589943
