In [None]:
import os
import sys
import json
import numpy as np
import matplotlib.pyplot as plt
import pdb
from tqdm.auto import tqdm


In [2]:
# https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
def IoU(boxA, boxB):
  """Intersection-over-union of two axis-aligned boxes.

  Each box is indexable as (x_min, y_min, x_max, y_max). Widths and heights
  are measured inclusively (max - min + 1), following the referenced
  pyimagesearch implementation.

  Returns the ratio overlap / union as a float; it is 0.0 when the clamped
  overlap rectangle is empty.
  """
  # corners of the overlap rectangle
  left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
  right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])

  # clamp each side at zero so boxes that do not overlap contribute nothing
  overlap_w = max(0, right - left + 1)
  overlap_h = max(0, bottom - top + 1)
  overlap = overlap_w * overlap_h

  # inclusive areas of the two input boxes
  area_a = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
  area_b = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

  # union = A + B - overlap (the overlap would otherwise be counted twice)
  return overlap / float(area_a + area_b - overlap)

def box_distance(boxA, boxB):
  """Ratio built from the coordinate-wise differences of two boxes.

  Both boxes are indexed at positions 0..3. The signed differences
  boxA[k] - boxB[k] are combined into a clamped product, which is then divided
  by (area of A + area of B - that product), with areas measured inclusively.

  NOTE(review): despite its name this is not a metric distance. The arithmetic
  mirrors IoU() applied to coordinate differences and looks like an unfinished
  adaptation -- confirm the intended definition before relying on the value.
  """
  # signed per-coordinate differences A - B
  d_x0, d_y0, d_x1, d_y1 = (boxA[k] - boxB[k] for k in range(4))

  # clamped "extent" of the difference box along each axis
  span_w = max(0, d_x1 - d_x0 + 1)
  span_h = max(0, d_y1 - d_y0 + 1)
  numerator = span_w * span_h

  # inclusive areas of the two input boxes
  area_a = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
  area_b = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

  return numerator / float(area_a + area_b - numerator)

# load any word embeddings with format used by Word2Vec, glove, etc.
def load_word_embeddings(file):
  embeddings_dict = {}
  head = True
  with open(file, 'r') as f:
    for line in f:
      if head:
        head = False
        continue
      values = line.split()
      word = values[0]
      vector = np.asarray(values[1:], "float32")
      embeddings_dict[word] = vector
  return embeddings_dict

In [None]:
def relevance_score(query, image):
  """Relevance of `image` to `query`: spatial score times semantic score.

  Both arguments are dicts whose 'data' entry provides 'subject', 'predicate',
  'object', 's_box' and 'o_box'. Word vectors are looked up in the global
  `embeds` mapping, which must be loaded before this function is called; a
  word missing from `embeds` makes the lookup fail.

  Returns the product of
    * the spatial term: mean of IoU(subject box, object box) computed once for
      the query and once for the image, and
    * the semantic term: dot product of the concatenated
      (subject, predicate, object) vectors of query and image.
  """
  q_data, i_data = query['data'], image['data']

  # spatial term
  # box coordinates are normalized for 64x64 image
  # TODO: fix box coord bug
  # NOTE(review): neither IoU call compares a query box with an image box --
  # confirm that this is the intended spatial similarity.
  q_box_s, q_box_o = q_data['s_box'], q_data['o_box']
  i_box_s, i_box_o = i_data['s_box'], i_data['o_box']
  s_spatial = (IoU(q_box_s, q_box_o) + IoU(i_box_s, i_box_o))/2

  # predicate may have preposition such as "parked on"; only its first word
  # is looked up
  p_q_embed = embeds[q_data['predicate'].split()[0]]
  p_i_embed = embeds[i_data['predicate'].split()[0]]

  # Concatenate the three slot vectors; scaling each side by 1/sqrt(3) makes
  # the dot product the mean of the three per-slot dot products.
  q_vec = np.concatenate((embeds[q_data['subject']], p_q_embed, embeds[q_data['object']]), axis=None)/np.sqrt(3)
  i_vec = np.concatenate((embeds[i_data['subject']], p_i_embed, embeds[i_data['object']]), axis=None)/np.sqrt(3)
  s_semantic = np.dot(q_vec, i_vec)

  return s_spatial*s_semantic

In [None]:
def spatial_score(query, image):
  """Spatial part of the relevance score.

  Reads the subject box ('s_box') and object box ('o_box') from the 'data'
  entry of both arguments and returns the mean of the two subject/object IoU
  values (one for the query, one for the image).
  """
  # box coordinates are normalized for 64x64 image
  # TODO: fix box coord bug
  query_subject_box = query['data']['s_box']
  query_object_box = query['data']['o_box']
  image_subject_box = image['data']['s_box']
  image_object_box = image['data']['o_box']

  overlap_in_query = IoU(query_subject_box, query_object_box)
  overlap_in_image = IoU(image_subject_box, image_object_box)
  return (overlap_in_query + overlap_in_image)/2

In [None]:
def semantic_score(query, image):
  """Semantic part of the relevance score.

  Looks up the vectors of the subject, the first word of the predicate and
  the object of both arguments in the global `embeds` mapping, concatenates
  each triple into one vector and returns the dot product of the two.

  `embeds` must be loaded before this function is called; a word missing from
  it makes the lookup fail.
  """
  q_data, i_data = query['data'], image['data']

  # predicate may have preposition such as "parked on"; only its first word
  # is looked up
  p_q_embed = embeds[q_data['predicate'].split()[0]]
  p_i_embed = embeds[i_data['predicate'].split()[0]]

  # Scaling each concatenated vector by 1/sqrt(3) makes the dot product the
  # mean of the three per-slot dot products.
  q_vec = np.concatenate((embeds[q_data['subject']], p_q_embed, embeds[q_data['object']]), axis=None)/np.sqrt(3)
  i_vec = np.concatenate((embeds[i_data['subject']], p_i_embed, embeds[i_data['object']]), axis=None)/np.sqrt(3)

  return np.dot(q_vec, i_vec)

In [3]:
%%time

# Load the query and image ("document") annotation lists from JSON.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's locale encoding.

# load queries
with open('queries.json', encoding='utf-8') as f:
  queries = json.load(f)

# load images
with open('docs.json', encoding='utf-8') as f:
  images = json.load(f)

CPU times: user 32.3 ms, sys: 3.87 ms, total: 36.1 ms
Wall time: 36.1 ms


In [5]:
%%time
# Read the Numberbatch vectors into `embeds`, the global word -> vector
# lookup that the scoring functions index by word.
embedding_file = "./word_embeddings/numberbatch-en-19.08.txt"
embeds = load_word_embeddings(file=embedding_file)
print('Done loading word embedding! whee!')

Done loading word embedding! whee!
CPU times: user 29.3 s, sys: 1.21 s, total: 30.5 s
Wall time: 30.9 s


In [None]:
%%time

# Score threshold above which an image counts as relevant to a query
relevance_thresh = 0.25

# (n_queries, n_images) matrix of relevance scores:
# row n belongs to queries[n], column m to images[m]
all_scores = np.zeros((len(queries), len(images)))

print('Processing', len(queries), 'queries')

# score every image in the database against every query
for n, query in enumerate(tqdm(queries, leave=False)):
  for m, image in enumerate(images):
    all_scores[n, m] = relevance_score(query, image)

# Boolean matrix of the same shape: True where the score exceeds the threshold.
# These flags are what the recall computations treat as the relevant set.
relevances = all_scores > relevance_thresh

Processing 100 queries


HBox(children=(IntProgress(value=0), HTML(value='')))

> <ipython-input-10-2099e1a14382>(31)relevance_score()
-> i_vec = np.concatenate(( embeds[ image['data']['subject'] ], p_i_embed, embeds[ image['data']['object'] ]), axis=None) # /np.sqrt(3)
(Pdb) n
> <ipython-input-10-2099e1a14382>(32)relevance_score()
-> s_semantic = np.dot(q_vec, i_vec)
(Pdb) print(d)
1.0000045


In [None]:
# System scores from the spatial term only.
# Allocate the matrix here so this cell also runs on a fresh kernel
# (sys_scores is otherwise first assigned further down the notebook).
sys_scores = np.zeros((len(queries), len(images)))
for n, query in enumerate(tqdm(queries, leave=False)):
  # iterate over image db
  for m, image in enumerate(images):
    sys_scores[n, m] = spatial_score(query, image)

In [None]:
# System scores from the semantic term only.
# Allocate the matrix here so this cell also runs on a fresh kernel
# (sys_scores is otherwise first assigned further down the notebook).
sys_scores = np.zeros((len(queries), len(images)))
for n, query in enumerate(tqdm(queries, leave=False)):
  # iterate over image db
  for m, image in enumerate(images):
    sys_scores[n, m] = semantic_score(query, image)

In [None]:
# Only the last expression of a cell is displayed, so print the shape explicitly.
print(all_scores.shape)

# Raw score matrix: rows are queries, columns are images
plt.matshow(all_scores)
plt.title('Relevance scores')
plt.xlabel('image index')
plt.ylabel('query index')
#plt.colorbar()

# Thresholded version of the same matrix
plt.matshow(relevances)
plt.title('Relevant pairs (score > relevance_thresh)')
plt.xlabel('image index')
plt.ylabel('query index');

In [None]:
# Simulated retrieval system: the reference scores perturbed by uniform noise
# in [0, 0.1). A fixed seed keeps the curve identical across kernel restarts.
noise_rng = np.random.RandomState(0)
sys_scores = all_scores + 0.1*noise_rng.random_sample(all_scores.shape)
# per query: image indices ordered by descending system score
sys_index = np.argsort(sys_scores, axis=1)[:,::-1]

# relevance flags re-ordered into the system's ranking
relevances_sort = np.take_along_axis(relevances, sys_index, 1)
# recall@k per query = relevant items found in the top k / all relevant items.
# [:,None] turns the per-query totals into a column so they broadcast across k;
# 1e-12 avoids 0/0 for queries without any relevant image.
recalls = np.cumsum(relevances_sort, 1)/(np.sum(relevances_sort, 1)[:,None] + 1e-12)
mean_recall = np.mean(recalls, axis=0) # average over queries, one value per k

# plot mean recall of the noisy system as a function of k
x = np.arange(1,len(mean_recall)+1)
fig, ax = plt.subplots()
ax.grid(True)
ax.set_xlim(0, len(mean_recall))
ax.set_xlabel('k')
ax.set_ylim(0,1.01)
ax.set_ylabel('Recall at k')
ax.set_title('Mean recall at k (noisy system)')
ax.plot(x, mean_recall)
plt.show()

In [None]:

# NOTE(review): this cell repeats the computation of the preceding cell;
# consider keeping only one of the two.

# Simulated retrieval system: the reference scores perturbed by uniform noise
# in [0, 0.1). A fixed seed keeps the curve identical across kernel restarts.
noise_rng = np.random.RandomState(0)
sys_scores = all_scores + 0.1*noise_rng.random_sample(all_scores.shape)
# per query: image indices ordered by descending system score
sys_index = np.argsort(sys_scores, axis=1)[:,::-1]

# relevance flags re-ordered into the system's ranking
relevances_sort = np.take_along_axis(relevances, sys_index, 1)
# recall@k per query = relevant items found in the top k / all relevant items.
# [:,None] turns the per-query totals into a column so they broadcast across k;
# 1e-12 avoids 0/0 for queries without any relevant image.
recalls = np.cumsum(relevances_sort, 1)/(np.sum(relevances_sort, 1)[:,None] + 1e-12)
mean_recall = np.mean(recalls, axis=0) # average over queries, one value per k

# plot mean recall of the noisy system as a function of k
x = np.arange(1,len(mean_recall)+1)
fig, ax = plt.subplots()
ax.grid(True)
ax.set_xlim(0, len(mean_recall))
ax.set_xlabel('k')
ax.set_ylim(0,1.01)
ax.set_ylabel('Recall at k')
ax.set_title('Mean recall at k (noisy system)')
ax.plot(x, mean_recall)
plt.show()

In [None]:
# fake system: reference scores plus uniform noise in [0, 0.1).
# The generator is seeded so the resulting ranking is reproducible.
noise_rng = np.random.RandomState(0)
sys_scores = all_scores + 0.1*noise_rng.random_sample(all_scores.shape)

In [None]:
def _recall_curves(sorted_hits):
  """Per-query recall@k for relevance flags already arranged in ranked order."""
  found_so_far = np.cumsum(sorted_hits, 1)
  # column of per-query totals, broadcast across k; 1e-12 guards against 0/0
  total_relevant = np.sum(sorted_hits, 1)[:,None] + 1e-12
  return found_so_far/total_relevant

# system under test: rank images by descending sys_scores
sys_index = np.argsort(sys_scores, axis=1)[:,::-1]
sys_relevances_sort = np.take_along_axis(relevances, sys_index, 1)
sys_recalls = _recall_curves(sys_relevances_sort)
sys_mean_recall = np.mean(sys_recalls, axis=0) # one value per k

# ideal ranking: every relevant image placed ahead of every non-relevant one
relevances_sort = np.sort(relevances, axis=1)[:,::-1]
recalls = _recall_curves(relevances_sort)
mean_recall = np.mean(recalls, axis=0) # one value per k

# compare the ideal curve with the system curve
x = np.arange(1,len(mean_recall)+1)
fig, ax = plt.subplots()
ax.grid(True)
ax.set_xlim(0, len(mean_recall))
ax.set_xlabel('k')
ax.set_ylim(0,1.01)
ax.set_ylabel('Recall at k')
ax.plot(x, mean_recall, label="ideal")
ax.plot(x, sys_mean_recall, label="sys")
ax.legend()
plt.show()

Define experiments by task: each experiment is based on a different task.
Try different relevance thresholds.
Try different blends of the spatial vs. semantic scores.