In [None]:
import os
import sys
import json
import numpy as np
import pdb


In [None]:
# https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
def IoU(boxA, boxB):
  # determine the (x, y)-coordinates of the intersection rectangle
  xA = max(boxA[0], boxB[0])
  yA = max(boxA[1], boxB[1])
  xB = min(boxA[2], boxB[2])
  yB = min(boxA[3], boxB[3])
  # compute the area of intersection rectangle
  interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
  # compute the area of both the prediction and ground-truth rectangles
  boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
  boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
  # compute the intersection over union by taking the intersection area
  # and dividing it by the sum of prediction + ground-truth areas (interesection area)
  IoU = interArea / float(boxAArea + boxBArea - interArea)
  return IoU

# load any word embeddings with format used by Word2Vec, glove, etc.
def load_word_embeddings(file):
  embeddings_dict = {}
  head = True
  with open(file, 'r') as f:
    for line in f:
      if head:
        head = False
        continue
      values = line.split()
      word = values[0]
      vector = np.asarray(values[1:], "float32")
      embeddings_dict[word] = vector
  return embeddings_dict

In [10]:
def relevance_score(query, image):
  # find spatial score using IoU
  # box coordinates are normalized for 64x64 image
  # TODO: fix box coord bug
  q_box_s, q_box_o = query['data']['s_box'], query['data']['o_box']
  i_box_s, i_box_o = image ['data']['s_box'], image['data']['o_box']
  #print(q_box_s, q_box_o)
  #print(i_box_s, i_box_o)
  #s_spatial = (IoU(q_box_s, q_box_o) + IoU(i_box_s, i_box_o))/2
  s_spatial = 0
  
  # use word embedding to determine semantic similarity between query terms
  obj_score = np.dot(embeds[ query['data']['object'] ], embeds[ image['data']['object'] ] )
  
  # predicate may have preposition such as "parked on"
  val1 = query['data']['predicate'].split()
  p_q_embed = embeds[val1[0]]
  val2 = image['data']['predicate'].split()
  p_i_embed = embeds[val2[0]]  
  pred_score = np.dot(p_q_embed, p_i_embed)
    
  #print('predicate score:', val1[0], val2[0], '->', pred_score) 
  #pred_score = np.dot(embeds[ query['data']['predicate'] ], embeds[ image['data']['predicate'] ] ) 
  subj_score = np.dot(embeds[ query['data']['subject'] ], embeds[ image['data']['subject'] ] )
  #s_semantic = (obj_score + pred_score + subj_score)/3

  # try dot product between concatenated query vector
  q_vec = np.concatenate(( embeds[ query['data']['subject'] ], p_q_embed, embeds[ query['data']['object'] ]), axis=None)
  i_vec = np.concatenate(( embeds[ image['data']['subject'] ], p_i_embed, embeds[ image['data']['object'] ]), axis=None)                                                                                                      
  s_semantic = np.dot(q_vec, i_vec)

  return s_spatial + s_semantic

In [4]:
# load queries
with open('queries.json') as f:
  queries = json.load(f)

# load images
with open('docs.json') as f:
  images = json.load(f)

# load word embeddings
embedding_file = "./word_embeddings/numberbatch-en-19.08.txt"
embeds = load_word_embeddings(embedding_file)
print('Done loading word embedding! whee!')

Done loading word embedding! whee!


In [9]:
# iterate over all queries
print('Processing', len(queries), 'queries')
for n, query in enumerate(queries):
  #print(query)
  scores = []
  labels = []
  print('query:', query['data']['subject'],query['data']['predicate'],query['data']['object'])
    
  # iterate over image db
  for m, image in enumerate(images):
    #print(image)
    # sort by this
    score = relevance_score(query, image)
    #print('image:', image['data']['subject'],image['data']['predicate'],image['data']['object'])
    #print('relevance score = ', score)
    scores.append(score)
    labels.append([image['data']['subject'],image['data']['predicate'],image['data']['object']])
  
  # display retrieval
  scores = np.array(scores)
  labels = np.array(labels)
  n = len(scores)
  index = scores.argsort(axis=0)[::-1][:n]
  labels = labels[index]
  print('retrieved images:')
  print(labels[0:10])
  pdb.set_trace()

Processing 100 queries
query: shelf against wall
retrieved images:
[['shelf' 'against' 'wall']
 ['shelf' 'against' 'wall']
 ['sink' 'against' 'wall']
 ['pillow' 'against' 'wall']
 ['board' 'against' 'wall']
 ['stand' 'against' 'wall']
 ['counter' 'by' 'wall']
 ['mirror' 'on' 'wall']
 ['shelf' 'above' 'door']
 ['tree' 'over' 'wall']]
> <ipython-input-9-3fb64112d383>(3)<module>()
-> for n, query in enumerate(queries):
(Pdb) c
query: picture made of book
retrieved images:
[['picture' 'has' 'picture']
 ['wall' 'made of' 'glass']
 ['photo' 'with' 'photo']
 ['building' 'made of' 'glass']
 ['picture' 'hanging on' 'wall']
 ['picture' 'hanging on' 'wall']
 ['picture' 'hanging on' 'wall']
 ['picture' 'hanging on' 'wall']
 ['picture' 'hanging on' 'wall']
 ['picture' 'on' 'table']]
> <ipython-input-9-3fb64112d383>(3)<module>()
-> for n, query in enumerate(queries):
(Pdb) c
query: picture has picture
retrieved images:
[['picture' 'has' 'picture']
 ['shirt' 'has' 'picture']
 ['photo' 'with' 'photo']

BdbQuit: 