In [1]:
data_path = "../data/"
img_path = data_path + "images/"
mask_path = data_path + "mask/"
bbox_path = data_path + "referit_edgeboxes_top100/"

In [2]:
import os
import pandas as pd
import numpy as np
import scipy.io as sio
import re
import pickle

In [17]:
imgs = os.listdir(img_path)
masks = os.listdir(mask_path)
bboxes = os.listdir(bbox_path)

In [18]:
object_table = pd.DataFrame(columns = ["object_id", "img", "loc", "loc_rel", "img_size"])

for mask in masks:
    mat = sio.loadmat(mask_path + mask)
    mat = mat["segimg_t"]

    rowIndices, colIndices = np.where(mat==0)
    height, width = mat.shape 
    yMin = np.min(rowIndices)
    xMin = np.min(colIndices)
    yMax = np.max(rowIndices)
    xMax = np.max(colIndices)
    yMin_rel = (np.min(rowIndices) / height) * 2 -1
    xMin_rel = (np.min(colIndices) / width) * 2 -1
    yMax_rel = (np.max(rowIndices) / height) * 2 -1
    xMax_rel = (np.max(colIndices) / width) * 2 -1

    reg = re.search('((\d+)_\d+)\.mat$', mask)
    obj_id = reg.group(1)
    img = reg.group(2)
    object_table.loc[len(object_table)] = [obj_id, img, [xMin, yMin, xMax, yMax], [xMin_rel, yMin_rel, xMax_rel, yMax_rel], [width, height]]

In [20]:
query_table = pickle.load(open(data_path + "querry_df.pkl", "rb"))
object_table = object_table.merge(query_table.loc[:, ["obj_id","querry","embedding"]], left_on="object_id", right_on="obj_id", how="right")
object_table = object_table[["obj_id", "img", "loc", "loc_rel", "img_size", "querry", "embedding"]]
object_table.columns = ["obj_id", "img", "loc", "loc_rel", "img_size", "expr", "emb_expr"]

In [87]:
labels = pickle.load(open(data_path + "labels", "rb"))
object_table = object_table.merge(labels[["obj_id","label"]], on="obj_id", how="left")

In [94]:
object_table.head()

Unnamed: 0,obj_id,img,loc,loc_rel,img_size,expr,emb_expr,label
0,9881_4,9881,"[266, 161, 415, 209]","[0.10833333333333339, -0.10555555555555551, 0....","[480, 360]",ground between lead rider and second rider,"[-0.01790246, 0.06057319, 0.03528392, -0.01663...",ground
1,24410_1,24410,"[298, 182, 408, 313]","[0.2416666666666667, 0.011111111111111072, 0.7...","[480, 360]",closest island,"[-0.07347091, -0.035721637, -0.048873313, -0.0...",island
2,11135_1,11135,"[0, 224, 359, 479]","[-1.0, -0.06666666666666665, 0.994444444444444...","[360, 480]",sidewalk,"[-0.031596936, -0.04462336, -0.011513167, -0.0...",sidewalk
3,1317_6,1317,"[68, 110, 82, 270]","[-0.7166666666666667, -0.38888888888888884, -0...","[480, 360]",post on left of house under the deck,"[-0.053634763, 0.017997567, -0.07499985, 0.024...",column
4,35720_1,35720,"[0, 0, 479, 359]","[-1.0, -1.0, 0.9958333333333333, 0.99444444444...","[480, 360]",anywhere,"[-0.027327683, -0.003149564, 0.0010297332, -0....",city


In [88]:
pickle.dump(object_table, open("object_table", "wb"))

In [19]:
num_bboxes_per_img = 10

bbox_list = []

for bb in bboxes:
    img = bb.split(".")[0]
   
    loc_act = np.loadtxt(bbox_path + bb, max_rows=num_bboxes_per_img)
    if len(loc_act) == 0:
        continue
    if loc_act.ndim == 1:
        loc_act = [loc_act]
        bbox_id = img + "_f1"
    else:
        loc_act = [i for i in loc_act]
        bbox_id = [img + "_f" + str(i) for i in range(1,len(loc_act)+1)]
        img = list(np.repeat(img, len(loc_act)))
    
    bbox_list.append(pd.DataFrame({"bbox_id":bbox_id, "img":img, "loc_act":loc_act}))

bbox_table = pd.concat(bbox_list)

  


In [93]:
bbox_table.head()

Unnamed: 0,bbox_id,img,loc_act
0,20260_f1,20260,"[0.0, 277.0, 83.0, 336.0]"
1,20260_f2,20260,"[0.0, 203.0, 223.0, 341.0]"
2,20260_f3,20260,"[322.0, 195.0, 356.0, 249.0]"
3,20260_f4,20260,"[0.0, 274.0, 104.0, 342.0]"
4,20260_f5,20260,"[0.0, 202.0, 236.0, 449.0]"


In [98]:
bbox_table = bbox_table.merge(object_table.loc[:,["img", "img_size"]].drop_duplicates(subset="img"), on="img")
bbox_table["loc_rel"] = bbox_table.apply(lambda r: [r.loc_act[0]/r.img_size[0],
                                                    r.loc_act[1]/r.img_size[1],
                                                    r.loc_act[2]/r.img_size[0],
                                                    r.loc_act[3]/r.img_size[1]], axis=1)

In [99]:
bbox_table.head()

Unnamed: 0,bbox_id,img,loc_act,img_size,loc_rel
0,20260_f1,20260,"[0.0, 277.0, 83.0, 336.0]","[360, 480]","[0.0, 0.5770833333333333, 0.23055555555555557,..."
1,20260_f2,20260,"[0.0, 203.0, 223.0, 341.0]","[360, 480]","[0.0, 0.42291666666666666, 0.6194444444444445,..."
2,20260_f3,20260,"[322.0, 195.0, 356.0, 249.0]","[360, 480]","[0.8944444444444445, 0.40625, 0.98888888888888..."
3,20260_f4,20260,"[0.0, 274.0, 104.0, 342.0]","[360, 480]","[0.0, 0.5708333333333333, 0.28888888888888886,..."
4,20260_f5,20260,"[0.0, 202.0, 236.0, 449.0]","[360, 480]","[0.0, 0.42083333333333334, 0.6555555555555556,..."


In [100]:
pickle.dump(bbox_table, open("bbox_table", "wb"))

In [119]:
def compute_IoU(loc1, loc2):
    #print(loc1)
    #print(loc2)
    i_xmin = max(loc1[0], loc2[0])
    i_ymin = max(loc1[1], loc2[1])
    i_xmax = min(loc1[2], loc2[2])
    i_ymax = min(loc1[3], loc2[3])
    #print("{},{},{},{}".format(i_xmin, i_ymin, i_xmax, i_ymax))
    area_i = max(i_xmax - i_xmin + 1, 0) * max(i_ymax - i_ymin + 1, 0)
    area1 = (loc1[2]-loc1[0]+1) * (loc1[3]-loc1[1]+1)
    area2 = (loc2[2]-loc2[0]+1) * (loc2[3]-loc2[1]+1)
    iou = area_i / (area1 + area2 - area_i)
    #print("{},{},{}".format(area1, area2, area_i))
    return iou

In [102]:
def compute_GIoU(loc1, loc2):
    i_xmin = np.maximum(loc1[0], loc2[0])
    i_ymin = np.maximum(loc1[1], loc2[1])
    i_xmax = np.minimum(loc1[2], loc2[2])
    i_ymax = np.minimum(loc1[3], loc2[3])
    c_xmin = np.minimum(loc1[0], loc2[0])
    c_ymin = np.minimum(loc1[1], loc2[1])
    c_xmax = np.maximum(loc1[2], loc2[2])
    c_ymax = np.maximum(loc1[3], loc2[3])
    area_i = np.maximum(i_xmax - i_xmin + 1, 0) * np.maximum(i_ymax - i_ymin + 1, 0)
    area_c = np.maximum(c_xmax - c_xmin + 1, 0) * np.maximum(c_ymax - c_ymin + 1, 0)
    area1 = (loc1[2]-loc1[0]+1) * (loc1[3]-loc1[1]+1)
    area2 = (loc2[2]-loc2[0]+1) * (loc2[3]-loc2[1]+1)
    area_u = area1 + area2 - area_i
    iou = area_i / area_u
    giou = iou - (area_c - area_u) / area_c
    return giou

In [103]:
whole_data = bbox_table.merge(object_table, on = "img")

In [104]:
whole_data.head()

Unnamed: 0,bbox_id,img,loc_act,img_size_x,loc_rel_x,obj_id,loc,loc_rel_y,img_size_y,expr,emb_expr,label
0,20260_f1,20260,"[0.0, 277.0, 83.0, 336.0]","[360, 480]","[0.0, 0.5770833333333333, 0.23055555555555557,...",20260_2,"[0, 0, 359, 208]","[-1.0, -1.0, 0.9944444444444445, -0.1333333333...","[360, 480]",the sky right hand corner,"[-0.009086175, 0.06696806, -0.07156556, 0.0605...",cloud
1,20260_f1,20260,"[0.0, 277.0, 83.0, 336.0]","[360, 480]","[0.0, 0.5770833333333333, 0.23055555555555557,...",20260_1,"[8, 265, 359, 479]","[-0.9555555555555556, 0.10416666666666674, 0.9...","[360, 480]",water on right,"[-0.010208251, 0.0080347, -0.045048416, 0.0062...",floor-other
2,20260_f1,20260,"[0.0, 277.0, 83.0, 336.0]","[360, 480]","[0.0, 0.5770833333333333, 0.23055555555555557,...",20260_3,"[178, 206, 224, 346]","[-0.011111111111111072, -0.14166666666666672, ...","[360, 480]",middle guy,"[-0.031950668, 0.014514642, 0.006182524, 0.026...",man
3,20260_f2,20260,"[0.0, 203.0, 223.0, 341.0]","[360, 480]","[0.0, 0.42291666666666666, 0.6194444444444445,...",20260_2,"[0, 0, 359, 208]","[-1.0, -1.0, 0.9944444444444445, -0.1333333333...","[360, 480]",the sky right hand corner,"[-0.009086175, 0.06696806, -0.07156556, 0.0605...",cloud
4,20260_f2,20260,"[0.0, 203.0, 223.0, 341.0]","[360, 480]","[0.0, 0.42291666666666666, 0.6194444444444445,...",20260_1,"[8, 265, 359, 479]","[-0.9555555555555556, 0.10416666666666674, 0.9...","[360, 480]",water on right,"[-0.010208251, 0.0080347, -0.045048416, 0.0062...",floor-other


In [120]:
whole_data["IoU"] = whole_data.apply(lambda r: compute_IoU(r["loc"], r["loc_act"]), axis=1)

In [121]:
whole_data.head()

Unnamed: 0,bbox_id,img,loc_act,img_size_x,loc_rel_x,obj_id,loc,loc_rel_y,img_size_y,expr,emb_expr,label,IoU
0,20260_f1,20260,"[0.0, 277.0, 83.0, 336.0]","[360, 480]","[0.0, 0.5770833333333333, 0.23055555555555557,...",20260_2,"[0, 0, 359, 208]","[-1.0, -1.0, 0.9944444444444445, -0.1333333333...","[360, 480]",the sky right hand corner,"[-0.009086175, 0.06696806, -0.07156556, 0.0605...",cloud,0.0
1,20260_f1,20260,"[0.0, 277.0, 83.0, 336.0]","[360, 480]","[0.0, 0.5770833333333333, 0.23055555555555557,...",20260_1,"[8, 265, 359, 479]","[-0.9555555555555556, 0.10416666666666674, 0.9...","[360, 480]",water on right,"[-0.010208251, 0.0080347, -0.045048416, 0.0062...",floor-other,0.059874
2,20260_f1,20260,"[0.0, 277.0, 83.0, 336.0]","[360, 480]","[0.0, 0.5770833333333333, 0.23055555555555557,...",20260_3,"[178, 206, 224, 346]","[-0.011111111111111072, -0.14166666666666672, ...","[360, 480]",middle guy,"[-0.031950668, 0.014514642, 0.006182524, 0.026...",man,0.0
3,20260_f2,20260,"[0.0, 203.0, 223.0, 341.0]","[360, 480]","[0.0, 0.42291666666666666, 0.6194444444444445,...",20260_2,"[0, 0, 359, 208]","[-1.0, -1.0, 0.9944444444444445, -0.1333333333...","[360, 480]",the sky right hand corner,"[-0.009086175, 0.06696806, -0.07156556, 0.0605...",cloud,0.012796
4,20260_f2,20260,"[0.0, 203.0, 223.0, 341.0]","[360, 480]","[0.0, 0.42291666666666666, 0.6194444444444445,...",20260_1,"[8, 265, 359, 479]","[-0.9555555555555556, 0.10416666666666674, 0.9...","[360, 480]",water on right,"[-0.010208251, 0.0080347, -0.045048416, 0.0062...",floor-other,0.184423


In [122]:
whole_data["GIoU"] = whole_data.apply(lambda r: compute_GIoU(r["loc"], r["loc_act"]), axis=1)

In [125]:
whole_data = whole_data[["bbox_id", "img", "img_size_x", "loc_act", "loc_rel_x", "obj_id", "expr", "emb_expr", "label", "loc", "loc_rel_y", "IoU", "GIoU"]]
whole_data.columns = ["bbox_id", "img", "img_size", "loc_act_bb", "loc_rel_bb", "obj_id", "expr", "emb_expr", "label", "loc_act_obj", "loc_rel_obj", "IoU", "GIoU"]

In [136]:
whole_data.head()

Unnamed: 0,bbox_id,img,img_size,loc_act_bb,loc_rel_bb,obj_id,expr,emb_expr,label,loc_act_obj,loc_rel_obj,IoU,GIoU
0,20260_f1,20260,"[360, 480]","[0.0, 277.0, 83.0, 336.0]","[0.0, 0.5770833333333333, 0.23055555555555557,...",20260_2,the sky right hand corner,"[-0.009086175, 0.06696806, -0.07156556, 0.0605...",cloud,"[0, 0, 359, 208]","[-1.0, -1.0, 0.9944444444444445, -0.1333333333...",0.0,-0.338279
1,20260_f1,20260,"[360, 480]","[0.0, 277.0, 83.0, 336.0]","[0.0, 0.5770833333333333, 0.23055555555555557,...",20260_1,water on right,"[-0.010208251, 0.0080347, -0.045048416, 0.0062...",floor-other,"[8, 265, 359, 479]","[-0.9555555555555556, 0.10416666666666674, 0.9...",0.059874,0.043853
2,20260_f1,20260,"[360, 480]","[0.0, 277.0, 83.0, 336.0]","[0.0, 0.5770833333333333, 0.23055555555555557,...",20260_3,middle guy,"[-0.031950668, 0.014514642, 0.006182524, 0.026...",man,"[178, 206, 224, 346]","[-0.011111111111111072, -0.14166666666666672, ...",0.0,-0.632246
3,20260_f2,20260,"[360, 480]","[0.0, 203.0, 223.0, 341.0]","[0.0, 0.42291666666666666, 0.6194444444444445,...",20260_2,the sky right hand corner,"[-0.009086175, 0.06696806, -0.07156556, 0.0605...",cloud,"[0, 0, 359, 208]","[-1.0, -1.0, 0.9944444444444445, -0.1333333333...",0.012796,-0.134117
4,20260_f2,20260,"[360, 480]","[0.0, 203.0, 223.0, 341.0]","[0.0, 0.42291666666666666, 0.6194444444444445,...",20260_1,water on right,"[-0.010208251, 0.0080347, -0.045048416, 0.0062...",floor-other,"[8, 265, 359, 479]","[-0.9555555555555556, 0.10416666666666674, 0.9...",0.184423,0.088795


In [None]:
pickle.dump(whole_data, open("whole_data_backup", "wb"))

In [133]:
whole_data = pickle.load(open("whole_data_backup", "rb"))

In [128]:
correct_boxes = object_table[["obj_id", "img", "img_size", "loc", "loc_rel", "obj_id", "expr", "emb_expr", "label", "loc", "loc_rel"]].copy()
correct_boxes["IoU"] = 1
correct_boxes["GoU"] = 1
correct_boxes.columns = whole_data.columns

In [135]:
correct_boxes.head()

Unnamed: 0,bbox_id,img,img_size,loc_act_bb,loc_rel_bb,obj_id,expr,emb_expr,label,loc_act_obj,loc_rel_obj,IoU,GIoU
0,9881_4,9881,"[480, 360]","[266, 161, 415, 209]","[0.10833333333333339, -0.10555555555555551, 0....",9881_4,ground between lead rider and second rider,"[-0.01790246, 0.06057319, 0.03528392, -0.01663...",ground,"[266, 161, 415, 209]","[0.10833333333333339, -0.10555555555555551, 0....",1,1
1,24410_1,24410,"[480, 360]","[298, 182, 408, 313]","[0.2416666666666667, 0.011111111111111072, 0.7...",24410_1,closest island,"[-0.07347091, -0.035721637, -0.048873313, -0.0...",island,"[298, 182, 408, 313]","[0.2416666666666667, 0.011111111111111072, 0.7...",1,1
2,11135_1,11135,"[360, 480]","[0, 224, 359, 479]","[-1.0, -0.06666666666666665, 0.994444444444444...",11135_1,sidewalk,"[-0.031596936, -0.04462336, -0.011513167, -0.0...",sidewalk,"[0, 224, 359, 479]","[-1.0, -0.06666666666666665, 0.994444444444444...",1,1
3,1317_6,1317,"[480, 360]","[68, 110, 82, 270]","[-0.7166666666666667, -0.38888888888888884, -0...",1317_6,post on left of house under the deck,"[-0.053634763, 0.017997567, -0.07499985, 0.024...",column,"[68, 110, 82, 270]","[-0.7166666666666667, -0.38888888888888884, -0...",1,1
4,35720_1,35720,"[480, 360]","[0, 0, 479, 359]","[-1.0, -1.0, 0.9958333333333333, 0.99444444444...",35720_1,anywhere,"[-0.027327683, -0.003149564, 0.0010297332, -0....",city,"[0, 0, 479, 359]","[-1.0, -1.0, 0.9958333333333333, 0.99444444444...",1,1


In [140]:
whole_data = pd.concat([whole_data, correct_boxes], sort=False)

In [142]:
pickle.dump(whole_data, open("whole_data_backup_with_correct_boxes", "wb"))

In [143]:
whole_data_small = whole_data[["bbox_id", "img", "loc_act_bb", "loc_rel_bb", "expr", "emb_expr", "IoU", "GIoU"]].copy()
whole_data_small.columns = ["bbox_id", "img", "loc_act", "loc_rel", "expr", "emb_expr", "IoU", "GIoU"]

In [144]:
pickle.dump(whole_data_small, open("whole_data", "wb"))

In [9]:
whole_data = pickle.load(open("whole_data", "rb"))

In [10]:
object_table = pickle.load(open("object_table", "rb"))

In [13]:
bbox_table = pickle.load(open("bbox_table", "rb"))

In [7]:
whole_data.head()

Unnamed: 0,bbox_id,img,loc_act,loc_rel,expr,emb_expr,IoU
0,20260_f1,20260,"[0.0, 277.0, 83.0, 336.0]","[0.0, 0.5770833333333333, 0.23055555555555557,...",the sky right hand corner,"[-0.009086175, 0.06696806, -0.07156556, 0.0605...",0.053869
1,20260_f1,20260,"[0.0, 277.0, 83.0, 336.0]","[0.0, 0.5770833333333333, 0.23055555555555557,...",water on right,"[-0.010208251, 0.0080347, -0.045048416, 0.0062...",0.247618
2,20260_f1,20260,"[0.0, 277.0, 83.0, 336.0]","[0.0, 0.5770833333333333, 0.23055555555555557,...",middle guy,"[-0.031950668, 0.014514642, 0.006182524, 0.026...",0.461398
3,20260_f2,20260,"[0.0, 203.0, 223.0, 341.0]","[0.0, 0.42291666666666666, 0.6194444444444445,...",the sky right hand corner,"[-0.009086175, 0.06696806, -0.07156556, 0.0605...",0.10331
4,20260_f2,20260,"[0.0, 203.0, 223.0, 341.0]","[0.0, 0.42291666666666666, 0.6194444444444445,...",water on right,"[-0.010208251, 0.0080347, -0.045048416, 0.0062...",0.373634


In [11]:
object_table.head()

Unnamed: 0,obj_id,img,loc,loc_rel,img_size,expr,emb_expr
0,9881_4,9881,"[266, 161, 415, 209]","[0.10833333333333339, -0.10555555555555551, 0....","[480, 360]",ground between lead rider and second rider,"[-0.01790246, 0.06057319, 0.03528392, -0.01663..."
1,24410_1,24410,"[298, 182, 408, 313]","[0.2416666666666667, 0.011111111111111072, 0.7...","[480, 360]",closest island,"[-0.07347091, -0.035721637, -0.048873313, -0.0..."
2,11135_1,11135,"[0, 224, 359, 479]","[-1.0, -0.06666666666666665, 0.994444444444444...","[360, 480]",sidewalk,"[-0.031596936, -0.04462336, -0.011513167, -0.0..."
3,1317_6,1317,"[68, 110, 82, 270]","[-0.7166666666666667, -0.38888888888888884, -0...","[480, 360]",post on left of house under the deck,"[-0.053634763, 0.017997567, -0.07499985, 0.024..."
4,35720_1,35720,"[0, 0, 479, 359]","[-1.0, -1.0, 0.9958333333333333, 0.99444444444...","[480, 360]",anywhere,"[-0.027327683, -0.003149564, 0.0010297332, -0...."


In [14]:
bbox_table.head()

Unnamed: 0,bbox_id,img,loc_act,img_size,loc_rel
0,20260_f1,20260,"[0.0, 277.0, 83.0, 336.0]","[360, 480]","[0.0, 0.5770833333333333, 0.23055555555555557,..."
1,20260_f2,20260,"[0.0, 203.0, 223.0, 341.0]","[360, 480]","[0.0, 0.42291666666666666, 0.6194444444444445,..."
2,20260_f3,20260,"[322.0, 195.0, 356.0, 249.0]","[360, 480]","[0.8944444444444445, 0.40625, 0.98888888888888..."
3,20260_f4,20260,"[0.0, 274.0, 104.0, 342.0]","[360, 480]","[0.0, 0.5708333333333333, 0.28888888888888886,..."
4,20260_f5,20260,"[0.0, 202.0, 236.0, 449.0]","[360, 480]","[0.0, 0.42083333333333334, 0.6555555555555556,..."


In [14]:
whos

Variable             Type         Data/Info
-------------------------------------------
bb                   str          22597.txt
bbox_id              list         n=10
bbox_list            list         n=19999
bbox_path            str          ../data/referit_edgeboxes_top100/
bbox_table           DataFrame              bbox_id    img <...>[199971 rows x 5 columns]
bboxes               list         n=20000
colIndices           ndarray      22128: 22128 elems, type `int64`, 177024 bytes (172.875 kb)
compute_IoU          function     <function compute_IoU at 0x7f751bfb3268>
data_path            str          ../data/
height               int          360
img                  list         n=10
img_path             str          ../data/images/
imgs                 list         n=20000
loc_act              list         n=10
mask                 str          14789_2.mat
mask_path            str          ../data/mask/
masks                list         n=99610
mat                  ndarray   