"""
Reads in a tsv file with pre-trained bottom up attention features and
stores it in HDF5 format. Also store {image_id: feature_idx}
as a pickle file.
Hierarchy of HDF5 file:
{ 'image_features': num_images x num_boxes x 2048 array of features
'image_bb': num_images x num_boxes x 4 array of bounding boxes }
"""
from __future__ import print_function
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import base64
import csv
import h5py
import cPickle
import numpy as np
import utils
csv.field_size_limit(sys.maxsize)
# Column order of the bottom-up-attention TSV (the file itself has no header row).
FIELDNAMES = ['image_id', 'image_w', 'image_h', 'num_boxes', 'boxes', 'features']

# Input: pre-extracted Faster R-CNN (ResNet-101, Visual Genome) features,
# one TSV row per image with a fixed 36 boxes each.
infile = 'data/trainval_36/trainval_resnet101_faster_rcnn_genome_36.tsv'

# Outputs: one HDF5 feature file per split, plus {image_id: row_index}
# pickles so readers can map an image id back to its row in the datasets.
train_data_file = 'data/train36.hdf5'
val_data_file = 'data/val36.hdf5'
train_indices_file = 'data/train36_imgid2idx.pkl'
val_indices_file = 'data/val36_imgid2idx.pkl'

# Cached lists of image ids belonging to each COCO split.
train_ids_file = 'data/train_ids.pkl'
val_ids_file = 'data/val_ids.pkl'

feature_length = 2048   # dimensionality of each per-box feature vector
num_fixed_boxes = 36    # boxes per image in the "36" variant of the features
def compute_spatial_features(bboxes, image_w, image_h):
    """Return a (num_boxes, 6) array of normalized box geometry.

    Each row is (x1, y1, x2, y2, w, h), with x-values scaled by the image
    width and y-values by the image height so everything lies in [0, 1].

    bboxes: (num_boxes, 4) float array of (x1, y1, x2, y2) pixel boxes.
    image_w, image_h: image dimensions in pixels.
    """
    scaled_x = bboxes[:, 0] / image_w
    scaled_y = bboxes[:, 1] / image_h
    scaled_width = (bboxes[:, 2] - bboxes[:, 0]) / image_w
    scaled_height = (bboxes[:, 3] - bboxes[:, 1]) / image_h
    return np.column_stack((
        scaled_x,
        scaled_y,
        scaled_x + scaled_width,
        scaled_y + scaled_height,
        scaled_width,
        scaled_height))


if __name__ == '__main__':
    h_train = h5py.File(train_data_file, "w")
    h_val = h5py.File(val_data_file, "w")

    # Load the cached train/val image-id sets, or build them from the image
    # directories on first run.  Pickles are opened in binary mode through
    # context managers: the previous bare open() calls leaked the handles
    # and read the pickles in text mode.
    if os.path.exists(train_ids_file) and os.path.exists(val_ids_file):
        with open(train_ids_file, 'rb') as f:
            train_imgids = cPickle.load(f)
        with open(val_ids_file, 'rb') as f:
            val_imgids = cPickle.load(f)
    else:
        train_imgids = utils.load_imageid('data/train2014')
        val_imgids = utils.load_imageid('data/val2014')
        with open(train_ids_file, 'wb') as f:
            cPickle.dump(train_imgids, f)
        with open(val_ids_file, 'wb') as f:
            cPickle.dump(val_imgids, f)

    train_indices = {}  # image_id -> row index into the train datasets
    val_indices = {}    # image_id -> row index into the val datasets

    train_img_features = h_train.create_dataset(
        'image_features', (len(train_imgids), num_fixed_boxes, feature_length), 'f')
    train_img_bb = h_train.create_dataset(
        'image_bb', (len(train_imgids), num_fixed_boxes, 4), 'f')
    train_spatial_img_features = h_train.create_dataset(
        'spatial_features', (len(train_imgids), num_fixed_boxes, 6), 'f')

    val_img_bb = h_val.create_dataset(
        'image_bb', (len(val_imgids), num_fixed_boxes, 4), 'f')
    val_img_features = h_val.create_dataset(
        'image_features', (len(val_imgids), num_fixed_boxes, feature_length), 'f')
    val_spatial_img_features = h_val.create_dataset(
        'spatial_features', (len(val_imgids), num_fixed_boxes, 6), 'f')

    train_counter = 0
    val_counter = 0

    print("reading tsv...")
    # 'rb' rather than the original 'r+b': the TSV is read-only input.
    with open(infile, "rb") as tsv_in_file:
        reader = csv.DictReader(tsv_in_file, delimiter='\t', fieldnames=FIELDNAMES)
        for item in reader:
            item['num_boxes'] = int(item['num_boxes'])
            image_id = int(item['image_id'])
            image_w = float(item['image_w'])
            image_h = float(item['image_h'])
            # Boxes and features are base64-encoded float32 blobs.
            bboxes = np.frombuffer(
                base64.decodestring(item['boxes']),
                dtype=np.float32).reshape((item['num_boxes'], -1))
            spatial_features = compute_spatial_features(bboxes, image_w, image_h)

            # Route the record into its split; removing the id as we go lets
            # us report any images that never appeared in the TSV afterwards.
            if image_id in train_imgids:
                train_imgids.remove(image_id)
                train_indices[image_id] = train_counter
                train_img_bb[train_counter, :, :] = bboxes
                train_img_features[train_counter, :, :] = np.frombuffer(
                    base64.decodestring(item['features']),
                    dtype=np.float32).reshape((item['num_boxes'], -1))
                train_spatial_img_features[train_counter, :, :] = spatial_features
                train_counter += 1
            elif image_id in val_imgids:
                val_imgids.remove(image_id)
                val_indices[image_id] = val_counter
                val_img_bb[val_counter, :, :] = bboxes
                val_img_features[val_counter, :, :] = np.frombuffer(
                    base64.decodestring(item['features']),
                    dtype=np.float32).reshape((item['num_boxes'], -1))
                val_spatial_img_features[val_counter, :, :] = spatial_features
                val_counter += 1
            else:
                # `assert` is stripped under `python -O`; raise explicitly so
                # a bad TSV row can never be silently written to either split.
                raise ValueError('Unknown image id: %d' % image_id)

    if len(train_imgids) != 0:
        print('Warning: train_image_ids is not empty')
    if len(val_imgids) != 0:
        print('Warning: val_image_ids is not empty')

    with open(train_indices_file, 'wb') as f:
        cPickle.dump(train_indices, f)
    with open(val_indices_file, 'wb') as f:
        cPickle.dump(val_indices, f)
    h_train.close()
    h_val.close()
    print("done!")