-
Notifications
You must be signed in to change notification settings - Fork 30
/
extract_img_features_attention.py
114 lines (97 loc) · 4.57 KB
/
extract_img_features_attention.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
"""
- ASSUMES: that the image dataset has been manually split such that all train
images are stored in "coco/images/train/", all test images are stored in
"coco/images/test/" and all val images are stored in "coco/images/val". That
the Inception-V3 model has been downloaded and placed in inception. That the
dict numpy_params (containing W_img and b_img taken from the img_transform
step in a well-performing non-attention model) is placed in
coco/data/img_features_attention/transform_params.
- DOES: extracts a 64x300 feature array (64 300 dimensional feature vectors,
one each for 8x8 different img regions) for each train/val/test img and saves
each individual feature array to disk (to coco/data/img_features_attention).
Is used in the attention models.
"""
import os
import re
import tensorflow as tf
import tensorflow.python.platform
from tensorflow.python.platform import gfile
import numpy as np
import cPickle
from utilities import log
from extract_img_features import load_pretrained_CNN
def extract_img_features_attention(img_paths, demo=False):
"""
- Runs every image in "img_paths" through the pretrained CNN and
saves their respective feature array (the third-to-last layer
of the CNN transformed to 64x300) to disk.
"""
# load the Inception-V3 model:
load_pretrained_CNN()
# load the parameters for the feature vector transform:
transform_params = cPickle.load(open(
"/home/fregu856/CS224n/project/CS224n_project/coco/data/img_features_attention/transform_params/numpy_params"))
W_img = transform_params["W_img"]
b_img = transform_params["b_img"]
with tf.Session() as sess:
# get the third-to-last layer in the Inception-V3 model (a tensor
# of shape (1, 8, 8, 2048)):
img_features_tensor = sess.graph.get_tensor_by_name("mixed_10/join:0")
# reshape the tensor to shape (64, 2048):
img_features_tensor = tf.reshape(img_features_tensor, (64, 2048))
# apply the img transorm (get a tensor of shape (64, 300)):
linear_transform = tf.matmul(img_features_tensor, W_img) + b_img
img_features_tensor = tf.nn.sigmoid(linear_transform)
for step, img_path in enumerate(img_paths):
if step % 10 == 0:
print step
log(str(step))
# read the image:
img_data = gfile.FastGFile(img_path, "rb").read()
try:
# get the img features (np array of shape (64, 300)):
img_features = sess.run(img_features_tensor,
feed_dict={"DecodeJpeg/contents:0": img_data})
#img_features = np.float16(img_features)
except:
print "JPEG error for:"
print img_path
print "******************"
log("JPEG error for:")
log(img_path)
log("******************")
else:
if not demo:
# get the image id:
img_name = img_path.split("/")[3]
img_id = img_name.split("_")[2].split(".")[0].lstrip("0")
img_id = int(img_id)
else: # (if demo:)
# we're only extracting features for one img, (arbitrarily)
# set the img id to -1:
img_id = -1
# save the img features to disk:
cPickle.dump(img_features,
open("/home/fregu856/CS224n/project/CS224n_project/coco/data/img_features_attention/%d" % img_id, "wb"))
def main():
# define where all val imgs are located:
val_img_dir = "coco/images/val/"
# create a list of the paths to all val imgs:
val_img_paths = [val_img_dir + file_name for file_name in\
os.listdir(val_img_dir) if ".jpg" in file_name]
# define where all test imgs are located:
test_img_dir = "coco/images/test/"
# create a list of the paths to all test imgs:
test_img_paths = [test_img_dir + file_name for file_name in\
os.listdir(test_img_dir) if ".jpg" in file_name]
# define where all train imgs are located:
train_img_dir = "coco/images/train/"
# create a list of the paths to all train imgs:
train_img_paths = [train_img_dir + file_name for file_name in\
os.listdir(train_img_dir) if ".jpg" in file_name]
# create a list of the paths to all imgs:
img_paths = val_img_paths + test_img_paths + train_img_paths
# extract all features:
extract_img_features_attention(img_paths)
if __name__ == '__main__':
main()