In [None]:
import numpy as np
import tensorflow as tf
import keras
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
import model.VqaQualityModel as VQM 
from tqdm import tqdm
import os, json, nltk, csv
import sklearn.metrics
# Set CUDA_VISIBLE_DEVICES so that only the GPU with index 1 is exposed to this
# kernel (presumably must happen before TensorFlow first touches the GPU — TODO confirm).
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [None]:
# Load the merged VQA-quality annotations.
# The result is indexed by split name further down (data[SPLIT]); each split is
# read through the keys 'image', 'question', 'answerable' and 'recognizable'.
# The `with` block closes the file handle as soon as parsing finishes instead of
# leaving it open until garbage collection.
with open('./data/vqa_quality_merger.json') as json_file:
    data = json.load(json_file)

In [None]:
# Build the inference graph and restore the trained weights.
fmap_source = 'resnet152'  # feature map source: 'detectron' or 'resnet152'
model = VQM.VqaQualityModel(fmap_source)

# Graph inputs. The leading None leaves the first (batch) dimension unspecified;
# the remaining dimensions come from the model's own options.
img_feat_shape = [None] + model.options['img_feat_shape']
question_shape = [None, model.options['question_len']]
f_map = tf.placeholder(tf.float32, shape=img_feat_shape)
q_input = tf.placeholder(tf.int32, shape=question_shape)

# `pred` is what later cells fetch with sess.run and index by 'ans_and_rec'.
pred = model.build_graph(f_map, q_input)

# Restore the checkpoint stored under ./ckpt_ans_rec/<fmap_source>/.
sess = tf.Session()
all_saver = tf.train.Saver()
ckpt_path = './ckpt_ans_rec/{}/{}-ckpt'.format(fmap_source, fmap_source)
all_saver.restore(sess, ckpt_path)

In [None]:
# Score every (image, question) pair of one split with the restored model,
# feeding one sample per sess.run call.
FMAP_PATH = 'fmap/{}/'.format(fmap_source)
SPLIT = 'val'  # evaluate on train/val/test set
data_split = data[SPLIT]

# Each stored feature map is reshaped to a batch of exactly one sample.
single_feat_shape = [1] + model.options['img_feat_shape']

# One row per sample, two scores per row.
ans_pred = np.zeros((len(data_split['image']), 2))
for i, image_name in enumerate(tqdm(data_split['image'])):
    # The .npy file is named after the image with its last 4 characters dropped
    # (presumably a 3-letter extension such as ".jpg" — verify against the data).
    fmap_file = FMAP_PATH + '{}/{}.npy'.format(SPLIT, image_name[:-4])
    img_feat = np.load(fmap_file).reshape(single_feat_shape)
    feed = {f_map: img_feat, q_input: [data_split['question'][i]]}
    output = sess.run(pred, feed_dict=feed)
    # Store the complement of the model's 'ans_and_rec' output.
    ans_pred[i] = 1.0 - output['ans_and_rec']


In [None]:
# evaluation: average precision (AP) of the two predicted score columns
eval_ = {}

# Column 0 of ans_pred is scored against the flipped 'answerable' labels,
# i.e. the positive class is "answerable == 0".
answerable = np.asarray(data_split['answerable'])
unanswerable_true = 1 - answerable
unanswerable_score = ans_pred[:, 0]
eval_["ans_ap"] = sklearn.metrics.average_precision_score(unanswerable_true, unanswerable_score)

# Column 1 is scored against the flipped 'recognizable' labels, restricted to
# the samples whose 'answerable' value is below 0.5.
unanswerable_mask = answerable < 0.5
recognizable = np.asarray(data_split['recognizable'])
unrecognizable_true = 1 - recognizable[unanswerable_mask]
unrecognizable_score = ans_pred[unanswerable_mask, 1:]  # kept 2-D with a single column
eval_["rec_ap"] = sklearn.metrics.average_precision_score(unrecognizable_true, unrecognizable_score)

print(eval_["ans_ap"], eval_["rec_ap"])


In [None]:
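# Alternative (disabled): compute ResNet-152 features directly from the images for prediction,
# instead of loading precomputed feature maps from disk.
# NOTE: the code below uses IMG_PATH, which is not defined in the cells shown above.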

# if fmap_source == 'resnet152':
#     resnet152 = keras.applications.ResNet152(include_top=False, weights='imagenet', input_shape=[448, 448, 3])
#     base_model = keras.models.Model(inputs=resnet152.input, outputs=resnet152.get_layer('conv5_block3_add').output)

# SPLIT = 'val' # evaluate on train/val/test set
# data_split = data[SPLIT]
# ans_pred = np.zeros((len(data_split['image']), 2))
# for i, image_name in enumerate(tqdm(data_split['image'])):
#     img = image.load_img(IMG_PATH+'{}/{}'.format(SPLIT, image_name), target_size=(448,448)) 
#     img = image.img_to_array(img)
#     img = np.expand_dims(img, axis=0)
#     img = preprocess_input(img)
#     img_feat = base_model.predict(img)
#     enc_question = data_split['question'][i]
#     output = sess.run(pred, feed_dict={f_map: img_feat, q_input: [enc_question]})
#     ans_pred[i] = 1.0 - output['ans_and_rec']
    
