In [1]:
import torch
import torchvision
import numpy as np
import cv2
import os

feature_model = 'resnet101'
split = 'train'
max_images = None
model_ = 'resnet101'
model_stage = 3
#batch_size = 32
batch_size = 1
img_h = img_w = 224
image_dir = "../dataset/images/tmp"
output_dir = "../processed_images/"

In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0,1,2,3,4,5,6,7"

In [3]:
def build_model(img_dir, output_h5_file, img_h, img_w, model, model_stage=3,
                batch_size=64):
    if not hasattr(torchvision.models, model):
        raise ValueError('Invalid model "%s"' % model)
    if not 'resnet' in model:
        raise ValueError('Feature extraction only supports ResNets')
    cnn = getattr(torchvision.models, model)(pretrained=True)
    layers = [cnn.conv1, cnn.bn1, cnn.relu, cnn.maxpool]
    for i in range(model_stage):
        name = 'layer%d' % (i+1)
        layers.append(getattr(cnn, name))
    model = torch.nn.Sequential(*layers)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)

    model.eval()
    return model

def run_batch(cur_batch, model):
    mean = np.array([0.485, 0.456, 0.406]).reshape(1,3,1,1) #Comes from CLEVR
    std = np.array([0.229, 0.224, 0.224]).reshape(1,3,1,1) #Comes from CLEVR

    image_batch = np.concatenate(cur_batch, 0).astype(np.float32)
    image_batch = (image_batch / 255.0 - mean) / std
    image_batch = torch.FloatTensor(image_batch).cuda()
    with torch.no_grad():
        image_batch = torch.autograd.Variable(image_batch)

    print(model(image_batch))
    feats = model(image_batch)
    feats = feats.data.cpu().clone().numpy()

    return feats

In [4]:
model = build_model(image_dir, output_dir, img_h, img_w, model_,
                    model_stage=model_stage, batch_size=batch_size)

input_paths = []
idx_set = set()

for fn in os.listdir(image_dir):
    if not fn.endswith('.jpg'):
        continue
    #idx = int(os.path.splitext(fn)[0].split('_')[-1])
    idx = os.path.splitext(fn)[0].split('.jpg')[-1]
    input_paths.append((os.path.join(image_dir, fn), idx))
    idx_set.add(idx)

#print(input_paths)    
input_paths.sort(key=lambda x: x[1])
assert len(idx_set) == len(input_paths)
#assert min(idx_set) == 0 and max(idx_set) == len(idx_set) - 1

if max_images is not None:
    input_paths = input_paths[:max_images]

img_size = (img_h, img_w)
feat_dset = None
i0 = 0
cur_batch = []
paths = []
a = None
for i, (path, idx) in enumerate(input_paths):
    img = cv2.imread(path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, img_size, interpolation=cv2.INTER_CUBIC)
    img = img.transpose(2,0,1)[None]
    #Old code for scipy.misc.imread
    #img = imread(path, mode='RGB')
    #img = imresize(img, img_size, interp='bicubic')
    #img = img.transpose(2,0,1)[None]
    cur_batch.append(img)
    paths.append(path)
    if len(cur_batch) == batch_size:
        feats = run_batch(cur_batch, model)
        a = feats
        for j in range(feats.shape[0]):
            #dim of feats is (1, 1024, 14, 14)
            #print(feats)
            torch.save(feats[j], output_dir + paths[j].split('/')[-1])
        i1 = i0 + len(cur_batch)
        i0 = i1
        print('Processed %d / %d images' % (i1, len(input_paths)))
        cur_batch = []
        paths = []
if len(cur_batch) > 0:
    feats = run_batch(cur_batch, model)
    for j in range(feats.shape[0]):
        torch.save(feats[j], output_dir + paths[j].split('/')[-1])
        #Files are saved with .png extension, slighty ambigious.
    i1 = i0 + len(cur_batch)
    print('Processed %d / %d images' % (i1, len(input_paths)))

tensor([[[[0.0000e+00, 0.0000e+00, 1.5704e-02,  ..., 0.0000e+00,
           0.0000e+00, 0.0000e+00],
          [0.0000e+00, 5.8342e-02, 0.0000e+00,  ..., 7.4527e-02,
           4.5028e-01, 7.1550e-03],
          [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 8.2360e-02,
           0.0000e+00, 0.0000e+00],
          ...,
          [8.9467e-03, 5.9595e-03, 0.0000e+00,  ..., 2.7879e-02,
           0.0000e+00, 9.9181e-03],
          [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 1.2355e-01,
           0.0000e+00, 2.3575e-01],
          [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 1.4289e-01,
           0.0000e+00, 0.0000e+00]],

         [[0.0000e+00, 1.6744e-01, 3.6998e-02,  ..., 7.7559e-02,
           0.0000e+00, 0.0000e+00],
          [0.0000e+00, 5.5155e-02, 1.6503e-02,  ..., 0.0000e+00,
           0.0000e+00, 0.0000e+00],
          [5.4117e-02, 5.9973e-02, 1.0295e-01,  ..., 0.0000e+00,
           0.0000e+00, 0.0000e+00],
          ...,
          [3.3720e-01, 7.9978e-01, 4.0420e-01,  ..., 0.0000

In [6]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
#plt.imshow(mpimg.imread('../processed_images/n351495.png'))

In [12]:
!ls

 data_loader.py         model.py	       requirements.txt
 feature_extractor.py   preprocess.py	       setup_environment.sh
 get_data.sh	        processedn351495.jpg   Untitled.ipynb
'Meta Module Files'     README.md


In [7]:
#plt.imshow(mpimg.imread('/nobackup/s144454/dataset/images/images/n351495.jpg'))

/nobackup/s144454/MasterQA


In [8]:
!pip install pickle

[31mERROR: Could not find a version that satisfies the requirement pickle (from versions: none)[0m
[31mERROR: No matching distribution found for pickle[0m


In [9]:
import pickle

In [5]:

#(1, 1024, 14, 14)
a.unsqueeze(0).shape

AttributeError: 'numpy.ndarray' object has no attribute 'unsqueeze'

In [6]:
#gqa_0000021.npz gqa_0000022.npz gqa_0000023.npz
b = np.load('/nobackup/s144454/gqa_bottom_up_features/gqa_0000021.npz')


In [7]:
type(b)

numpy.lib.npyio.NpzFile

In [8]:
b.files

['norm_bb', 'features', 'conf', 'soft_labels']

In [9]:
print(b['norm_bb'].shape)
b['norm_bb']

(100, 6)


array([[0.38178048, 0.        , 0.615426  , 0.21292193, 0.23364553,
        0.21292193],
       [0.        , 0.19277833, 0.23732951, 0.4682653 , 0.23732951,
        0.27548695],
       [0.22351988, 0.13192014, 0.7934534 , 0.5414548 , 0.56993353,
        0.40953463],
       [0.56001574, 0.00493135, 0.97627044, 0.38954508, 0.4162547 ,
        0.38461372],
       [0.62875885, 0.2208976 , 0.96342367, 0.90829694, 0.33466482,
        0.6873993 ],
       [0.38347116, 0.3797321 , 0.8115892 , 0.8966243 , 0.42811802,
        0.5168922 ],
       [0.4980761 , 0.34867486, 0.7043347 , 0.52010196, 0.20625857,
        0.1714271 ],
       [0.42374897, 0.75872344, 0.56795686, 0.9709431 , 0.1442079 ,
        0.21221966],
       [0.26093358, 0.563816  , 0.40258798, 0.77233315, 0.1416544 ,
        0.20851713],
       [0.44514984, 0.5136531 , 0.7212176 , 0.6955453 , 0.27606773,
        0.18189222],
       [0.        , 0.        , 0.7279887 , 0.68442744, 0.7279887 ,
        0.68442744],
       [0.11304537, 0

In [10]:
print(b['features'].shape)
b['features']

(100, 2048)


array([[0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 6.4351703e-03,
        5.0339041e+00, 0.0000000e+00],
       [1.6083232e+00, 9.0700245e-01, 1.4312448e-01, ..., 0.0000000e+00,
        6.3724451e+00, 0.0000000e+00],
       [5.4771644e-01, 0.0000000e+00, 1.1636376e-01, ..., 1.3317237e-02,
        6.6917114e+00, 4.8677391e-01],
       ...,
       [9.7434717e-01, 0.0000000e+00, 9.6031956e-02, ..., 5.3121365e-04,
        4.4495964e+00, 6.6653436e-01],
       [3.9473721e-01, 0.0000000e+00, 3.7367932e-02, ..., 0.0000000e+00,
        8.3003044e+00, 5.3805098e-02],
       [2.8006451e+00, 0.0000000e+00, 1.9404037e-02, ..., 0.0000000e+00,
        3.1190310e+00, 3.1442031e-01]], dtype=float32)

In [17]:
print(b['conf'].shape)
b['conf']

(100, 1)


array([[0.80111897],
       [0.79939294],
       [0.73792964],
       [0.7121331 ],
       [0.70005155],
       [0.6625879 ],
       [0.64582217],
       [0.5794424 ],
       [0.5712555 ],
       [0.5606844 ],
       [0.53566605],
       [0.5286926 ],
       [0.48259372],
       [0.40685418],
       [0.36587235],
       [0.36441398],
       [0.36034265],
       [0.35469964],
       [0.33192042],
       [0.31210378],
       [0.3104159 ],
       [0.2913682 ],
       [0.2900124 ],
       [0.28977627],
       [0.28748044],
       [0.2747092 ],
       [0.26953366],
       [0.2226742 ],
       [0.22134611],
       [0.21822365],
       [0.21641661],
       [0.21282758],
       [0.2113088 ],
       [0.20286207],
       [0.1974359 ],
       [0.17921461],
       [0.17910351],
       [0.17847082],
       [0.17454113],
       [0.16679458],
       [0.16598207],
       [0.16009265],
       [0.1590477 ],
       [0.14990097],
       [0.14266555],
       [0.12474096],
       [0.12323074],
       [0.112

In [21]:
print(b['soft_labels'].shape)
b['soft_labels']

(100, 1601)


array([[1.6560459e-01, 2.1764924e-07, 2.1452988e-07, ..., 7.3437532e-06,
        2.0302809e-06, 1.6493455e-07],
       [1.0722199e-01, 1.2254479e-07, 1.2984245e-07, ..., 1.3909532e-06,
        5.4462407e-07, 1.5051246e-07],
       [1.8000765e-02, 1.5926673e-07, 5.9311208e-07, ..., 3.2351877e-06,
        8.4513789e-07, 7.9031771e-08],
       ...,
       [4.0937319e-02, 2.3760727e-07, 1.2627552e-06, ..., 4.1680883e-06,
        1.1816037e-06, 1.5572014e-07],
       [1.5090437e-01, 1.1163786e-07, 1.5788781e-07, ..., 8.2935998e-07,
        1.0783830e-06, 1.3886721e-07],
       [6.4048320e-02, 5.2735402e-07, 4.9658763e-07, ..., 2.7283711e-06,
        2.3361265e-06, 2.1716103e-07]], dtype=float32)

In [15]:
sum(b['soft_labels'][2])

1.0000008798880273

In [2]:
import utils
import importlib
import numpy as np
from tqdm.notebook import tqdm

importlib.reload(utils)

#print("Number of processors: ", mp.cpu_count())
#pool = mp.Pool(mp.cpu_count())

data = utils.load_obj_tsv('../vg_gqa_imgfeat/gqa_testdev_obj36.tsv')
#pbar = tqdm(total=len(data))

# for item in tqdm(data):
#     np.savez('Features/train/{}'.format(item['img_id']), **item)

#results = process_map([np.savez, ('Features/{}'.format(item['img_id']), item) for item in data], workers=mp.cpu_count())

#pool.close()

#np.savez('Features/{}'.format(item['img_id']), **item)
print(data[0])
print(data[1])


Start to load Faster-RCNN detected objects from ../vg_gqa_imgfeat/gqa_testdev_obj36.tsv
Loaded 398 images in file ../vg_gqa_imgfeat/gqa_testdev_obj36.tsv in 2 seconds.
{'img_id': 'n216553', 'img_h': 427, 'img_w': 640, 'objects_id': array([ 283,   42,  236,  236,   47,  440,   51,   50,  840,  465,  234,
        236,  291,  234,   90,  242,   42,   50,  364,   50,  440,  381,
        119,  283, 1305,  683, 1180,  683,  465,  234,  245,  236,   50,
        291,  291,  465]), 'objects_conf': array([0.9735816 , 0.9599596 , 0.91249406, 0.8937951 , 0.7844286 ,
       0.7790787 , 0.64471906, 0.62560457, 0.59041655, 0.5752458 ,
       0.55702615, 0.5543998 , 0.49020204, 0.4821636 , 0.461907  ,
       0.43933845, 0.43347633, 0.3900281 , 0.379403  , 0.3479575 ,
       0.33917415, 0.31995302, 0.3168483 , 0.31294194, 0.27822277,
       0.26683944, 0.25012794, 0.24788943, 0.24348071, 0.5182077 ,
       0.19248883, 0.31137088, 0.18842983, 0.41780132, 0.18587452,
       0.23433094], dtype=float32), '

In [40]:
import json

# with open('../processed/questions/trainval_balanced_inputs.json') as f:
#         data = json.load(f)
#         
# with open('Meta Module Files/meta_info/forbidden.json', 'r') as f:
#         forbidden = json.load(f)

test = data[-4:]
test.append(data[0])
forbidden = set(forbidden)

test2 = list(filter(lambda x: x[-2] not in forbidden, data))

# for i, t in enumerate(test):
#     print(t[-2])
#     print(t[-2] not in forbidden)
    
print(len(test2))

# for i, t in enumerate(test):
#     print(t[-2])
#     print(t[-2] not in forbidden)

943000
