In [1]:
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
import progressbar
import subprocess
ROOT_DIR = '/auto/homes/bat34/VQA_PartII/'
MODEL_DIR = 'faster-rcnn.pytorch/data/pretrained_model'
MODEL_NAME = 'resnet101_caffe.pth'
MODEL_PATH = os.path.join(ROOT_DIR, MODEL_DIR, MODEL_NAME)
D_ROOT = '/local/scratch/bat34'
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [2]:
os.environ['CUDA_VISIBLE_DEVICES']

'1'

In [3]:
if not os.path.exists(os.path.join(ROOT_DIR, MODEL_DIR)):
    subprocess.run(["mkdir", "-p", os.path.join(ROOT_DIR, MODEL_DIR)])
    subprocess.run(["wget", \
                    "https://filebox.ece.vt.edu/~jw2yang/faster-rcnn/pretrained-base-models/resnet101_caffe.pth", \
                    "-P", os.path.join(ROOT_DIR, MODEL_DIR)])

In [4]:
resnet101 = models.resnet101()

In [5]:
resnet101.load_state_dict(torch.load(MODEL_PATH))

<All keys matched successfully>

In [6]:
#remove the last layer, use resnet to extract 2048x1 feature
resnet101 = nn.Sequential(*list(resnet101.children())[:-1])
resnet101.cuda()
resnet101.eval()

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [7]:
#Preprocess the image so that it conforms with FasterRCNN
PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
TEST_SCALES = (600,)
TEST_MAX_SIZE = 1000
def process_image(im):
    im_orig = im.astype(np.float32, copy=True)
    im_orig -= PIXEL_MEANS

    im_shape = im_orig.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    
    for target_size in TEST_SCALES:
        im_scale = float(target_size) / float(im_size_min)
        # Prevent the biggest axis from being more than MAX_SIZE
        if np.round(im_scale * im_size_max) > TEST_MAX_SIZE:
              im_scale = float(TEST_MAX_SIZE) / float(im_size_max)
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                interpolation=cv2.INTER_LINEAR)
    return im

In [None]:
splits = ['train', 'val', 'test']
image_splits = ['train2014', 'val2014', 'test2015']
SAVE_DIR = '/local/scratch/bat34/resnet101-features-2048/{}.pth'
with torch.no_grad():
    for split, image_split in zip(splits, image_splits):
        IMAGE_DIR = os.path.join(D_ROOT, image_split)
        images = os.listdir(IMAGE_DIR)
        images_path = [os.path.join(IMAGE_DIR, file) for file in images]
        start = 0
        for i in progressbar.progressbar(range(start, len(images))):
            #Checkpoint every 2,500 images
            image_name, image = images[i], cv2.imread(images_path[i])
            image = process_image(image)
            image = np.expand_dims(image, axis=0)
            image = torch.from_numpy(image)
            _, H, W, C = list(image.size())
            image = image.view(1, C, H, W)
            image = image.float()
            image = image.cuda()
            out = resnet101(image)
            torch.save(out.cpu(), SAVE_DIR.format(image_name))
        print('Successfully processed {}.'.format(split))
print('Successfully processed all images.')

 51% (42729 of 82783) |#########         | Elapsed Time: 0:45:56 ETA:   0:41:38

In [17]:
#Sanity check
split = ['train2014', 'val2014', 'test2015']
image_splits = ['train2014', 'val2014', 'test2015']
for split, image_split in zip(splits, image_splits):
    RES_FILE = os.path.join(D_ROOT, 'BaselineTraining', split, 'baseline_{}_cnn_features.pth'.format(split))
    IMAGE_DIR = os.path.join(D_ROOT, 'Images', 'mscoco', image_split)
    images = os.listdir(IMAGE_DIR)
    res = torch.load(RES_FILE) if os.path.exists(RES_FILE) else {}
    assert len(res) == len(images)
print('Sanity check passed.')

Sanity check passed.


In [24]:
#Squeezing tensor
import tqdm
split = ['train', 'val', 'test']
image_splits = ['train2014', 'val2014', 'test2015']
for split, image_split in zip(splits, image_splits):
    print("Squeezing {}".format(split))
    RES_FILE = os.path.join(D_ROOT, 'BaselineTraining', split, 'baseline_{}_cnn_features.pth'.format(split))
    IMAGE_DIR = os.path.join(D_ROOT, 'Images', 'mscoco', image_split)
    images = os.listdir(IMAGE_DIR)
    res = torch.load(RES_FILE) if os.path.exists(RES_FILE) else {}
    for image_name, tensor in tqdm.tqdm(res.items()):
        res[image_name] = torch.squeeze(res[image_name])
    torch.save(res, RES_FILE)
    print('Saving {}'.format(split))

Squeezing train2014


100%|██████████| 82783/82783 [00:00<00:00, 291458.44it/s]


Saving train2014
Squeezing val2014


100%|██████████| 40504/40504 [00:00<00:00, 163436.40it/s]


Saving val2014
Squeezing test2015


100%|██████████| 81434/81434 [00:00<00:00, 279405.71it/s]


Saving test2015


In [30]:
#Scratch Space
split = 'train'
RES_FILE = os.path.join(D_ROOT, 'BaselineTraining', split, 'baseline_{}_cnn_features.pth'.format(split))
res = torch.load(RES_FILE) if os.path.exists(RES_FILE) else {}

In [41]:
res_tensor = res[list(res.keys())[0]]

In [33]:
from skipthoughts import UniSkip

In [35]:
txt_enc = UniSkip('auto/homes/bat34/VQA_PartII/data/skipthoughts', ['1', '2', '3'])

  "num_layers={}".format(dropout, num_layers))


In [48]:
txt_tensor = txt_enc(torch.LongTensor([[0, 1, 1]]), [3])
txt_tensor = torch.squeeze(txt_tensor)

In [51]:
torch.cat((res_tensor, txt_tensor), 0).size()

torch.Size([4448])

In [47]:
txt_tensor.size()

torch.Size([1, 2400])