Skip to content

Commit

Permalink
[feat] Add script for feature extraction from vmb (#93)
Browse files Browse the repository at this point in the history
* [feat] Add script for feature extraction from vmb

* [fix] Address comments in the PR

* [fix] Address Meet's comments

* [fix] Remove os.path.exists from download_file
  • Loading branch information
apsdehal committed Jun 12, 2019
1 parent 3891da7 commit c6796cc
Show file tree
Hide file tree
Showing 2 changed files with 225 additions and 0 deletions.
199 changes: 199 additions & 0 deletions pythia/scripts/features/extract_features_vmb.py
@@ -0,0 +1,199 @@
# Requires vqa-maskrcnn-benchmark to be built and installed
# Category mapping for visual genome can be downloaded from
# https://dl.fbaipublicfiles.com/pythia/data/visual_genome_categories.json
import argparse
import glob
import os

import cv2
import numpy as np
import torch
from PIL import Image

from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.layers import nms
from maskrcnn_benchmark.modeling.detector import build_detection_model
from maskrcnn_benchmark.structures.image_list import to_image_list
from maskrcnn_benchmark.utils.model_serialization import load_state_dict
from pythia.utils.general import download_file


class FeatureExtractor:
    """Extract region features from images with a vqa-maskrcnn-benchmark
    Detectron model.

    Runs detection on every ``*.jpg`` under ``--image_dir`` (or on a single
    image file) and writes, per image, a ``<name>.npy`` feature array plus a
    ``<name>_info.npy`` dict (boxes, predicted object classes, image size)
    into ``--output_folder``.
    """

    MODEL_URL = (
        "https://dl.fbaipublicfiles.com/pythia/detectron_model/detectron_model.pth"
    )
    CONFIG_URL = (
        "https://dl.fbaipublicfiles.com/pythia/detectron_model/detectron_model.yaml"
    )
    # Detector preprocessing bounds: the shorter side is scaled to MIN_SIZE
    # unless that would push the longer side past MAX_SIZE.
    MAX_SIZE = 1333
    MIN_SIZE = 800
    # Number of highest-confidence boxes kept per image.
    NUM_FEATURES = 100

    def __init__(self):
        self.args = self.get_parser().parse_args()
        # Bug fix: _try_downloading_necessities was defined but never
        # invoked, so running without --model_file crashed inside
        # _build_detection_model on torch.load(None). Fetch the default
        # model/config first when none were provided.
        self._try_downloading_necessities()
        self.detection_model = self._build_detection_model()

        os.makedirs(self.args.output_folder, exist_ok=True)

    def _try_downloading_necessities(self):
        """Download the default model and config when none were supplied."""
        if self.args.model_file is None:
            print("Downloading model and configuration")
            # Files land in the working directory under their URL basenames.
            self.args.model_file = self.MODEL_URL.split("/")[-1]
            self.args.config_file = self.CONFIG_URL.split("/")[-1]
            download_file(self.MODEL_URL)
            download_file(self.CONFIG_URL)

    def get_parser(self):
        """Build the command-line argument parser for this script."""
        parser = argparse.ArgumentParser()
        parser.add_argument(
            "--model_file", default=None, type=str, help="Detectron model file"
        )
        parser.add_argument(
            "--config_file", default=None, type=str, help="Detectron config file"
        )
        parser.add_argument("--batch_size", type=int, default=2, help="Batch size")
        parser.add_argument(
            "--output_folder", type=str, default="./output", help="Output folder"
        )
        parser.add_argument("--image_dir", type=str, help="Image directory or file")
        parser.add_argument(
            "--feature_name", type=str, help="The name of the feature to extract",
            default="fc6",
        )
        parser.add_argument(
            "--confidence_threshold", type=float, default=0.2,
            help="Threshold of detection confidence above which boxes will be selected"
        )
        return parser

    def _build_detection_model(self):
        """Load the Detectron checkpoint and return the model on GPU in eval mode."""
        cfg.merge_from_file(self.args.config_file)
        cfg.freeze()

        model = build_detection_model(cfg)
        # Load weights on CPU first; the model is moved to GPU below.
        checkpoint = torch.load(self.args.model_file, map_location=torch.device("cpu"))

        load_state_dict(model, checkpoint.pop("model"))

        model.to("cuda")
        model.eval()
        return model

    def _image_transform(self, path):
        """Read an image and return a mean-subtracted BGR CHW float tensor.

        Also returns the scale factor applied, which callers need to map
        predicted boxes back to original-image coordinates.
        """
        # Force 3-channel RGB so grayscale/palette/RGBA inputs don't break
        # the channel flip and per-channel mean subtraction below.
        img = Image.open(path).convert("RGB")
        im = np.array(img).astype(np.float32)
        # RGB -> BGR, the channel order the Detectron weights expect.
        im = im[:, :, ::-1]
        # Per-channel BGR means used at training time.
        im -= np.array([102.9801, 115.9465, 122.7717])
        im_shape = im.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])

        # Scale so the shorter side becomes MIN_SIZE.
        im_scale = self.MIN_SIZE / im_size_min

        # Prevent the bigger axis from exceeding MAX_SIZE; if it would,
        # scale down so the longer side becomes MAX_SIZE instead.
        if np.round(im_scale * im_size_max) > self.MAX_SIZE:
            im_scale = self.MAX_SIZE / im_size_max

        im = cv2.resize(
            im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR
        )
        img = torch.from_numpy(im).permute(2, 0, 1)
        return img, im_scale

    def _process_feature_extraction(
        self, output, im_scales, feature_name="fc6", conf_thresh=0.2
    ):
        """Pick the top-NUM_FEATURES boxes per image and gather their features.

        Per-class NMS runs first; each box keeps the maximum class score
        that survived NMS, and boxes are then ranked by that confidence.
        Returns (feat_list, info_list), one entry per image in the batch.

        NOTE(review): ``conf_thresh`` is accepted (and wired up to
        --confidence_threshold) but never applied here — selection is purely
        top-k by confidence. Confirm whether thresholding was intended.
        """
        batch_size = len(output[0]["proposals"])
        n_boxes_per_image = [len(boxes) for boxes in output[0]["proposals"]]
        # Scores/features come back flat for the whole batch; split them
        # back into per-image chunks.
        score_list = output[0]["scores"].split(n_boxes_per_image)
        score_list = [torch.nn.functional.softmax(x, -1) for x in score_list]
        feats = output[0][feature_name].split(n_boxes_per_image)
        cur_device = score_list[0].device

        feat_list = []
        info_list = []

        for i in range(batch_size):
            # Undo the preprocessing scale so boxes are in original coords.
            dets = output[0]["proposals"][i].bbox / im_scales[i]
            scores = score_list[i]
            max_conf = torch.zeros((scores.shape[0])).to(cur_device)

            # Class 0 is background; run NMS per foreground class and keep
            # each box's best surviving class score.
            for cls_ind in range(1, scores.shape[1]):
                cls_scores = scores[:, cls_ind]
                keep = nms(dets, cls_scores, 0.5)
                max_conf[keep] = torch.where(
                    cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep]
                )

            keep_boxes = torch.argsort(max_conf, descending=True)[:self.NUM_FEATURES]
            feat_list.append(feats[i][keep_boxes])
            bbox = output[0]["proposals"][i][keep_boxes].bbox / im_scales[i]
            objects = torch.argmax(scores[keep_boxes], dim=1)
            image_width = output[0]["proposals"][i].size[0] / im_scales[i]
            image_height = output[0]["proposals"][i].size[1] / im_scales[i]

            info_list.append(
                {
                    "bbox": bbox.cpu().numpy(),
                    "objects": objects.cpu().numpy(),
                    "image_width": image_width,
                    "image_height": image_height,
                }
            )

        return feat_list, info_list

    def get_detectron_features(self, image_paths):
        """Run the detector on a batch of image paths.

        Returns the (features, infos) pair from _process_feature_extraction,
        each a list with one entry per input image.
        """
        img_tensor, im_scales = [], []

        for image_path in image_paths:
            im, im_scale = self._image_transform(image_path)
            img_tensor.append(im)
            im_scales.append(im_scale)

        # Image dimensions should be divisible by 32, to allow convolutions
        # in detector to work
        current_img_list = to_image_list(img_tensor, size_divisible=32)
        current_img_list = current_img_list.to("cuda")

        with torch.no_grad():
            output = self.detection_model(current_img_list)
        # Propagate the (features, infos) pair so callers can unpack it.
        features, infos = self._process_feature_extraction(
            output, im_scales, self.args.feature_name, self.args.confidence_threshold
        )
        return features, infos

    def _chunks(self, array, chunk_size):
        """Yield successive chunk_size-sized slices of array."""
        for i in range(0, len(array), chunk_size):
            yield array[i : i + chunk_size]

    def _save_feature(self, file_name, feature, info):
        """Save one image's features as <base>.npy and its info as <base>_info.npy."""
        file_base_name = os.path.basename(file_name)
        # Drop the extension (everything after the first dot).
        file_base_name = file_base_name.split(".")[0]
        info_file_base_name = file_base_name + "_info.npy"
        file_base_name = file_base_name + ".npy"

        np.save(
            os.path.join(self.args.output_folder, file_base_name), feature.cpu().numpy()
        )
        np.save(os.path.join(self.args.output_folder, info_file_base_name), info)

    def extract_features(self):
        """Extract and save features for --image_dir (a single file or a folder of *.jpg)."""
        image_dir = self.args.image_dir

        if os.path.isfile(image_dir):
            features, infos = self.get_detectron_features([image_dir])
            self._save_feature(image_dir, features[0], infos[0])
        else:
            files = glob.glob(os.path.join(image_dir, "*.jpg"))
            for chunk in self._chunks(files, self.args.batch_size):
                features, infos = self.get_detectron_features(chunk)
                for idx, file_name in enumerate(chunk):
                    self._save_feature(file_name, features[idx], infos[idx])


if __name__ == "__main__":
    # Script entry point: parse CLI args, load the model, extract features.
    FeatureExtractor().extract_features()
26 changes: 26 additions & 0 deletions pythia/utils/general.py
Expand Up @@ -4,7 +4,9 @@
import os
from bisect import bisect

import requests
import torch
import tqdm
import yaml
from torch import nn

Expand Down Expand Up @@ -83,6 +85,30 @@ def get_pythia_root():
return pythia_root


def download_file(url, output_dir=".", filename=""):
    """Download *url* into *output_dir* with a tqdm progress bar.

    When *filename* is empty, the last path component of the URL is used.
    The destination directory is created if needed.
    """
    if len(filename) == 0:
        # Use the URL basename directly; the previous os.path.join(".", ...)
        # produced redundant "./name" segments once joined with output_dir.
        filename = url.split("/")[-1]

    os.makedirs(output_dir, exist_ok=True)

    filename = os.path.join(output_dir, filename)
    # Stream the body and make sure the connection is released afterwards.
    with requests.get(url, stream=True) as r:
        # Fail loudly on HTTP errors instead of saving an error page to disk.
        r.raise_for_status()

        # Content-Length can be absent (e.g. chunked transfer encoding);
        # fall back to 0 so tqdm shows an indeterminate bar instead of
        # raising KeyError.
        file_size = int(r.headers.get("Content-Length", 0))
        chunk_size = 1024 * 1024
        num_bars = int(file_size / chunk_size)

        with open(filename, "wb") as fh:
            for chunk in tqdm.tqdm(
                r.iter_content(chunk_size=chunk_size),
                total=num_bars,
                unit="MB",
                desc=filename,
                leave=True,
            ):
                fh.write(chunk)


def get_optimizer_parameters(model, config):
parameters = model.parameters()

Expand Down

0 comments on commit c6796cc

Please sign in to comment.