In [1]:
%%capture
# Colab-only setup: mount Google Drive and change into the Detectron2 baseline
# folder of the project, then install the Python dependencies used below.
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/Colab Notebooks/WhenPigsFlyContext/baselines/Detectron2

# NOTE(review): only pyyaml is version-pinned; detectron2 (installed from the
# repository head) and ml-collections are unpinned, so a re-run may pull
# different versions than the ones that produced the saved outputs.
!python -m pip install pyyaml==5.1
# Detectron2 has not released pre-built binaries for the latest pytorch (https://github.com/facebookresearch/detectron2/issues/4053)
# so we install from source instead. This takes a few minutes.
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
!pip install ml-collections

In [7]:
%%capture
# pickle5 is imported in the setup cell and used to read the
# "[SCEGRAM]bin_idxs.pkl" bin-index file further down.
# NOTE(review): the install is unpinned — consider pinning a version.
! pip3 install pickle5

In [10]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.utils.events import EventStorage
from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou
import detectron2.data.transforms as T

# import pytorch
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, models, transforms, utils
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms.functional import to_pil_image, to_tensor

from PIL import Image
from tqdm import tqdm, trange
from random import sample
import pickle
import pickle5 as pickle5
import copy
%matplotlib inline
from matplotlib import pyplot as plt
import sys
sys.path.append("..")
from SCEGRAM.SCEGRAM import SCEGRAM
from utils import *

In [3]:
# Convert a PIL image to a single-channel (grayscale) tensor; applied to the
# attention map after it has been turned into a PIL image.
transform_single = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor()
    ]
) 

# Resize a PIL image to 800x1280 and convert it to a PyTorch tensor.
# NOTE(review): img_transforms is not referenced by any other cell visible in
# this notebook — confirm whether it is still needed.
img_transforms = transforms.Compose(
    [
        transforms.Resize((800, 1280)),
        transforms.ToTensor(),
    ]
)

# Squashes the predicted objectness logits into the range (0, 1).
sigmoid = torch.nn.Sigmoid()

In [4]:
# Locations of the SCEGRAM scene images, the object images and the annotation
# spreadsheet, all relative to the current working directory.
context_dir = "../../SCEGRAM/01scenes/01object_present"
target_dir = "../../SCEGRAM/02objects"
# Despite its name, info_dir points at a single .xlsx file, not a directory.
info_dir = "../../SCEGRAM/SCEGRAM_Database_scenes_objects.xlsx"
context_size, target_size = (224, 224), (224, 224)
# is_transform=False: presumably returns untransformed images so that the
# Detectron2 augmentation can be applied later — TODO confirm in SCEGRAM.py.
dataset = SCEGRAM(info_dir, context_dir, target_dir, context_size, target_size, is_transform=False)

  warn(msg)


In [9]:
# Load the per-bin image indices. The evaluation loop reads the keys
# 'con_(0, 25]', 'con_(25, 50]', 'incon_(0, 25]' and 'incon_(25, 50]' and
# calls .tolist() on each value.
# NOTE(review): unpickling executes arbitrary code — only load trusted files.
with open("../IVSN/[SCEGRAM]bin_idxs.pkl", "rb") as tf:
    bin_info = pickle5.load(tf) 

In [5]:
# Build the Detectron2 config for the COCO Mask R-CNN R50-FPN (3x) model and
# point it at the matching model-zoo checkpoint.
cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
# Test-time resize: shortest edge fixed to INPUT.MIN_SIZE_TEST, longest edge
# capped at INPUT.MAX_SIZE_TEST. Applied to every image in the evaluation loop.
aug = T.ResizeShortestEdge(
            [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST
        )

# NOTE(review): `predictor` is not referenced by any other cell visible in this
# notebook; it presumably holds a second copy of the network in memory —
# consider removing it if nothing else depends on it.
predictor = DefaultPredictor(cfg)
# `model` is the network actually used below (its backbone and RPN head are
# called directly); load the checkpoint referenced by cfg.MODEL.WEIGHTS into it.
model = build_model(cfg)
DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)

model_final_f10217.pkl: 178MB [00:05, 29.8MB/s]                           


{'__author__': 'Detectron2 Model Zoo'}

In [18]:
# Run the RPN-objectness "attention map" search over the selected SCEGRAM images.
#
# For every selected image the cell
#   1. feeds the image through the Detectron2 backbone and RPN head,
#   2. averages the sigmoid-ed objectness maps of all feature levels into one
#      attention map of size `img_size`,
#   3. runs `searchProcesswithPath` on that map and records the returned search
#      count and scan path,
#   4. files the search count under the congruent/incongruent size bin that the
#      image index belongs to.
#
# size          -- passed through to searchProcesswithPath (presumably the side
#                  length of the search window — confirm in utils)
# img_size      -- (height, width) of the attention map
# original_size -- (height, width) the network input is resized to
size, img_size, original_size = 48, (320, 512), (1024, 1280)

detectron2_CON_0_25, detectron2_CON_25_50 = [], []
detectron2_INCON_0_25, detectron2_INCON_25_50 = [], []
scanpath, detectron2_attention_map = {}, {}

# Convert every bin's indices exactly once (instead of on every iteration) and
# pair each index set with the list that collects its results. The order of
# this list is the priority order of the original if/elif chain.
bin_results = [
    (set(bin_info['con_(0, 25]'].tolist()), detectron2_CON_0_25),
    (set(bin_info['con_(25, 50]'].tolist()), detectron2_CON_25_50),
    (set(bin_info['incon_(0, 25]'].tolist()), detectron2_INCON_0_25),
    (set(bin_info['incon_(25, 50]'].tolist()), detectron2_INCON_25_50),
]
selected_imgs = bin_info['con_(0, 25]'].tolist() + bin_info['con_(25, 50]'].tolist() + bin_info['incon_(0, 25]'].tolist() + bin_info['incon_(25, 50]'].tolist()
selected_img_set = set(selected_imgs)  # O(1) membership test inside the loop

# Loop-invariant helpers, built once.
resize_to_original = transforms.Resize(original_size)
resize_to_map = transforms.Resize(img_size)

# Inference mode only needs to be switched on once, not per image.
model.eval()

for img_id in trange(0, len(dataset)):
    if img_id not in selected_img_set:
        continue

    img, _, bbox_relative, _ = dataset[img_id]
    # get the target bounding box, scaled to the attention-map resolution
    # (arguments are img_size[1], img_size[0]; presumably width then height —
    # confirm against bbox_cordinates in utils)
    tg_loc = bbox_cordinates(bbox_relative, img_size[1], img_size[0])

    # prepare the network input: Detectron2 test-time resize, HWC -> CHW float
    # tensor on the GPU, then a fixed resize to `original_size`
    # NOTE(review): the tensor is passed to model.backbone directly, so any
    # preprocessing the full model would normally apply (e.g. pixel mean/std
    # normalization, channel-order handling) is not applied here — confirm
    # that this is intended.
    image = aug.get_transform(img).apply_image(img)
    img_tensor = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)).cuda()
    img_tensor = resize_to_original(img_tensor)

    # compute the per-level objectness logits
    with torch.no_grad():
        features = model.backbone(img_tensor.unsqueeze(0))
        mask_features = list(features.values())
        pred_objectness_logits, _ = model.proposal_generator.rpn_head(mask_features)

    # resize every level to the attention-map size and squash it to (0, 1)
    level_maps = [
        sigmoid(resize_to_map(level_logits.squeeze(0)))
        for level_logits in pred_objectness_logits
    ]

    # accumulate the levels; the accumulator takes its shape, dtype and device
    # from the first level instead of a hard-coded (3, 320, 512) CUDA tensor
    attention_map = torch.zeros_like(level_maps[0])
    for level_map in level_maps:
        attention_map += level_map

    # average over levels, normalise to a maximum of 1, then collapse the
    # channels to a single grayscale channel via a PIL round trip
    attention_map = attention_map/len(pred_objectness_logits)
    attention_map = torch.divide(attention_map, attention_map.max())
    attention_map = transform_single(to_pil_image(attention_map))

    # save a private copy of the attention map before the search runs on it
    detectron2_attention_map[img_id] = copy.deepcopy(attention_map)

    search_num, path = searchProcesswithPath(tg_loc, attention_map, img_size, size)
    scanpath[img_id] = path

    # file the result under the first bin that contains this image index
    for bin_ids, bin_result in bin_results:
        if img_id in bin_ids:
            bin_result.append(search_num)
            break

    print('detectron2_' + str(img_id) + ': ' + str(search_num), end = '\t')

detectron2_CON_res = detectron2_CON_0_25 + detectron2_CON_25_50
detectron2_INCON_res = detectron2_INCON_0_25 + detectron2_INCON_25_50
detectron2_res = detectron2_CON_res + detectron2_INCON_res

  0%|          | 1/372 [00:00<01:26,  4.30it/s]

detectron2_0: 5	

  1%|          | 3/372 [00:00<01:17,  4.78it/s]

detectron2_1: 2	detectron2_2: 14	

  1%|▏         | 5/372 [00:01<01:13,  5.00it/s]

detectron2_3: 71	detectron2_4: 6	

  2%|▏         | 9/372 [00:01<00:43,  8.27it/s]

detectron2_5: 55	detectron2_8: 1	

  3%|▎         | 13/372 [00:01<00:35, 10.06it/s]

detectron2_9: 1	detectron2_12: 1	

  5%|▍         | 17/372 [00:02<00:34, 10.43it/s]

detectron2_14: 4	detectron2_16: 34	

  5%|▌         | 19/372 [00:02<00:33, 10.59it/s]

detectron2_18: 18	detectron2_19: 25	

  6%|▌         | 22/372 [00:02<00:46,  7.47it/s]

detectron2_20: 25	detectron2_21: 4	

  8%|▊         | 31/372 [00:03<00:21, 16.12it/s]

detectron2_23: 15	detectron2_30: 1	

  9%|▉         | 33/372 [00:03<00:29, 11.45it/s]

detectron2_31: 1	detectron2_32: 5	

  9%|▉         | 35/372 [00:04<00:36,  9.18it/s]

detectron2_33: 13	detectron2_34: 8	

 11%|█         | 40/372 [00:04<00:29, 11.10it/s]

detectron2_35: 29	detectron2_39: 3	

 13%|█▎        | 49/372 [00:04<00:19, 16.60it/s]

detectron2_44: 60	detectron2_48: 5	

 14%|█▎        | 51/372 [00:05<00:27, 11.72it/s]

detectron2_49: 22	detectron2_50: 3	

 18%|█▊        | 67/372 [00:05<00:11, 27.52it/s]

detectron2_51: 8	detectron2_66: 1	

 20%|█▉        | 73/372 [00:05<00:10, 28.70it/s]

detectron2_72: 27	detectron2_73: 43	detectron2_74: 2	detectron2_75: 2	

 21%|██        | 77/372 [00:06<00:19, 14.80it/s]

detectron2_76: 19	detectron2_77: 28	

 22%|██▏       | 80/372 [00:07<00:26, 11.08it/s]

detectron2_78: 33	detectron2_79: 4	

 22%|██▏       | 82/372 [00:07<00:30,  9.51it/s]

detectron2_80: 10	detectron2_81: 3	

 23%|██▎       | 84/372 [00:07<00:34,  8.38it/s]

detectron2_82: 4	detectron2_83: 5	

 23%|██▎       | 86/372 [00:08<00:37,  7.54it/s]

detectron2_84: 5	detectron2_85: 5	

 24%|██▎       | 88/372 [00:08<00:41,  6.83it/s]

detectron2_86: 6	detectron2_87: 15	

 24%|██▍       | 90/372 [00:08<00:45,  6.19it/s]

detectron2_88: 36	detectron2_89: 43	

 25%|██▍       | 92/372 [00:09<00:47,  5.87it/s]

detectron2_90: 5	detectron2_91: 6	

 25%|██▌       | 93/372 [00:09<00:50,  5.53it/s]

detectron2_92: 110	

 26%|██▌       | 95/372 [00:09<00:52,  5.28it/s]

detectron2_93: 110	detectron2_94: 37	

 26%|██▌       | 97/372 [00:10<00:51,  5.32it/s]

detectron2_95: 17	detectron2_96: 3	

 27%|██▋       | 99/372 [00:10<00:51,  5.33it/s]

detectron2_97: 2	detectron2_98: 4	

 27%|██▋       | 101/372 [00:10<00:50,  5.33it/s]

detectron2_99: 2	detectron2_100: 36	

 28%|██▊       | 103/372 [00:11<00:49,  5.38it/s]

detectron2_101: 4	detectron2_102: 2	

 28%|██▊       | 105/372 [00:11<00:49,  5.41it/s]

detectron2_103: 2	detectron2_104: 7	

 29%|██▉       | 109/372 [00:11<00:25, 10.27it/s]

detectron2_108: 2	detectron2_109: 2	

 30%|███       | 112/372 [00:12<00:35,  7.34it/s]

detectron2_110: 9	detectron2_111: 47	

 30%|███       | 113/372 [00:12<00:38,  6.70it/s]

detectron2_112: 63	

 31%|███       | 115/372 [00:13<00:43,  5.96it/s]

detectron2_113: 81	detectron2_114: 2	

 31%|███▏      | 117/372 [00:13<00:44,  5.70it/s]

detectron2_115: 2	detectron2_116: 2	

 32%|███▏      | 119/372 [00:13<00:45,  5.50it/s]

detectron2_117: 2	detectron2_118: 6	

 33%|███▎      | 121/372 [00:14<00:46,  5.40it/s]

detectron2_119: 9	detectron2_120: 2	

 33%|███▎      | 123/372 [00:14<00:47,  5.26it/s]

detectron2_121: 2	detectron2_122: 15	

 33%|███▎      | 124/372 [00:14<00:47,  5.23it/s]

detectron2_123: 9	

 34%|███▍      | 126/372 [00:15<00:47,  5.13it/s]

detectron2_124: 48	detectron2_125: 14	

 34%|███▍      | 128/372 [00:15<00:47,  5.17it/s]

detectron2_126: 1	detectron2_127: 19	

 35%|███▍      | 130/372 [00:15<00:46,  5.16it/s]

detectron2_128: 3	detectron2_129: 5	

 35%|███▌      | 131/372 [00:16<00:47,  5.03it/s]

detectron2_130: 98	

 36%|███▋      | 135/372 [00:16<00:29,  8.01it/s]

detectron2_131: 98	detectron2_134: 19	

 39%|███▉      | 145/372 [00:16<00:12, 18.80it/s]

detectron2_135: 44	detectron2_144: 5	

 40%|███▉      | 147/372 [00:17<00:17, 12.82it/s]

detectron2_145: 2	detectron2_146: 8	

 41%|████      | 151/372 [00:17<00:18, 11.92it/s]

detectron2_147: 2	detectron2_150: 2	

 41%|████      | 153/372 [00:18<00:24,  9.07it/s]

detectron2_151: 2	detectron2_152: 3	

 42%|████▏     | 155/372 [00:18<00:28,  7.68it/s]

detectron2_153: 10	detectron2_154: 6	

 42%|████▏     | 157/372 [00:18<00:32,  6.67it/s]

detectron2_155: 76	detectron2_156: 3	

 43%|████▎     | 159/372 [00:19<00:35,  6.06it/s]

detectron2_157: 3	detectron2_158: 1	

 44%|████▎     | 162/372 [00:19<00:29,  7.19it/s]

detectron2_159: 1	detectron2_161: 2	

 47%|████▋     | 176/372 [00:19<00:09, 21.19it/s]

detectron2_170: 2	detectron2_175: 10	

 51%|█████     | 188/372 [00:20<00:06, 27.99it/s]

detectron2_177: 4	detectron2_187: 8	

 52%|█████▏    | 193/372 [00:20<00:06, 27.55it/s]

detectron2_192: 8	detectron2_193: 5	

 53%|█████▎    | 196/372 [00:21<00:11, 15.07it/s]

detectron2_194: 40	detectron2_195: 9	detectron2_196: 9	detectron2_197: 35	

 53%|█████▎    | 199/372 [00:21<00:16, 10.71it/s]

detectron2_198: 3	detectron2_199: 6	

 54%|█████▍    | 201/372 [00:22<00:18,  9.13it/s]

detectron2_200: 2	detectron2_201: 2	

 55%|█████▍    | 203/372 [00:22<00:20,  8.05it/s]

detectron2_202: 1	detectron2_203: 9	

 55%|█████▌    | 206/372 [00:22<00:24,  6.89it/s]

detectron2_204: 13	detectron2_205: 29	

 56%|█████▌    | 208/372 [00:23<00:25,  6.33it/s]

detectron2_206: 13	detectron2_207: 5	

 56%|█████▋    | 210/372 [00:23<00:27,  5.96it/s]

detectron2_208: 1	detectron2_209: 1	

 57%|█████▋    | 212/372 [00:24<00:28,  5.69it/s]

detectron2_210: 2	detectron2_211: 2	

 57%|█████▋    | 213/372 [00:24<00:44,  3.57it/s]

detectron2_212: 8	

 58%|█████▊    | 214/372 [00:25<01:02,  2.54it/s]

detectron2_213: 8	

 58%|█████▊    | 215/372 [00:26<01:16,  2.06it/s]

detectron2_214: 43	

 58%|█████▊    | 216/372 [00:26<01:37,  1.60it/s]

detectron2_215: 122	

 59%|█████▉    | 220/372 [00:27<00:51,  2.92it/s]

detectron2_219: 10	

 61%|██████    | 226/372 [00:28<00:36,  3.98it/s]

detectron2_225: 5	

 62%|██████▏   | 231/372 [00:29<00:30,  4.65it/s]

detectron2_230: 1	

 63%|██████▎   | 235/372 [00:30<00:31,  4.36it/s]

detectron2_234: 2	

 63%|██████▎   | 236/372 [00:31<00:36,  3.68it/s]

detectron2_235: 2	

 64%|██████▎   | 237/372 [00:31<00:44,  3.06it/s]

detectron2_236: 1	

 64%|██████▍   | 238/372 [00:32<00:48,  2.77it/s]

detectron2_237: 1	

 65%|██████▍   | 240/372 [00:33<00:48,  2.74it/s]

detectron2_239: 67	

 65%|██████▍   | 241/372 [00:34<01:01,  2.13it/s]

detectron2_240: 5	

 65%|██████▌   | 243/372 [00:34<00:50,  2.54it/s]

detectron2_242: 8	

 66%|██████▌   | 245/372 [00:35<00:44,  2.85it/s]

detectron2_244: 8	

 66%|██████▋   | 247/372 [00:36<00:46,  2.71it/s]

detectron2_246: 1	

 67%|██████▋   | 248/372 [00:36<00:47,  2.64it/s]

detectron2_247: 1	

 67%|██████▋   | 249/372 [00:36<00:49,  2.47it/s]

detectron2_248: 10	

 67%|██████▋   | 250/372 [00:37<00:57,  2.12it/s]

detectron2_249: 9	

 68%|██████▊   | 252/372 [00:38<00:51,  2.35it/s]

detectron2_251: 25	

 73%|███████▎  | 271/372 [00:39<00:11,  8.94it/s]

detectron2_270: 3	

 73%|███████▎  | 272/372 [00:40<00:15,  6.47it/s]

detectron2_271: 3	

 73%|███████▎  | 273/372 [00:40<00:19,  5.04it/s]

detectron2_272: 43	

 74%|███████▎  | 274/372 [00:41<00:23,  4.11it/s]

detectron2_273: 81	

 74%|███████▍  | 275/372 [00:42<00:27,  3.49it/s]

detectron2_274: 11	

 74%|███████▍  | 276/372 [00:42<00:31,  3.08it/s]

detectron2_275: 9	

 76%|███████▌  | 283/372 [00:43<00:18,  4.81it/s]

detectron2_282: 1	

 76%|███████▋  | 284/372 [00:44<00:26,  3.35it/s]

detectron2_283: 1	

 77%|███████▋  | 285/372 [00:45<00:28,  3.02it/s]

detectron2_284: 2	

 77%|███████▋  | 286/372 [00:45<00:34,  2.53it/s]

detectron2_285: 5	

 77%|███████▋  | 287/372 [00:46<00:34,  2.44it/s]

detectron2_286: 3	

 77%|███████▋  | 288/372 [00:46<00:37,  2.26it/s]

detectron2_287: 5	

 81%|████████  | 301/372 [00:47<00:10,  7.07it/s]

detectron2_300: 1	

 81%|████████  | 302/372 [00:48<00:14,  4.71it/s]

detectron2_301: 1	

 81%|████████▏ | 303/372 [00:49<00:18,  3.82it/s]

detectron2_302: 1	

 82%|████████▏ | 304/372 [00:50<00:20,  3.34it/s]

detectron2_303: 1	

 83%|████████▎ | 308/372 [00:50<00:14,  4.39it/s]

detectron2_307: 6	

 83%|████████▎ | 309/372 [00:51<00:16,  3.75it/s]

detectron2_308: 7	

 83%|████████▎ | 310/372 [00:51<00:19,  3.25it/s]

detectron2_309: 7	

 86%|████████▌ | 319/372 [00:52<00:09,  5.86it/s]

detectron2_318: 1	

 88%|████████▊ | 326/372 [00:53<00:07,  6.40it/s]

detectron2_325: 2	

 88%|████████▊ | 327/372 [00:54<00:09,  4.64it/s]

detectron2_326: 7	

 88%|████████▊ | 328/372 [00:54<00:10,  4.10it/s]

detectron2_327: 2	

 89%|████████▊ | 330/372 [00:55<00:10,  4.01it/s]

detectron2_329: 4	

 91%|█████████ | 337/372 [00:56<00:06,  5.32it/s]

detectron2_336: 4	

 91%|█████████ | 338/372 [00:57<00:08,  4.20it/s]

detectron2_337: 2	

 91%|█████████ | 339/372 [00:57<00:09,  3.35it/s]

detectron2_338: 14	

 91%|█████████▏| 340/372 [00:58<00:10,  2.96it/s]

detectron2_339: 57	

 93%|█████████▎| 345/372 [00:58<00:06,  4.49it/s]

detectron2_344: 56	

 93%|█████████▎| 346/372 [00:59<00:06,  3.80it/s]

detectron2_345: 58	

 94%|█████████▍| 351/372 [01:00<00:04,  4.72it/s]

detectron2_350: 5	

 95%|█████████▍| 352/372 [01:01<00:05,  3.47it/s]

detectron2_351: 4	

 95%|█████████▌| 354/372 [01:01<00:04,  3.78it/s]

detectron2_353: 2	

 96%|█████████▌| 356/372 [01:02<00:04,  3.48it/s]

detectron2_355: 4	

 97%|█████████▋| 361/372 [01:03<00:02,  4.29it/s]

detectron2_360: 12	

 97%|█████████▋| 362/372 [01:04<00:03,  3.15it/s]

detectron2_361: 20	

 98%|█████████▊| 363/372 [01:04<00:03,  2.67it/s]

detectron2_362: 9	

 98%|█████████▊| 365/372 [01:05<00:02,  2.71it/s]

detectron2_364: 71	

 99%|█████████▉| 370/372 [01:06<00:00,  3.97it/s]

detectron2_369: 2	

100%|██████████| 372/372 [01:06<00:00,  5.56it/s]

detectron2_371: 43	




In [19]:
# Mean of the recorded search_num values: all images, congruent only,
# incongruent only (search_num is whatever searchProcesswithPath returned).
np.mean(detectron2_res), np.mean(detectron2_CON_res), np.mean(detectron2_INCON_res)

(15.684491978609625, 5.59375, 17.767741935483873)

In [20]:
# Sanity check: the combined result list is exactly the congruent results
# followed by the incongruent results, so the lengths must add up.
len(detectron2_res) == len(detectron2_CON_res) + len(detectron2_INCON_res)

True

In [23]:
def sampleIncon(incon_bin_result, con_bin_result, times, n_steps=25, seed=None):
    """Average the incongruent performance curve over repeated subsamples.

    Each round draws, without replacement, as many incongruent results as
    there are congruent results, evaluates the draw with
    ``model_performance`` and accumulates the first ``n_steps`` entries of
    the returned curve. The mean over all rounds is returned.

    Args:
        incon_bin_result: sequence of search results for incongruent trials.
        con_bin_result: sequence of search results for congruent trials; only
            its length is used (it sets the subsample size).
        times: number of resampling rounds; must be positive.
        n_steps: number of leading curve entries to average (default 25, the
            value that used to be hard-coded).
        seed: optional seed for a private ``random.Random`` generator so the
            averaged curve is reproducible. ``None`` (default) keeps the
            previous behaviour of drawing from the global ``random`` state.

    Returns:
        list[float]: the element-wise mean of the first ``n_steps`` curve
        entries over ``times`` rounds.

    Raises:
        ValueError: if ``times`` is not positive, or (raised by the sampler)
            if there are fewer incongruent than congruent results.
    """
    if times <= 0:
        # Previously this fell through to a 0/0 division and returned NaNs.
        raise ValueError("times must be a positive integer, got %r" % (times,))

    # Private generator only when a seed is given; otherwise the module-level
    # `sample` is used exactly as before.
    draw = sample if seed is None else random.Random(seed).sample

    n_draw = len(con_bin_result)
    accu_sum = np.zeros(n_steps)

    for _ in range(times):
        subset = draw(incon_bin_result, n_draw)
        # model_performance comes from utils; its result is treated here as an
        # indexable curve with at least n_steps entries.
        subset_accu = model_performance(subset, len(subset))
        accu_sum += np.array(subset_accu[:n_steps])

    return (accu_sum/times).tolist()

def balanced_accu(res_con, res_incon, n_steps=25):
    """Return the element-wise mean of two performance curves.

    Args:
        res_con: performance curve for congruent trials.
        res_incon: performance curve for incongruent trials.
        n_steps: number of leading entries to combine (default 25, the value
            that used to be hard-coded). Pass ``None`` to combine as many
            entries as the shorter curve provides.

    Returns:
        list: ``(res_con[i] + res_incon[i]) / 2`` for ``i`` in
        ``range(n_steps)``.

    Raises:
        IndexError: if either curve has fewer than ``n_steps`` entries.
    """
    if n_steps is None:
        n_steps = min(len(res_con), len(res_incon))

    if len(res_con) < n_steps or len(res_incon) < n_steps:
        # Same exception type as the former index loop, with a clearer message.
        raise IndexError(
            "balanced_accu needs at least %d entries per curve, got %d and %d"
            % (n_steps, len(res_con), len(res_incon))
        )

    return [
        (con + incon) / 2
        for con, incon in zip(res_con[:n_steps], res_incon[:n_steps])
    ]

In [24]:
# Number of resampling rounds used to size-match the incongruent results to
# the congruent ones.
times = 100
# Performance curve for congruent trials (model_performance is provided by utils).
detectron2_CON_accu = model_performance(detectron2_CON_res, len(detectron2_CON_res))
# Incongruent curve averaged over `times` random subsamples.
# NOTE(review): the sampling is unseeded, so these values change between runs.
detectron2_INCON_accu = sampleIncon(detectron2_INCON_res, detectron2_CON_res, times)
# Mean of the congruent and incongruent curves.
detectron2_accu = balanced_accu(detectron2_CON_accu, detectron2_INCON_accu)
# Display the first 11 entries of each curve.
detectron2_accu[:11], detectron2_CON_accu[:11], detectron2_INCON_accu[:11]

([0.0,
  0.17375000000000002,
  0.3678125,
  0.45734375,
  0.50734375,
  0.63359375,
  0.65703125,
  0.66859375,
  0.70953125,
  0.740625,
  0.7557812500000001],
 [0,
  0.25,
  0.46875,
  0.59375,
  0.625,
  0.8125,
  0.8125,
  0.8125,
  0.84375,
  0.84375,
  0.84375],
 [0.0,
  0.0975,
  0.266875,
  0.3209375,
  0.3896875,
  0.4546875,
  0.5015625,
  0.5246875,
  0.5753125,
  0.6375,
  0.6678125])

In [26]:
# Bundle everything produced above into one dictionary for saving:
# the three performance curves, the raw per-bin search results, and the
# per-image scan paths and attention maps (both keyed by dataset index).
# Note that the bin keys written here use the "[a,b)" spelling, whereas the
# bin-index file is read with "(a, b]" keys.
detectron2_SCEGRAM_res = {
    'combined_accu': detectron2_accu,
    'con_accu': detectron2_CON_accu,
    'incon_accu': detectron2_INCON_accu,
    'con_[0,25)': detectron2_CON_0_25,
    'con_[25,50)': detectron2_CON_25_50,
    'incon_[0,25)': detectron2_INCON_0_25,
    'incon_[25,50)': detectron2_INCON_25_50,
    'scanpath': scanpath,
    'attention_map': detectron2_attention_map,
}

In [27]:
# Persist the result dictionary with the standard-library pickle module
# (the bin indices above were read with pickle5 instead).
# NOTE(review): the target directory must already exist; open() does not
# create it.
with open("../results/SCEGRAM/SCEGRAM(invariant_bin1_2)_detectron2_res.pkl", "wb") as tf:
    pickle.dump(detectron2_SCEGRAM_res, tf)