In [3]:
import os

# Configure GPU visibility before any CUDA-using library is imported.
# 'PCI_BUS_ID' makes CUDA order devices by PCI bus ID.
os.environ.update({
    'CUDA_DEVICE_ORDER': 'PCI_BUS_ID',
    'CUDA_VISIBLE_DEVICES': '2',  # Change this ID to an unused GPU
})

In [4]:
import torch
import torch.nn.functional as F
import torchvision
from torch import nn, optim
import cv2
import numpy as np
import json
from PIL import Image

from model.cnn_model import HOCNN

In [25]:
# Location of the checkpoint to inspect.
# The run directory is kept in MODEL_DIR (not `model`) so that it never
# shares a name with the network object created by the loading cell;
# re-running this cell therefore cannot clobber a loaded model.
MODEL_DIR = "hico/v2_no_lossweights2020-11-13_18-29"  # alternative run: v2_bias_and_cb_weights_2020-11-14_17-41
checkpoint_path = "checkpoint_10_epoch.pth"

# Resolves to checkpoints/<MODEL_DIR>/epoch_train/<checkpoint_path>.
PATH = os.path.join("checkpoints", MODEL_DIR, "epoch_train", checkpoint_path)

In [26]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Deserialize the checkpoint with its tensors mapped onto the chosen device.
checkpoint = torch.load(PATH, map_location=device)

# Build the network on that device, restore the stored weights from the
# checkpoint's 'state_dict' entry, and switch to evaluation mode.
model = HOCNN().to(device)
model.load_state_dict(checkpoint['state_dict'])
model.eval()

print("model loaded")

model loaded


In [27]:
# Dump the weight matrices of the three layers h_fcn3, o_fcn3 and p_fcn2
# as a quick check that the checkpoint weights were loaded.
for layer in (model.h_fcn3, model.o_fcn3, model.p_fcn2):
    print(layer.weight)

Parameter containing:
tensor([[-0.0052,  0.0060, -0.0074,  ..., -0.0178, -0.0063, -0.0250],
        [-0.0049,  0.0029, -0.0068,  ..., -0.0134,  0.0065, -0.0281],
        [ 0.0076, -0.0126,  0.0036,  ..., -0.0126,  0.0129, -0.0260],
        ...,
        [-0.0263, -0.0727,  0.0062,  ...,  0.0069, -0.0090,  0.0082],
        [-0.0167, -0.0076, -0.0063,  ...,  0.0103,  0.0016, -0.0082],
        [ 0.0108, -0.0289,  0.0127,  ...,  0.0106, -0.0048, -0.0202]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[    -0.0000,     -0.0086,     -0.0617,  ...,     -0.0181,
             -0.0324,     -0.0232],
        [    -0.0075,      0.0097,     -0.0275,  ...,     -0.0568,
             -0.0164,     -0.0168],
        [     0.0001,     -0.0271,     -0.0218,  ...,     -0.1534,
             -0.0299,     -0.0228],
        ...,
        [    -0.0161,     -0.0184,     -0.0179,  ...,      0.0015,
             -0.0094,     -0.0129],
        [    -0.0230,      0.0069,     -0.0119,  ...,

In [28]:
# Single-image inference: build the human / object / pairwise masked inputs
# for one HICO test image and run them through the loaded model.

# The image id is defined once so the file path and the annotation lookup
# cannot drift apart.
IMAGE_ID = 'HICO_test2015_00000009'
IMG_PATH = "datasets/hico/images/test2015/" + IMAGE_ID + ".jpg"
INPUT_SIZE = (64, 64)  # spatial size every masked image is resized to


def _fill_boxes(mask, bboxes):
    """Paint each box in `bboxes` as a filled white rectangle onto `mask` (in place).

    Each box is indexed as (bbox[0], bbox[1]) -> (bbox[2], bbox[3]), i.e. two
    opposite corners. Returns `mask` for convenience.
    """
    for bbox in bboxes:
        cv2.rectangle(mask, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), thickness=-1)
    return mask


with open('datasets/processed/hico/anno_list.json') as f:
    anno_list = json.load(f)

# Look up the annotation record for this image and fail with a clear message
# instead of an opaque IndexError when it is missing.
img = next((x for x in anno_list if x['global_id'] == IMAGE_ID), None)
if img is None:
    raise ValueError("no annotation with global_id %r in anno_list.json" % IMAGE_ID)

# Multi-hot ground-truth vector; the ids in 'pos_hoi_ids' are 1-based, hence
# the -1 when indexing. Presumably 600 is the number of HOI classes — TODO confirm.
labels = np.zeros((1, 600))
pos_hois = list(map(int, img['pos_hoi_ids']))
for pos_hoi in pos_hois:
    labels[0][pos_hoi - 1] = 1

# NOTE(review): only the first entry of img['hois'] supplies the boxes, while
# the labels above come from every positive id — confirm this is intended.
img_data = img['hois'][0]
human_bboxes = img_data['human_bboxes']
object_bboxes = img_data['object_bboxes']

# cv2.imread signals failure by returning None rather than raising.
src = cv2.imread(IMG_PATH)
if src is None:
    raise FileNotFoundError("could not read image: " + IMG_PATH)
# NOTE(review): cv2.imread yields BGR channel order — confirm this matches the
# preprocessing used when the model was trained.

# Human stream: keep only the pixels inside the human boxes.
human_mask = _fill_boxes(np.zeros_like(src), human_bboxes)
human_bbox_img = cv2.bitwise_and(src, human_mask, mask=None)

# Object stream: keep only the pixels inside the object boxes.
obj_mask = _fill_boxes(np.zeros_like(src), object_bboxes)
obj_bbox_img = cv2.bitwise_and(src, obj_mask, mask=None)

# Pairwise stream: union of human and object boxes. Work on a COPY of the
# human mask; drawing on an alias would silently add the object boxes to
# human_mask as well.
pairwise_mask = _fill_boxes(human_mask.copy(), object_bboxes)
pairwise_bbox_img = cv2.bitwise_and(src, pairwise_mask, mask=None)

human_bbox_img = cv2.resize(human_bbox_img, INPUT_SIZE, interpolation=cv2.INTER_AREA)
obj_bbox_img = cv2.resize(obj_bbox_img, INPUT_SIZE, interpolation=cv2.INTER_AREA)
pairwise_bbox_img = cv2.resize(pairwise_bbox_img, INPUT_SIZE, interpolation=cv2.INTER_AREA)

human_bbox_img = torch.from_numpy(human_bbox_img).to(device)
obj_bbox_img = torch.from_numpy(obj_bbox_img).to(device)
pairwise_bbox_img = torch.from_numpy(pairwise_bbox_img).to(device)

# Add a batch axis and move channels first: (H, W, C) -> (1, C, H, W), as float.
# The tensors are already on `device`, so no further transfer is needed.
res_human_input = human_bbox_img.unsqueeze(0).permute([0, 3, 1, 2]).float()
res_obj_input = obj_bbox_img.unsqueeze(0).permute([0, 3, 1, 2]).float()
res_pairwise_input = pairwise_bbox_img.unsqueeze(0).permute([0, 3, 1, 2]).float()

with torch.no_grad():  # Disable gradients for validation
    # Call the module itself (not .forward) so registered hooks are honoured.
    outputs = model(res_human_input, res_obj_input, res_pairwise_input)
    preds = torch.argmax(outputs, dim=1)

In [29]:
# Print probabilities in plain decimal notation instead of scientific.
torch.set_printoptions(sci_mode=False)

# Per-class sigmoid scores of the raw model outputs. Computed once and reused;
# torch.sigmoid replaces the legacy F.sigmoid alias, which emits a deprecation
# warning on older PyTorch releases.
probs = torch.sigmoid(outputs)
confidences = probs.squeeze()

# Index of the highest-scoring class for the first item in the batch.
label = preds[0].item()

print(probs)                  # full score matrix
print(confidences[label])     # score of the predicted class
print(np.argmax(labels))      # index of the first positive ground-truth label

tensor([[    0.0000,     0.0000,     0.0000,     0.0000,     0.0000,     0.0000,
             0.0000,     0.0000,     0.0000,     0.0000,     0.0000,     0.0001,
             0.0000,     0.0000,     0.0001,     0.0000,     0.0000,     0.0000,
             0.0000,     0.0000,     0.0000,     0.0000,     0.0000,     0.0000,
             0.0000,     0.0001,     0.0000,     0.0000,     0.0000,     0.0004,
             0.0000,     0.0000,     0.0000,     0.0000,     0.0000,     0.0000,
             0.0000,     0.0000,     0.0000,     0.0000,     0.0000,     0.0000,
             0.0000,     0.0000,     0.0000,     0.0001,     0.0001,     0.0009,
             0.0022,     0.0002,     0.0000,     0.0000,     0.0002,     0.0000,
             0.0000,     0.0000,     0.0000,     0.0000,     0.0000,     0.0000,
             0.0000,     0.0000,     0.0000,     0.0000,     0.0000,     0.0000,
             0.0000,     0.0000,     0.0000,     0.0000,     0.0000,     0.0000,
             0.0000,     0.0