In [1]:
import torch, detectron2
!nvcc --version
# Major.minor of the installed PyTorch build, e.g. "1.13".
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
# CUDA tag in wheel notation ("cu117"). Prefer the toolkit version PyTorch was
# compiled with: the "+..." suffix of torch.__version__ is missing on some builds
# and is a git hash on source builds, so it is only used as a fallback.
if torch.version.cuda is not None:
    CUDA_VERSION = "cu" + torch.version.cuda.replace(".", "")
elif "+" in torch.__version__:
    # Non-CUDA builds that still carry a local version tag (e.g. "+cpu").
    CUDA_VERSION = torch.__version__.split("+")[-1]
else:
    CUDA_VERSION = "cpu"
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
# Installed detectron2 release (0.6 in the recorded run below).
print("detectron2:", detectron2.__version__)

  from .autonotebook import tqdm as notebook_tqdm


nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Tue_May__3_18:49:52_PDT_2022
Cuda compilation tools, release 11.7, V11.7.64
Build cuda_11.7.r11.7/compiler.31294372_0
torch:  1.13 ; cuda:  cu117
detectron2: 0.6


In [2]:
# Notebook bootstrap: standard-library imports, module search path setup, seeding,
# warning/logging configuration, then third-party and project imports.
# Statement order matters: the sys.path edits must run before the project imports.
import os
import sys
import logging
import argparse

# Prepend the parent of sys.path[0] (its last '/'-separated component removed) to the
# module search path.
# NOTE(review): presumably this exposes the repository root to a notebook living in a
# sub-directory; it assumes '/' as the path separator - confirm.
pth = '/'.join(sys.path[0].split('/')[:-1])
sys.path.insert(0, pth)

from pprint import pprint
from PIL import Image
from copy import deepcopy
import numpy as np
# Seed NumPy's global random number generator.
np.random.seed(1)

# Absolute path of the directory above the current working directory; it is also
# appended to sys.path and printed for reference.
home_dir = os.path.abspath(os.getcwd()+"/../")
sys.path.append(home_dir)
print(home_dir)

import warnings
# Suppress every warning for the rest of the session (no category filter is given).
warnings.filterwarnings(action='ignore')
# Logger named after this module's __name__.
logger = logging.getLogger(__name__)

import torch
from torchvision import transforms
from torch.nn import functional as F

from detectron2.data import MetadataCatalog
from detectron2.utils.colormap import random_color
from detectron2.structures import Boxes, ImageList, Instances, BitMasks, BoxMode

# Project modules - presumably resolved through the sys.path entries added above.
from hdecoder.BaseModel import BaseModel
from hdecoder import build_model

from utils.arguments import load_opt_command
from utils.misc import hook_metadata, hook_switcher, hook_opt
from utils.distributed import init_distributed
from utils.arguments import load_opt_from_config_files, load_config_dict_to_opt

/home/djjin/Mygit/X-Decoder


Invalid MIT-MAGIC-COOKIE-1 key

In [3]:
# Command-line interface definition. In this notebook the parser is only used to
# obtain a namespace holding these options and their defaults.
parser = argparse.ArgumentParser(
    description='Pretrain or fine-tune models for NLP tasks.',
)
parser.add_argument(
    '--command',
    help='Command: train/evaluate/train-and-evaluate',
    default="evaluate",
)
parser.add_argument(
    '--conf_files',
    help='Path(s) to the config file(s).',
    nargs='+',
)
parser.add_argument(
    '--user_dir',
    help='Path to the user defined module for tasks (models, criteria), optimizers, and lr schedulers.',
)
parser.add_argument(
    '--config_overrides',
    help='Override parameters on config with a json style string, e.g. {"<PARAM_NAME_1>": <PARAM_VALUE_1>, "<PARAM_GROUP_2>.<PARAM_SUBGROUP_2>.<PARAM_2>": <PARAM_VALUE_2>}. A key with "." updates the object in the corresponding nested dict. Remember to escape " in command line.',
    nargs='*',
)
# Everything following --overrides on the command line is collected verbatim.
parser.add_argument(
    '--overrides',
    nargs=argparse.REMAINDER,
    help='arguments that used to override the config file in cmdline',
)

# Start from the parser defaults (no command-line tokens are parsed), then set the
# config file and the checkpoint override by hand.
cmdline_args = parser.parse_args([])
# cmdline_args.conf_files = [os.path.join(home_dir, "configs/xdecoder/svlp_focalt_lang.yaml")]
vcoco_config = os.path.join(home_dir, "configs/xdecoder/vcoco.yaml")
cmdline_args.conf_files = [vcoco_config]
# Flat [key, value, ...] list of config overrides.
cmdline_args.overrides = ['WEIGHT', '../checkpoints/xdecoder_focalt_best_openseg.pt']

# Load the option dictionary from the listed config file(s).
opt = load_opt_from_config_files(cmdline_args.conf_files)

def _coerce_override(raw, current):
    """Convert the override string `raw` to the type of the config value it replaces.

    Args:
        raw: Override value exactly as listed in `cmdline_args.overrides` (a string).
        current: Value presently stored in `opt` under the overridden key.

    Returns:
        `type(current)(raw)`, with two exceptions:
        - if `current` is None the raw string is returned unchanged, because
          NoneType cannot be called with an argument;
        - if `current` is a bool, "" and "false" (any letter case, surrounding
          blanks ignored) give False and every other string gives True.

    Raises:
        ValueError / TypeError: whatever `type(current)` raises for an
        unconvertible string (e.g. `int("abc")`).
    """
    if current is None:
        return raw
    if isinstance(current, bool):
        # bool("false") would be True, so falsy spellings are handled explicitly,
        # and only for bool targets, leaving string options such as "false" intact.
        return raw.strip().lower() not in ('', 'false')
    return type(current)(raw)


# `overrides` is a flat [key1, value1, key2, value2, ...] list; a key may contain
# dots to address a nested entry of `opt`.
_flat_overrides = cmdline_args.overrides or []
if len(_flat_overrides) % 2:
    raise ValueError(
        f"overrides must be key/value pairs; unmatched trailing entry: {_flat_overrides[-1]!r}"
    )
keys = _flat_overrides[0::2]
vals = _flat_overrides[1::2]

# Look up the value each key currently holds so the override can adopt its type.
# `opt` is only read here, so no copy is needed. A missing key raises KeyError.
current_vals = []
for key in keys:
    ele = opt
    for part in key.split('.'):
        ele = ele[part]
    current_vals.append(ele)
types = [type(cur) for cur in current_vals]

config_dict = {
    name: _coerce_override(raw, cur)
    for name, raw, cur in zip(keys, vals, current_vals)
}

# Merge the typed overrides into `opt`.
load_config_dict_to_opt(opt, config_dict)
# Copy every command-line attribute that was actually set (not None) into `opt`
# under its own name.
for key, val in vars(cmdline_args).items():
    if val is None:
        continue
    opt[key] = val
# Hand the options to init_distributed and keep the options object it returns.
opt = init_distributed(opt)

## Model

In [4]:
# Build the network from `opt`, wrap it in BaseModel, switch it to training mode
# and move it to the GPU.
network = build_model(opt)
model = BaseModel(opt, network).train().cuda()

In [5]:
# model

In [6]:
# t = []
# t.append(transforms.Resize(800, interpolation=Image.BICUBIC))
# transform = transforms.Compose(t)
# pixel_mean = torch.Tensor([123.675, 116.280, 103.530]).view(-1, 1, 1).cuda()
# pixel_std = torch.Tensor([58.395, 57.120, 57.375]).view(-1, 1, 1).cuda()
# image_pth = '../images/animals.png'

# with torch.no_grad():
#     image_ori = Image.open(image_pth).convert('RGB')
#     width = image_ori.size[0]
#     height = image_ori.size[1]
#     image = transform(image_ori)
#     image = np.asarray(image)
#     image_ori = np.asarray(image_ori)
#     images = torch.from_numpy(image.copy()).permute(2,0,1).cuda()
#     batch_inputs = [{'image': images, 'height': height, 'width': width}]
#     # out = model.forward(batch_inputs)
#     # print(out)

## Train Dataloader

In [7]:
from datasets.registration.register_vcoco_dataset import register_all_vcoco
# Dataset root: the DATASET environment variable when set, otherwise ../datasets.
_root = os.environ.get("DATASET", "../datasets")
register_all_vcoco(_root)



In [8]:
from hdecoder.modules.criterion import SetCriterionHOI
from hdecoder.modules.matcher import HungarianMatcherHOI

# Build the HOI matcher with its five cost weights, then move it to the GPU.
matcher = HungarianMatcherHOI(
    cost_obj_class=1,
    cost_verb_class=1,
    cost_bbox=2.5,
    cost_giou=1,
    cost_matching=1,
)
matcher = matcher.cuda()

# Weight applied to each loss term, keyed by loss name.
weight_dict = {
    'loss_obj_ce': 1,
    'loss_verb_ce': 2,
    'loss_sub_bbox': 2.5,
    'loss_obj_bbox': 2.5,
    'loss_sub_giou': 1,
    'loss_obj_giou': 1,
}
# Names of the loss components handed to the criterion.
losses = [
    'obj_labels',
    'verb_labels',
    'sub_obj_boxes',
    'obj_cardinality',
]

# Size hyper-parameters and the eos coefficient passed to the criterion.
num_obj_classes = 80
num_queries = 100
num_verb_classes = 29
eos_coef = 0.1
# Build the HOI criterion, then move it to the GPU. The three size arguments stay
# positional and in their original order.
criterion = SetCriterionHOI(
    num_obj_classes,
    num_queries,
    num_verb_classes,
    matcher=matcher,
    weight_dict=weight_dict,
    eos_coef=eos_coef,
    losses=losses,
)
criterion = criterion.cuda()

In [9]:
from datasets.build import build_train_dataloader
# Build the training loader from `opt` and keep it in a one-element list, so it can
# be zipped with the training dataset names read from the config.
single_train_loader = build_train_dataloader(opt)
train_data_loader = [single_train_loader]
dataset_names = opt['DATASETS']['TRAIN']

In [10]:
# Smoke test: push the first three batches of every training loader through the
# model and print the criterion's loss dict for each sample. Gradients are disabled
# because the losses are only inspected here.
with torch.no_grad():
    for dataloader, dataset_name in zip(train_data_loader, dataset_names):
        for idx, batch in enumerate(dataloader):
            if idx >= 3:
                break
            for test in batch:
                samples = test["image"]
                # Send every target field to the GPU, leaving out the 'filename' entry.
                targets = [{
                    k: v.to("cuda")
                    for k, v in test["instances"].items()
                    if k != 'filename'
                }]
                # The model is fed a list with one dict per image.
                batch_inputs = [{'image': samples}]
                outputs = model(batch_inputs)
                loss = criterion(outputs, targets)
                pprint(loss)


{'loss_obj_bbox': tensor(1.1783, device='cuda:0'),
 'loss_obj_ce': tensor(4.7438, device='cuda:0'),
 'loss_obj_giou': tensor(1.1937, device='cuda:0'),
 'loss_sub_bbox': tensor(0.8894, device='cuda:0'),
 'loss_sub_giou': tensor(1.1627, device='cuda:0'),
 'loss_verb_ce': tensor(228.8560, device='cuda:0'),
 'obj_cardinality_error': tensor(100., device='cuda:0'),
 'obj_class_error': tensor(100., device='cuda:0')}
{'loss_obj_bbox': tensor(0.9525, device='cuda:0'),
 'loss_obj_ce': tensor(4.8186, device='cuda:0'),
 'loss_obj_giou': tensor(1.1995, device='cuda:0'),
 'loss_sub_bbox': tensor(0.8073, device='cuda:0'),
 'loss_sub_giou': tensor(1.0595, device='cuda:0'),
 'loss_verb_ce': tensor(74.8375, device='cuda:0'),
 'obj_cardinality_error': tensor(99., device='cuda:0'),
 'obj_class_error': tensor(100., device='cuda:0')}
{'loss_obj_bbox': tensor(1.1688, device='cuda:0'),
 'loss_obj_ce': tensor(4.7719, device='cuda:0'),
 'loss_obj_giou': tensor(1.4906, device='cuda:0'),
 'loss_sub_bbox': tensor(