In [1]:
import argparse
import torch
from main import get_args_parser
from util.misc import NestedTensor
from datasets import build_dataset
import datasets.samplers as samplers
from torch.utils.data import DataLoader
import util.misc as utils
from datasets.data_prefetcher import data_prefetcher
from models.ops.functions import MSDeformAttnFunction

# --- Configuration -----------------------------------------------------------
# Reuse the project's argument parser as a parent, then parse an empty argv so
# every option takes its declared default instead of reading the kernel's own
# command line.
parser = argparse.ArgumentParser(
    'Deformable DETR training and evaluation script',
    parents=[get_args_parser()],
)
args = parser.parse_args([])
# Override the default for this notebook; `masks` is forwarded to the model
# constructor in a later cell.
args.masks = True

# --- Data loading ------------------------------------------------------------
device = torch.device(args.device)
print(device)
dataset_train = build_dataset(image_set='train', args=args)

# Random order, fixed-size batches; a trailing partial batch is dropped.
sampler_train = torch.utils.data.RandomSampler(dataset_train)
batch_sampler_train = torch.utils.data.BatchSampler(
    sampler=sampler_train,
    batch_size=args.batch_size,
    drop_last=True,
)

loader_options = dict(
    batch_sampler=batch_sampler_train,
    collate_fn=utils.collate_fn,
    num_workers=args.num_workers,
    pin_memory=True,
)
data_loader_train = DataLoader(dataset_train, **loader_options)

# Fetch a single batch through the project's prefetcher; it yields three
# items, unpacked here as (samples, coords, targets).
prefetcher = data_prefetcher(data_loader_train, device, prefetch=True)
samples, coords, targets = prefetcher.next()

cuda


In [2]:
# Report the shape of the `tensors` and `mask` attributes of both batch
# objects, one line each, in the order samples -> coords.
for _template, _holder, _field in (
    ("samples.tensors: {}", samples, "tensors"),
    ("samples.mask: {}", samples, "mask"),
    ("coords.tensors: {}", coords, "tensors"),
    ("coords.mask: {}", coords, "mask"),
):
    print(_template.format(getattr(_holder, _field).shape))

samples.tensors: torch.Size([1, 3, 3, 640, 480])
samples.mask: torch.Size([1, 3, 640, 480])
coords.tensors: torch.Size([1, 3, 3, 640, 480])
coords.mask: torch.Size([1, 3, 640, 480])


In [3]:
from models.backbone import build_backbone
from models.deformable_transformer import build_deforamble_transformer
from models.deformable_detr import DeformableDETR

# Class count handed to the detector: 91 when args.dataset_file is 'coco',
# otherwise 20.
num_classes = 20 if args.dataset_file != 'coco' else 91
backbone = build_backbone(args)
transformer = build_deforamble_transformer(args)
model = DeformableDETR(
    backbone,
    transformer,
    num_classes=num_classes,
    num_queries=args.num_queries,
    num_feature_levels=args.num_feature_levels,
    aux_loss=args.aux_loss,
    with_box_refine=args.with_box_refine,
    two_stage=args.two_stage,
    masks=args.masks
)
model.to(device)


def _describe(obj):
    """Return a compact, nested summary of ``obj`` for display.

    Tensors are replaced by their shape tuple, dicts/lists/tuples are
    summarised element by element, and anything else is shown by type name.
    """
    if torch.is_tensor(obj):
        return tuple(obj.shape)
    if isinstance(obj, dict):
        return {key: _describe(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [_describe(value) for value in obj]
    return type(obj).__name__


# Smoke-test forward pass. Gradients are not needed here, so autograd is
# disabled, and the result is bound to a name instead of being left as the
# cell's last expression: IPython caches displayed values (Out[n] / `_`), which
# would otherwise keep the raw tensors and their autograd graph alive and
# render them in full.
# NOTE(review): the recorded run of this cell ended in SystemExit after a large
# tensor printout, presumably from debugging code inside the model's forward
# path -- confirm and remove it there; this cell cannot fix that.
with torch.no_grad():
    outputs = model(samples, coords)

_describe(outputs)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448265233/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


(0, 14400)
(14400, 18000)
(18000, 18900)
(18900, 19140)
[[tensor([[[[[1.8785e-05, 4.1629e-05, 1.8648e-05,  ..., 1.9910e-05,
            2.7976e-05, 1.4093e-05],
           [2.3032e-05, 3.0777e-05, 1.9975e-05,  ..., 2.2883e-05,
            2.5782e-05, 1.0341e-05],
           [5.8244e-05, 2.0370e-05, 2.1780e-05,  ..., 1.8053e-05,
            2.3551e-05, 2.7441e-05],
           ...,
           [4.5512e-05, 1.1765e-05, 1.6985e-05,  ..., 4.5482e-05,
            4.0855e-05, 2.3190e-05],
           [8.0194e-05, 2.0145e-05, 3.5984e-05,  ..., 3.7776e-05,
            2.4949e-05, 7.6894e-05],
           [2.0736e-05, 2.6355e-05, 4.2778e-05,  ..., 2.5736e-05,
            3.1628e-05, 2.3683e-05]],

          [[1.9537e-05, 1.9623e-05, 2.8279e-05,  ..., 2.8056e-05,
            5.4784e-05, 6.6226e-06],
           [1.4790e-06, 2.1420e-05, 4.8070e-05,  ..., 2.0181e-05,
            9.7146e-05, 3.4324e-05],
           [2.0082e-05, 1.8005e-05, 2.3696e-05,  ..., 4.8665e-05,
            6.6708e-05, 5.1573e-06

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
# Shape experiment for an einsum contraction between a query tensor and a
# spatial key tensor. The subscripts sum over `c` (present in both operands,
# absent from the result) and keep b, q, n, h, w.
# Presumably b=batch, q=queries, n=heads, c=channels, h/w=spatial size --
# TODO confirm against the model code.
b, q, n, c, h, w = 32, 300, 8, 256, 64, 64

query_shape = (b, q, n, c)
key_shape = (b, n, c, h, w)

# Draw the query tensor first, then the key tensor.
qh = torch.rand(*query_shape)
kh = torch.rand(*key_shape)
weights = torch.einsum("bqnc,bnchw->bqnhw", qh, kh)

# One shape per line: query, key, result.
print(qh.shape, kh.shape, weights.shape, sep="\n")

torch.Size([32, 300, 8, 256])
torch.Size([32, 8, 256, 64, 64])
torch.Size([32, 300, 8, 64, 64])
