In [1]:
import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.anchor_utils import AnchorGenerator

In [None]:
model1 = torchvision.models.inception.inception_v3(pretrained=True)
model2 = torchvision.models.mobilenet_v2(pretrained=True)

print(model1)
print(model2)

In [20]:
backbone = torchvision.models.convnext_base(pretrained=True).features

backbone.out_channels = 1024
# let's make the RPN generate 5 x 3 anchors per spatial
# location, with 5 different sizes and 3 different aspect
# ratios. We have a Tuple[Tuple[int]] because each feature
# map could potentially have different sizes and
# aspect ratios
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))
# let's define what are the feature maps that we will
# use to perform the region of interest cropping, as well as
# the size of the crop after rescaling.
# if your backbone returns a Tensor, featmap_names is expected to
# be ['0']. More generally, the backbone should return an
# OrderedDict[Tensor], and in featmap_names you can choose which
# feature maps to use.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)
# put the pieces together inside a FasterRCNN model
model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x)

Downloading: "https://download.pytorch.org/models/convnext_base-6075fbad.pth" to /kuacc/users/cakarsubasi18/.cache/torch/hub/checkpoints/convnext_base-6075fbad.pth


  0%|          | 0.00/338M [00:00<?, ?B/s]

In [8]:
model2 = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

In [10]:
model2.backbone.body = model

In [12]:
model2

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): Inception3(
      (Conv2d_1a_3x3): BasicConv2d(
        (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (Conv2d_2a_3x3): BasicConv2d(
        (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (Conv2d_2b_3x3): BasicConv2d(
        (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)

In [13]:
model2.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model2(x) 

AttributeError: 'Tensor' object has no attribute 'keys'

In [3]:
torch.cuda.device_count()

1

In [9]:
anchor_generator = AnchorGenerator()

In [None]:
roi_pooler = torchvision.ops.MultiScaleRoIAlign()

In [14]:
import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
# load a pre-trained model for classification and return
# only the features
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
# FasterRCNN needs to know the number of
# output channels in a backbone. For mobilenet_v2, it's 1280
# so we need to add it here
backbone.out_channels = 1280
# let's make the RPN generate 5 x 3 anchors per spatial
# location, with 5 different sizes and 3 different aspect
# ratios. We have a Tuple[Tuple[int]] because each feature
# map could potentially have different sizes and
# aspect ratios
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))
# let's define what are the feature maps that we will
# use to perform the region of interest cropping, as well as
# the size of the crop after rescaling.
# if your backbone returns a Tensor, featmap_names is expected to
# be ['0']. More generally, the backbone should return an
# OrderedDict[Tensor], and in featmap_names you can choose which
# feature maps to use.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)
# put the pieces together inside a FasterRCNN model
model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /kuacc/users/cakarsubasi18/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


  0%|          | 0.00/13.6M [00:00<?, ?B/s]

In [15]:
backbone

Sequential(
  (0): ConvNormActivation(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU6(inplace=True)
  )
  (1): InvertedResidual(
    (conv): Sequential(
      (0): ConvNormActivation(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (2): InvertedResidual(
    (conv): Sequential(
      (0): ConvNormActivation(
        (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Re

In [23]:
backbone = torchvision.models.resnet50(pretrained=True)

backbone.out_channels = 256

anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))

roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=[7, 7],
                                                sampling_ratio=2)

model = FasterRCNN(backbone,
                   num_classes=4,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)

model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x)

RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [2, 1000]

In [19]:
backbone

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [7]:
import torch
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.rpn import RPNHead
import torchvision.models.detection.faster_rcnn
import torch.nn as nn
num_classes = 4


backbone = resnet_fpn_backbone('resnet50', pretrained=True, trainable_layers=5)

anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

rpn_head = RPNHead(backbone.out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
#box_head = torchvision.models.detection.faster_rcnn.TwoMLPHead((backbone.out_channels, 7, 7 ), [256, 256, 256, 256])

model = FasterRCNN(
    backbone, 
    num_classes=num_classes,
    rpn_anchor_generator=rpn_anchor_generator,
    rpn_head=rpn_head)

#model.load_state_dict(state_dict=fasterrcnn_resnet50_fpn_v2_weights)

In [8]:
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x) 

In [None]:
def muscima_fpn_model(
    num_classes=4,
    backbone='resnet50',
    aspects=(0.5, 1.0, 2.0)):
    '''
    TODO: 
    rpn_nms_thresh
    rpn_fg_iou_thresh
    rpn_bg_iou_thresh
    rpn_batch_size_per_image
    rpn_score_thresh

    box_score_thresh
    box_nms_thresh
    box_detections_per_img
    box_fg_iou_thresh
    box_bg_iou_thresh
    '''

    model_backbone = resnet_fpn_backbone(backbone, pretrained=True, trainable_layers=5)

    anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
    aspect_ratios = (aspects,) * len(anchor_sizes)
    rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

    rpn_head = RPNHead(model_backbone.out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
    #box_head = torchvision.models.detection.faster_rcnn.TwoMLPHead((backbone.out_channels, 7, 7 ), [256, 256, 256, 256])

    model = FasterRCNN(
        model_backbone, 
        num_classes=num_classes,
        rpn_anchor_generator=rpn_anchor_generator,
        rpn_head=rpn_head)

    return model

In [1]:
from vision.torchvision.models.detection.faster_rcnn import fasterrcnn_resnet50_fpn_v2

from torch.utils.model_zoo import load_url


fasterrcnn_resnet50_fpn_v2_weights = load_url("https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth", progress=True)

#class FasterRCNN_ResNet50_FPN_V2_Weights(WeightsEnum):
#    COCO_V1 = Weights(
#        url="https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth",
#        transforms=ObjectDetection,
#        meta={
#            **_COMMON_META,
#            "publication_year": 2021,
#            "num_params": 43712278,
#            "recipe": "https://github.com/pytorch/vision/pull/5763",
#            "map": 46.7,
#        },
#    )
#    DEFAULT = COCO_V1

Collecting pyyaml==5.1
  Downloading PyYAML-5.1.tar.gz (274 kB)
     -------------------------------------- 274.2/274.2 KB 1.7 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: pyyaml
  Building wheel for pyyaml (setup.py): started
  Building wheel for pyyaml (setup.py): finished with status 'done'
  Created wheel for pyyaml: filename=PyYAML-5.1-cp39-cp39-win_amd64.whl size=44087 sha256=302ce5131045b3a323cdf2b0e81f6051677a1d936a6ea8ce32348f82b09d1ee4
  Stored in directory: c:\users\explo\appdata\local\pip\cache\wheels\68\be\8f\b6c454cd264e0b349b47f8ee00755511f277618af9e5dae20d
Successfully built pyyaml
Installing collected packages: pyyaml
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 5.4.1
Note: you may need to restart the kernel to use updated packages.
    Uninstalling PyYAML-5.4.1:
      Successfully uninstalled PyYAML-5.4.1
Successfully installed py

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tf-models-official 2.8.0 requires opencv-python-headless, which is not installed.


torch:  1.11 ; cuda:  cu113
Looking in links: https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.11/index.html
Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement detectron2 (from versions: none)
ERROR: No matching distribution found for detectron2
