In [1]:
import numpy as np
import torch
import torchvision

## implementation of trident block

In [2]:
class trident_block(torch.nn.Module):
    """Bottleneck residual block with three parallel, weight-sharing branches.

    Every branch runs the same 1x1 -> 3x3 -> 1x1 convolution stack with the
    same weight tensors; the branches differ only in the padding/dilation of
    the 3x3 convolution and in their own BatchNorm layers.

    Args:
        input_channels: number of channels of the incoming feature map.
        output_channels: width of the bottleneck; the block emits
            ``output_channels * expansion`` channels.
        stride: stride of the 3x3 convolution (shared by all branches).
        padding: three paddings, one per branch, for the 3x3 convolution.
        dilation: three dilations, one per branch, for the 3x3 convolution.
        downsample: optional module applied to the branch input to produce the
            residual when the input shape differs from the output shape.
    """
    expansion = 4

    def __init__(self, input_channels, output_channels,
                 stride = 1, padding = (1,2,3), dilation = (1,2,3), downsample = None):
        super(trident_block, self).__init__()
        self.stride = stride
        # Copy the sequences so a caller-owned list is never shared or mutated.
        self.padding = list(padding)
        self.dilation = list(dilation)
        self.downsample = downsample

        # conv2d expects weights laid out as (out_channels, in_channels, kH, kW).
        self.shared_weights4convolution1 = torch.nn.Parameter(
            torch.randn(output_channels, input_channels, 1, 1))
        self.shared_weights4convolution2 = torch.nn.Parameter(
            torch.randn(output_channels, output_channels, 3, 3))
        self.shared_weights4convolution3 = torch.nn.Parameter(
            torch.randn(output_channels * self.expansion, output_channels, 1, 1))

        # Normalisation is NOT shared: bn<branch><position>.
        self.bn11 = torch.nn.BatchNorm2d(output_channels)
        self.bn12 = torch.nn.BatchNorm2d(output_channels)
        self.bn13 = torch.nn.BatchNorm2d(output_channels*self.expansion)

        self.bn21 = torch.nn.BatchNorm2d(output_channels)
        self.bn22 = torch.nn.BatchNorm2d(output_channels)
        self.bn23 = torch.nn.BatchNorm2d(output_channels*self.expansion)

        self.bn31 = torch.nn.BatchNorm2d(output_channels)
        self.bn32 = torch.nn.BatchNorm2d(output_channels)
        self.bn33 = torch.nn.BatchNorm2d(output_channels*self.expansion)

        self.relu1 = torch.nn.ReLU(inplace = True)
        self.relu2 = torch.nn.ReLU(inplace = True)
        self.relu3 = torch.nn.ReLU(inplace = True)

    def _forward_branch(self, x, index):
        """Run branch ``index`` (0, 1 or 2) on tensor ``x`` and return its output."""
        number = index + 1
        bn_reduce = getattr(self, "bn{}1".format(number))
        bn_spatial = getattr(self, "bn{}2".format(number))
        bn_expand = getattr(self, "bn{}3".format(number))
        relu = getattr(self, "relu{}".format(number))

        residual = x
        # conv 1x1
        output = torch.nn.functional.conv2d(x, self.shared_weights4convolution1, bias = None)
        output = relu(bn_reduce(output))
        # conv 3x3 -- the only place where the branches differ
        output = torch.nn.functional.conv2d(output, self.shared_weights4convolution2, bias = None,
                                            stride = self.stride, padding = self.padding[index],
                                            dilation = self.dilation[index])
        output = relu(bn_spatial(output))
        # conv 1x1
        output = torch.nn.functional.conv2d(output, self.shared_weights4convolution3, bias = None)
        output = bn_expand(output)

        if self.downsample is not None:
            residual = self.downsample(x)

        output += residual
        return relu(output)

    def forward_branch_1(self, x):
        """First branch (``padding[0]`` / ``dilation[0]``)."""
        return self._forward_branch(x, 0)

    def forward_branch_2(self, x):
        """Second branch (``padding[1]`` / ``dilation[1]``)."""
        return self._forward_branch(x, 1)

    def forward_branch_3(self, x):
        """Third branch (``padding[2]`` / ``dilation[2]``)."""
        return self._forward_branch(x, 2)

    def total_forward(self, x):
        """Run all three branches and return their outputs as a list.

        ``x`` is either a single tensor, which is fed to every branch (the
        first trident block of a stage), or a sequence of three tensors, one
        per branch (the output of a preceding trident block).
        """
        if torch.is_tensor(x):
            inputs = (x, x, x)
        else:
            inputs = tuple(x)
            if len(inputs) != 3:
                raise ValueError(
                    "trident_block expects one tensor or three tensors, got {}".format(len(inputs)))
        return [
            self.forward_branch_1(inputs[0]),
            self.forward_branch_2(inputs[1]),
            self.forward_branch_3(inputs[2]),
        ]

    def forward(self, x):
        """Module entry point so the block can be called / used in nn.Sequential."""
        return self.total_forward(x)

## implementation of BottleNeck

In [3]:
class BottleNeck(torch.nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions plus a skip connection.

    Args:
        input_channels: number of channels of the incoming feature map.
        output_channels: width of the bottleneck; the block emits
            ``output_channels * expansion`` channels.
        downsample: optional module applied to the input to produce the
            residual when the input shape differs from the output shape.
    """
    expansion = 4

    def __init__(self, input_channels, output_channels, downsample = None):
        super(BottleNeck, self).__init__()
        # 1x1 reduction: input_channels -> output_channels
        self.conv_1 = torch.nn.Conv2d(input_channels, output_channels, kernel_size=1, bias=False)
        self.bn1 = torch.nn.BatchNorm2d(output_channels)

        # 3x3 convolution works on the already reduced feature map
        self.conv_2 = torch.nn.Conv2d(output_channels, output_channels, kernel_size=3, bias=False, padding = 1)
        self.bn2 = torch.nn.BatchNorm2d(output_channels)

        # 1x1 expansion: output_channels -> output_channels * expansion
        self.conv_3 = torch.nn.Conv2d(output_channels, output_channels * self.expansion, kernel_size=1, bias=False)
        self.bn3 = torch.nn.BatchNorm2d(output_channels * self.expansion)

        # activation and downsample
        self.relu = torch.nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(F(x) + residual)`` where residual is ``x`` or ``downsample(x)``."""
        residual = x
        # conv 1x1
        output = self.conv_1(x)
        output = self.bn1(output)
        output = self.relu(output)
        # conv 3x3
        output = self.conv_2(output)
        output = self.bn2(output)
        output = self.relu(output)
        # conv 1x1
        output = self.conv_3(output)
        output = self.bn3(output)

        if self.downsample is not None:
            residual = self.downsample(x)

        output += residual
        output = self.relu(output)

        return output

## implementation of BasicBlock

In [4]:
class Basic_Block(torch.nn.Module):
    """Basic residual block: two 3x3 convolutions plus a skip connection.

    Args:
        input_channels: number of channels of the incoming feature map.
        output_channels: number of channels produced by the block.
        downsample: optional module applied to the input to produce the
            residual when the input shape differs from the output shape.
    """
    # The block does not widen its output; the attribute exists so that
    # code reading ``block.expansion`` works with this block as well.
    expansion = 1

    def __init__(self, input_channels, output_channels, downsample = None):
        super(Basic_Block, self).__init__()
        # conv structures; the second convolution consumes the first one's output
        self.conv1 = torch.nn.Conv2d(input_channels, output_channels, kernel_size=3, padding=1, bias=False)
        self.conv2 = torch.nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1, bias=False)
        # normalizations
        self.bn1 = torch.nn.BatchNorm2d(output_channels)
        self.bn2 = torch.nn.BatchNorm2d(output_channels)
        # activation and downsample
        self.relu = torch.nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(F(x) + residual)`` where residual is ``x`` or ``downsample(x)``."""
        residual = x
        # first 3x3 convolution
        output = self.conv1(x)
        output = self.bn1(output)
        output = self.relu(output)
        # second 3x3 convolution
        output = self.conv2(output)
        output = self.bn2(output)

        if self.downsample is not None:
            residual = self.downsample(x)
        output += residual
        output = self.relu(output)

        return output

In [5]:
class ResNet(torch.nn.Module):
    """ResNet-style feature extractor whose last stage is built from trident blocks.

    Args:
        num_classes: output size of the ``FC`` layer.
        net_type: one of 'ResNet-50', 'ResNet-101', 'ResNet-152'; selects the
            number of blocks per stage.
        BOT_block: block class used for stages 1-3.
        TRI_block: block class used for stage 4.

    Raises:
        ValueError: if ``net_type`` is not one of the supported names.
    """
    # Number of blocks in each of the four stages, per supported variant.
    _STAGE_DEPTHS = {
        'ResNet-50':  [3, 4, 6, 3],
        'ResNet-101': [3, 4, 23, 3],
        'ResNet-152': [3, 8, 36, 3],
    }

    def __init__(self, num_classes, net_type, BOT_block = BottleNeck, TRI_block = trident_block):
        super(ResNet, self).__init__()
        if net_type not in self._STAGE_DEPTHS:
            raise ValueError("Unknown net_type {!r}; available: {}".format(
                net_type, sorted(self._STAGE_DEPTHS)))

        # Channel count of the feature map entering the next stage; updated by _make_layer.
        self.input_channels = 64
        self.net_type = net_type
        print("Net_type is " + net_type)
        layers = self._STAGE_DEPTHS[net_type]

        self.conv1 = torch.nn.Conv2d(3,64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1   = torch.nn.BatchNorm2d(64)
        self.relu  = torch.nn.ReLU(inplace=True)

        self.max_pooling = torch.nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        self.layer1 = self._make_layer(BOT_block, 64, layers[0])
        self.layer2 = self._make_layer(BOT_block, 128, layers[1])
        self.layer3 = self._make_layer(BOT_block, 256, layers[2])
        self.layer4 = self._make_layer(TRI_block, 512, layers[3])

        # NOTE: avg_pool and FC are created but not applied in res_forward.
        self.avg_pool = torch.nn.AvgPool2d(kernel_size=7) # same dimensionality
        self.FC = torch.nn.Linear(512 * BOT_block.expansion, num_classes)

    def res_forward(self, x):
        """Run the stem and the four stages; return the trident outputs stacked on dim 0."""
        output = self.conv1(x)
        output = self.bn1(output)
        output = self.relu(output)
        output = self.max_pooling(output)

        output = self.layer1(output)
        output = self.layer2(output)
        output = self.layer3(output)
        output = self.layer4(output)

        # Open question: what to do with the output after the trident block.
        # For now the three branch outputs are concatenated along the batch dimension.
        output = torch.cat([output[0],output[1],output[2]], dim = 0)
        return output

    def forward(self, x):
        """Module entry point; delegates to :meth:`res_forward`."""
        return self.res_forward(x)

    def _make_layer(self, block, output_channels, num_of_bottle_neck_blocks, stride = 1):
        """Build one stage: ``num_of_bottle_neck_blocks`` blocks of type ``block``.

        The first block gets a 1x1 conv + BatchNorm ``downsample`` module when
        the residual would not match the block output (stride or channel
        count differs); the remaining blocks are created without one.
        """
        downsample = None
        if stride != 1 or self.input_channels != output_channels*block.expansion:
            downsample = torch.nn.Sequential(
                                torch.nn.Conv2d(self.input_channels, output_channels * block.expansion,
                                                kernel_size = 1, stride = stride, bias=False),
                                torch.nn.BatchNorm2d(output_channels * block.expansion)
            )
        # Pass downsample by keyword: the block classes do not agree on its
        # positional slot (trident_block has stride in third position).
        layers = [block(self.input_channels, output_channels, downsample = downsample)]
        self.input_channels = output_channels * block.expansion

        for _ in range(1, num_of_bottle_neck_blocks):
            layers.append(block(self.input_channels, output_channels))

        return torch.nn.Sequential(*layers)

### https://github.com/wllvcxz/faster-rcnn-pytorch/tree/master/model
### https://github.com/jwyang/faster-rcnn.pytorch/blob/master/lib/model/rpn/rpn.py

In [6]:
# Dummy inputs: a batch of two random 3x500x500 tensors, two random 4-vectors
# and a (2, 1) column of float labels.
images = torch.randn((2, 3, 500, 500))
bbx = torch.randn((2, 4))
lbs = torch.tensor([1., 2.]).unsqueeze(1)

In [18]:
# Faster R-CNN detector (ResNet-50 + FPN builder from torchvision) with 21 classes,
# switched to training mode; the last expression displays the module structure.
model1 = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes=21)
model1.train()

FasterRCNN(
  (transform): GeneralizedRCNNTransform()
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d()
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d()
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d()
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d()
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d()
          )
  

In [19]:
# Second, independently constructed detector with the same configuration,
# switched to evaluation mode; this is the instance passed to MultiBoxLoss below.
model2 = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes=21)
model2.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform()
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d()
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d()
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d()
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d()
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d()
          )
  

In [9]:
def MultiBoxLoss(images, model, targets = None):
    """Sum the RPN and RoI-head losses of a torchvision-style detection model.

    The function drives the model's sub-modules by hand:
    ``transform -> backbone -> rpn -> roi_heads``.

    Arguments:
        images: batch of images accepted by ``model.transform``.
        model: detection model exposing ``transform``, ``backbone``, ``rpn``
            and ``roi_heads``.
        targets (optional): ground-truth annotations forwarded to
            ``transform``, ``rpn`` and ``roi_heads``. Without them (or with the
            model in evaluation mode) the sub-modules return no losses.

    Returns:
        ``loss_objectness + loss_rpn_box_reg + loss_classifier + loss_box_reg``
        when both loss dicts are populated; otherwise the placeholder string
        ``'Not ready'`` (kept for backward compatibility with existing callers).
    """
    # transform returns (ImageList, targets); the ImageList carries the padded
    # batch tensor and the per-image sizes.
    image_list, targets = model.transform(images, targets)
    features = model.backbone(image_list.tensors)

    # Region proposal network: proposals plus (possibly empty) loss dict.
    proposals, rpn_losses = model.rpn(image_list, features, targets)

    # RoI heads: detections plus (possibly empty) loss dict.
    _, roi_losses = model.roi_heads(features, proposals, image_list.image_sizes, targets)

    rpn_keys = ('loss_objectness', 'loss_rpn_box_reg')
    roi_keys = ('loss_classifier', 'loss_box_reg')
    losses_available = (all(key in rpn_losses for key in rpn_keys)
                        and all(key in roi_losses for key in roi_keys))
    if not losses_available:
        # Only the "no losses produced" case maps to the placeholder; any other
        # error raised by the model now propagates instead of being swallowed.
        return 'Not ready'

    rpn_loss = rpn_losses['loss_objectness'] + rpn_losses['loss_rpn_box_reg']
    roi_loss = roi_losses['loss_classifier'] + roi_losses['loss_box_reg']
    return rpn_loss + roi_loss

In [10]:
# model2 was put in eval mode above; the call yields the placeholder string shown in the output below.
MultiBoxLoss(images, model2)

'Not ready'

In [11]:
import cv2
from matplotlib import pyplot as plt
import random

In [12]:
!wget https://images.unsplash.com/photo-1458169495136-854e4c39548a -O girl_cars.jpg
#object_detection_api('./girl_cars.jpg', rect_th=15, text_th=7, text_size=5, threshold=0.8)  
img = cv2.imread('./girl_cars.jpg')

--2019-09-11 13:20:55--  https://images.unsplash.com/photo-1458169495136-854e4c39548a
Resolving localhost (localhost)... ::1, 127.0.0.1
Connecting to localhost (localhost)|::1|:3128... connected.
Proxy request sent, awaiting response... 200 OK
Length: 1168686 (1.1M) [image/jpeg]
Saving to: ‘girl_cars.jpg’


2019-09-11 13:20:56 (3.44 MB/s) - ‘girl_cars.jpg’ saved [1168686/1168686]



In [13]:
# Move the last axis of the loaded image to the front and add a leading batch axis of size 1.
tensor_img = torch.tensor(img).permute(2, 0, 1)[None]
# Reference box (same corner coordinates as pt1/pt2 in the drawing cells) and its label.
tensor_bbox = torch.tensor([2300., 600., 3500., 3300.], dtype=torch.float64)
tensor_label = torch.tensor([1.0])

In [14]:
# Draw the reference box (pt1, pt2) and the boxes selected by `rez` on a copy of the image.
# NOTE(review): `rez` is never assigned anywhere in the visible notebook and `boxes` is only
# created in a later cell, so this cell raises NameError on a fresh Restart & Run All
# (see the traceback below). Presumably `rez` held indices into `boxes` - TODO confirm.
pt1 = (2300,600) 
pt2 = (3500,3300)
img2 = img.copy()
for i in np.array(rez[0][:10]):
    # The reference box does not depend on i; it is simply redrawn on every iteration.
    cv2.rectangle(img2, pt1,pt2, (255,0,0), thickness = 16)
    # Box i is read as (x1, y1, x2, y2).
    cv2.rectangle(img2, (boxes[i][0],boxes[i][1]), (boxes[i][2],boxes[i][3]), color = (0,255,0), thickness = 16)
plt.imshow(img2)

NameError: name 'rez' is not defined

In [None]:
def _box_iou(box_a, box_b):
    """Intersection over union of two (x1, y1, x2, y2) boxes; 0.0 if they do not overlap."""
    ax1, ay1, ax2, ay2 = box_a
    bx1, by1, bx2, by2 = box_b
    # Clamp at zero: for disjoint boxes both extents are negative and their
    # product would otherwise look like a positive intersection area.
    inter_w = max(0, min(ax2, bx2) - max(ax1, bx1))
    inter_h = max(0, min(ay2, by2) - max(ay1, by1))
    intersection = inter_w * inter_h
    union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - intersection
    return intersection / union if union > 0 else 0.0


# Generate 100 random proposals around the reference box, score each one by its
# IoU with the reference box and draw everything on a copy of the image.
plt.figure(figsize=(10,15))
pt1 = (2300,600)
pt2 = (3500,3300)
pt = (pt1,pt2)
bboxes = []
img1 = img.copy()
img1 = cv2.rectangle(img1, pt1,pt2, color = (255,0,0), thickness = 16)
for proposals in range(100):
    # Each corner is shifted by 50..500 px; both coordinates of a corner move
    # in the same randomly chosen direction.
    sign = 1 if random.choice([0,1]) == 1 else -1
    x1 = pt1[0] + sign*random.randint(50, 500)
    y1 = pt1[1] + sign*random.randint(50, 500)

    sign = 1 if random.choice([0,1]) == 1 else -1
    x2 = pt2[0] + sign*random.randint(50, 500)
    y2 = pt2[1] + sign*random.randint(50, 500)

    iou = _box_iou((pt1[0], pt1[1], pt2[0], pt2[1]), (x1, y1, x2, y2))

    # Stored as [[x1, y1, x2, y2], iou]; later cells split this into boxes and scores.
    bboxes.append([[x1,y1, x2,y2], iou])

    cv2.rectangle(img1, (x1,y1), (x2,y2), color = (0,255,0), thickness = 16)
plt.imshow(img1)

In [None]:
# Split the [box, iou] pairs into two parallel lists.
boxes = [pair[0] for pair in bboxes]
scores = [pair[1] for pair in bboxes]

In [None]:
# Convert the Python lists to float64 tensors for NMS.
box = torch.tensor(boxes, dtype=torch.float64)
score = torch.tensor(scores, dtype=torch.float64)

In [None]:
# Non-maximum suppression over the random proposals, scored by their IoU with the
# reference box, with an overlap threshold of 0.5; the last expression displays the result.
rez1 = torchvision.ops.nms(box, score, iou_threshold=0.5)
rez1