In [2]:
import argparse
import datetime
import random
import time
from pathlib import Path

import torch
import torchvision.transforms as standard_transforms
import numpy as np

from PIL import Image
import cv2
from engine import *
from models import build_model
import os
import warnings

import matplotlib.pyplot as plt
import time
from scipy.io import loadmat
import pandas as pd

from imgaug import augmenters as iaa
import imgaug as ia
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage

warnings.filterwarnings('ignore')

In [3]:
def get_args_parser():
    """Build the option parser used for P2PNet evaluation.

    The parser is created with ``add_help=False`` so that it can be handed to
    another ``ArgumentParser`` through ``parents=[...]``.

    Returns:
        argparse.ArgumentParser: parser exposing ``--backbone``, ``--row``,
        ``--line``, ``--output_dir``, ``--weight_path`` and ``--gpu_id``.
    """
    eval_parser = argparse.ArgumentParser('Set parameters for P2PNet evaluation', add_help=False)

    # (flag, add_argument keyword arguments), registered in this order.
    option_table = (
        # * Backbone
        ('--backbone', dict(default='vgg16_bn', type=str,
                            help="name of the convolutional backbone to use")),
        # anchor-point grid
        ('--row', dict(default=2, type=int,
                       help="row number of anchor points")),
        ('--line', dict(default=2, type=int,
                        help="line number of anchor points")),
        # filesystem locations
        ('--output_dir', dict(default='./logs/',
                              help='path where to save')),
        ('--weight_path', dict(default='./weights/SHTechA.pth',
                               help='path where the trained weights saved')),
        # device selection
        ('--gpu_id', dict(default=0, type=int,
                          help='the gpu used for evaluation')),
    )
    for flag, spec in option_table:
        eval_parser.add_argument(flag, **spec)

    return eval_parser

In [4]:
# Parse the evaluation options. parse_known_args() returns (namespace, extras);
# only the namespace is kept, so unrecognised argv entries are ignored.
parser = argparse.ArgumentParser('P2PNet evaluation script', parents=[get_args_parser()])
args = parser.parse_known_args()[0]

# Restrict CUDA to the GPU selected with --gpu_id.
os.environ["CUDA_VISIBLE_DEVICES"] = '{}'.format(args.gpu_id)

print(args)
device = torch.device('cuda')

# NOTE(review): `build` is only defined in a later cell of this notebook; the
# recorded output of this cell is "NameError: name 'build' is not defined".
model = build(args, False)

# NOTE(review): hard-coded absolute checkpoint path; it overrides nothing from
# the parser because --resume is not one of the evaluation options.
args.resume = '/home/ding/P2PNet/ckpt/best_mae.pth'
checkpoint = torch.load(args.resume, map_location='cpu')

# The checkpoint is indexed with the 'model' key to obtain the state dict.
model.load_state_dict(checkpoint['model'])

model.to(device)
model.eval()

# create the pre-processing transform
transform = standard_transforms.Compose([
    standard_transforms.ToTensor(), 
    standard_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

Namespace(backbone='vgg16_bn', row=2, line=2, output_dir='./logs/', weight_path='./weights/SHTechA.pth', gpu_id=0)


NameError: name 'build' is not defined

In [17]:
# Parse the evaluation options; only the parsed namespace (element 0 of the
# parse_known_args() result) is kept.
parser = argparse.ArgumentParser('P2PNet evaluation script', parents=[get_args_parser()])
args = parser.parse_known_args()[0]

# Re-import so that `build_model` refers to the one from `models`; another cell
# of this notebook rebinds the same name from yolov6.models.yolo.
from models import build_model
# Restrict CUDA to the GPU selected with --gpu_id.
os.environ["CUDA_VISIBLE_DEVICES"] = '{}'.format(args.gpu_id)

print(args)
device = torch.device('cuda')

# get the P2PNet
model = build_model(args)
# move to GPU
model.to(device)
# load trained model
if args.weight_path is not None:
    # Weights are deserialised on the CPU and then copied into the model.
    checkpoint = torch.load(args.weight_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
# convert to eval mode
model.eval()



Namespace(backbone='vgg16_bn', row=2, line=2, output_dir='./logs/', weight_path='./weights/SHTechA.pth', gpu_id=0)


P2PNet(
  (backbone): Backbone_VGG(
    (body1): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
      (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (9): ReLU(inplace=True)
      (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (12): ReLU(inplace=True)
    )
    (body2): Sequential(
      (0): MaxPool2d(ke

In [10]:
# NOTE(review): hard-coded absolute dataset path.
img_file = '/home/ding/Datasets/ShanghaiTech_Crowd_Counting_Dataset/part_A_final/train_data/images'

# create the pre-processing transform
transform = standard_transforms.Compose([
    standard_transforms.ToTensor(),
    standard_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# read image file
img_names = sorted(os.listdir(img_file))

# Only the first image (in sorted order) is loaded.
img_raw = Image.open(os.path.join(img_file, img_names[0])).convert('RGB')
# round the size down to a multiple of 128 in each dimension
width, height = img_raw.size
new_width = width // 128 * 128
new_height = height // 128 * 128
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
# and is available in old and new Pillow releases alike.
img_raw = img_raw.resize((new_width, new_height), Image.LANCZOS)
# pre-processing
img = transform(img_raw)

# `img` is already a tensor, so it only needs a leading batch dimension.
samples = img.unsqueeze(0).to(device)

#features = model.backbone(samples)


In [19]:
# Print the shape of every feature map in `features`.
# NOTE(review): in the visible cells the only assignment to `features`
# (`features = model.backbone(samples)`) is commented out, so this cell relies
# on state left in the kernel.
for feature in features:
    print(feature.shape)

torch.Size([1, 128, 384, 512])
torch.Size([1, 256, 192, 256])
torch.Size([1, 512, 96, 128])
torch.Size([1, 512, 48, 64])


In [None]:
import torch
from torch import nn
from yolov6.layers.common import RepBlock, SimConv, Transpose


class RepPANNeck(nn.Module):
    """RepPANNeck Module
    EfficientRep is the default backbone of this model.
    RepPANNeck has the balance of feature fusion ability and hardware efficiency.

    The neck takes three backbone feature maps, runs a top-down pass
    (reduce -> upsample -> concat -> RepBlock) followed by a bottom-up pass
    (stride-2 conv -> concat -> RepBlock) and returns three fused maps.

    Args:
        channels_list: sequence of channel counts; indices 2..10 are read here
            (2, 3, 4 for the incoming C3, C4, C5 maps, 5..10 for the neck layers).
        num_repeats: sequence of RepBlock repeat counts; indices 5..8 are read.
    """

    def __init__(
        self,
        channels_list=None,
        num_repeats=None
    ):
        super().__init__()

        assert channels_list is not None
        assert num_repeats is not None

        # Top-down fusion block fed with [upsampled reduced C5, C4].
        self.Rep_p4 = RepBlock(
            in_channels=channels_list[3] + channels_list[5],
            out_channels=channels_list[5],
            n=num_repeats[5],
        )

        # Top-down fusion block fed with [upsampled reduced P4-level map, C3].
        self.Rep_p3 = RepBlock(
            in_channels=channels_list[2] + channels_list[6],
            out_channels=channels_list[6],
            n=num_repeats[6]
        )

        # Bottom-up fusion block fed with [downsampled P3, fpn_out1].
        self.Rep_n3 = RepBlock(
            in_channels=channels_list[6] + channels_list[7],
            out_channels=channels_list[8],
            n=num_repeats[7],
        )

        # Bottom-up fusion block fed with [downsampled P4, fpn_out0].
        self.Rep_n4 = RepBlock(
            in_channels=channels_list[5] + channels_list[9],
            out_channels=channels_list[10],
            n=num_repeats[8]
        )

        # 1x1 channel reduction applied to C5.
        self.reduce_layer0 = SimConv(
            in_channels=channels_list[4],
            out_channels=channels_list[5],
            kernel_size=1,
            stride=1
        )

        # Upsampling of the reduced C5 map (presumably 2x, so that it can be
        # concatenated with C4 -- confirm against yolov6.layers.common.Transpose).
        self.upsample0 = Transpose(
            in_channels=channels_list[5],
            out_channels=channels_list[5],
        )

        # 1x1 channel reduction applied to the output of Rep_p4.
        self.reduce_layer1 = SimConv(
            in_channels=channels_list[5],
            out_channels=channels_list[6],
            kernel_size=1,
            stride=1
        )

        self.upsample1 = Transpose(
            in_channels=channels_list[6],
            out_channels=channels_list[6]
        )

        # Stride-2 3x3 conv applied to P3 in the bottom-up pass.
        self.downsample2 = SimConv(
            in_channels=channels_list[6],
            out_channels=channels_list[7],
            kernel_size=3,
            stride=2
        )

        # Stride-2 3x3 conv applied to P4 in the bottom-up pass.
        self.downsample1 = SimConv(
            in_channels=channels_list[8],
            out_channels=channels_list[9],
            kernel_size=3,
            stride=2
        )

    def forward(self, input):
        """Fuse three backbone feature maps.

        Args:
            input: iterable of exactly three feature maps ``(C3, C4, C5)``.

        Returns:
            list ``[P3, P4, P5]`` with the outputs of Rep_p3, Rep_n3 and Rep_n4.
        """
        (C3, C4, C5) = input

        # ---- top-down pass ----
        fpn_out0 = self.reduce_layer0(C5)
        upsample_feat0 = self.upsample0(fpn_out0)
        f_concat_layer0 = torch.cat([upsample_feat0, C4], 1)
        f_out0 = self.Rep_p4(f_concat_layer0)

        fpn_out1 = self.reduce_layer1(f_out0)
        upsample_feat1 = self.upsample1(fpn_out1)
        f_concat_layer1 = torch.cat([upsample_feat1, C3], 1)
        P3 = self.Rep_p3(f_concat_layer1)

        # ---- bottom-up pass ----
        # The previous code referenced the undefined names `pan_out2` and
        # `pan_out1` here; the values meant are the P3 and P4 maps computed in
        # this method.
        down_feat1 = self.downsample2(P3)
        p_concat_layer1 = torch.cat([down_feat1, fpn_out1], 1)
        P4 = self.Rep_n3(p_concat_layer1)

        down_feat0 = self.downsample1(P4)
        p_concat_layer2 = torch.cat([down_feat0, fpn_out0], 1)
        P5 = self.Rep_n4(p_concat_layer2)

        outputs = [P3, P4, P5]

        return outputs

In [7]:
# run inference
outputs = model(samples)
outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]
outputs_points = outputs['pred_points'][0]

# filter the predictions
points = outputs_points[outputs_scores > threshold].detach().cpu().numpy().tolist()
predict_cnt = int((outputs_scores > threshold).sum())

"\n# run inference\noutputs = model(samples)\noutputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]\noutputs_points = outputs['pred_points'][0]\n\n# filter the predictions\npoints = outputs_points[outputs_scores > threshold].detach().cpu().numpy().tolist()\npredict_cnt = int((outputs_scores > threshold).sum())\n"

In [1]:
# Show the model's predictions (the original note says "on the ShanghaiTech
# dataset"; img_file below currently points at a folder of video frames).
# This cell uses `os`, `transform`, `model` and `device` from earlier cells;
# its recorded output is "NameError: name 'os' is not defined", i.e. it was
# last run on a kernel where the import cell had not been executed.

img_file = '/home/ding/Datasets/test_video/2_img'
threshold = 0.5

img_names = sorted(os.listdir(img_file))

# load the images (only the first one in sorted order)
img_raw = Image.open(os.path.join(img_file, img_names[0])).convert('RGB')
# round the size down to a multiple of 128 in each dimension
width, height = img_raw.size
new_width = width // 128 * 128
new_height = height // 128 * 128
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter.
img_raw = img_raw.resize((new_width, new_height), Image.LANCZOS)
# pre-processing
img = transform(img_raw)

# `img` is already a tensor; add the batch dimension and move it to the device
samples = img.unsqueeze(0).to(device)
# run inference; no gradients are needed for visualisation
with torch.no_grad():
    outputs = model(samples)
outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]
outputs_points = outputs['pred_points'][0]

# filter the predictions
points = outputs_points[outputs_scores > threshold].detach().cpu().numpy().tolist()
predict_cnt = int((outputs_scores > threshold).sum())

# draw the predictions (OpenCV drawing is done on a BGR copy of the image)
img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
for p in points:
    img_to_draw = cv2.circle(img_to_draw, (int(p[0]), int(p[1])), 5, (0, 255, 0), -1)
# scale the annotated image back to the original resolution
img_to_draw = cv2.resize(img_to_draw, (width, height))
# draw the predictions on the image
plt.figure(figsize=(16,9))
ax1 = plt.subplot(1,2,1)
# matplotlib expects RGB channel order; img_to_draw is BGR, so convert it back
# for display (previously the red and blue channels were shown swapped).
plt.imshow(cv2.cvtColor(img_to_draw, cv2.COLOR_BGR2RGB))
plt.title("prediction: "+str(predict_cnt))

# The string literal below is a disabled ground-truth overlay kept for reference.
'''
# draw the ground truths on the image
gt_path = os.path.join(gt_file, 'GT_'+image_name.split('.')[0]+'.mat')
gt_m= loadmat(gt_path)
gt_point = gt_m["image_info"][0,0][0,0][0]
gt_cnt = gt_m["image_info"][0,0][0,0][1][0][0]
img_raw = Image.open(img_path).convert('RGB')
gt_img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
for p in gt_point:
    gt_img_to_draw = cv2.circle(gt_img_to_draw, (int(p[0]), int(p[1])), 2, (255, 0, 0), -1)
ax2 = plt.subplot(1,2,2)
plt.imshow(gt_img_to_draw)
plt.title("ground truth: "+str(gt_cnt))
'''
plt.show()

NameError: name 'os' is not defined

In [2]:
from torch import nn
class Decoder(nn.Module):
    """Top-down feature pyramid over four backbone levels (C2, C3, C4, C5).

    Each level gets a 1x1 lateral convolution to ``feature_size`` channels and
    a 3x3 output convolution; coarser levels are upsampled by a factor of two
    (nearest neighbour) and added to the next finer lateral map.

    Args:
        C2_size, C3_size, C4_size, C5_size: channel counts of the four inputs.
        feature_size: channel count of every pyramid output.
    """

    def __init__(self, C2_size, C3_size, C4_size, C5_size, feature_size=256):
        super().__init__()

        def lateral(in_channels):
            # 1x1 projection of a backbone map to the pyramid width
            return nn.Conv2d(in_channels, feature_size, kernel_size=1, stride=1, padding=0)

        def smooth():
            # 3x3 convolution producing the final map of one level
            return nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        def upsample():
            return nn.Upsample(scale_factor=2, mode='nearest')

        # level 5 (coarsest)
        self.P5_1 = lateral(C5_size)
        self.P5_upsampled = upsample()
        self.P5_2 = smooth()

        # level 4: lateral(C4) + upsampled level 5
        self.P4_1 = lateral(C4_size)
        self.P4_upsampled = upsample()
        self.P4_2 = smooth()

        # level 3: lateral(C3) + upsampled level 4
        self.P3_1 = lateral(C3_size)
        self.P3_upsampled = upsample()
        self.P3_2 = smooth()

        # level 2 (finest): lateral(C2) + upsampled level 3
        self.P2_1 = lateral(C2_size)
        # registered but not used by forward()
        self.P2_upsampled = upsample()
        self.P2_2 = smooth()

    def forward(self, inputs):
        """Return ``[P2, P3, P4, P5]`` for ``inputs = (C2, C3, C4, C5)``."""
        c2, c3, c4, c5 = inputs

        # level 5
        merged5 = self.P5_1(c5)
        up_from5 = self.P5_upsampled(merged5)
        out5 = self.P5_2(merged5)

        # level 4
        merged4 = up_from5 + self.P4_1(c4)
        up_from4 = self.P4_upsampled(merged4)
        out4 = self.P4_2(merged4)

        # level 3
        merged3 = up_from4 + self.P3_1(c3)
        up_from3 = self.P3_upsampled(merged3)
        out3 = self.P3_2(merged3)

        # level 2
        merged2 = self.P2_1(c2) + up_from3
        out2 = self.P2_2(merged2)

        return [out2, out3, out4, out5]

In [2]:
import torch
import torch.nn.functional as F
from torch import nn

from util.misc import (NestedTensor, nested_tensor_from_tensor_list,
                       accuracy, get_world_size, interpolate,
                       is_dist_avail_and_initialized)


import numpy as np
import time

class RegressionModel(nn.Module):
    """Convolutional head that predicts a 2-vector per anchor point.

    Args:
        num_features_in: channel count of the input feature map.
        num_anchor_points: number of anchor points per feature-map cell.
        feature_size: channel count of the two hidden convolutions.
    """

    def __init__(self, num_features_in, num_anchor_points=4, feature_size=128):
        super().__init__()

        # two 3x3 conv + ReLU stages that keep the spatial size (padding=1)
        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()

        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()

        # two output channels per anchor point
        self.output = nn.Conv2d(feature_size, num_anchor_points * 2, kernel_size=3, padding=1)

    # sub-branch forward
    def forward(self, x):
        """Map ``(B, C, H, W)`` features to a ``(B, H * W * num_anchor_points, 2)`` tensor."""
        hidden = self.act1(self.conv1(x))
        hidden = self.act2(self.conv2(hidden))
        raw = self.output(hidden)

        # channels last, so that each cell's anchor values are adjacent
        channels_last = raw.permute(0, 2, 3, 1)
        batch = channels_last.shape[0]

        return channels_last.contiguous().view(batch, -1, 2)
    

# the network framework of the classification branch
class ClassificationModel(nn.Module):
    """Convolutional head that predicts ``num_classes`` values per anchor point.

    Args:
        num_features_in: channel count of the input feature map.
        num_anchor_points: number of anchor points per feature-map cell.
        num_classes: number of values predicted for each anchor point.
        prior: accepted but not used in this class.
        feature_size: channel count of the two hidden convolutions.
    """

    def __init__(self, num_features_in, num_anchor_points=4, num_classes=80, prior=0.01, feature_size=128):
        super().__init__()

        self.num_classes = num_classes
        self.num_anchor_points = num_anchor_points

        # two 3x3 conv + ReLU stages that keep the spatial size (padding=1)
        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()

        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()

        self.output = nn.Conv2d(feature_size, num_anchor_points * num_classes, kernel_size=3, padding=1)
        # registered but not applied in forward(): the raw conv output is returned
        self.output_act = nn.Sigmoid()

    # sub-branch forward
    def forward(self, x):
        """Map ``(B, C, H, W)`` features to ``(B, H * W * num_anchor_points, num_classes)``."""
        hidden = self.act1(self.conv1(x))
        hidden = self.act2(self.conv2(hidden))
        raw = self.output(hidden)

        # channels last: dims 1 and 2 are the two spatial axes of the conv output
        channels_last = raw.permute(0, 2, 3, 1)
        batch, dim_a, dim_b, _ = channels_last.shape

        # split the channel axis into (anchor point, class)
        per_anchor = channels_last.view(batch, dim_a, dim_b, self.num_anchor_points, self.num_classes)

        return per_anchor.contiguous().view(x.shape[0], -1, self.num_classes)
    
def generate_anchor_points(stride=16, row=3, line=3):
    """Return the ``row * line`` anchor offsets of one cell, centred on (0, 0).

    The cell of side ``stride`` is split into ``row`` x ``line`` sub-cells and
    the centre of every sub-cell is returned relative to the cell centre.

    Returns:
        numpy array of shape ``(row * line, 2)`` holding ``(x, y)`` offsets,
        with x varying fastest.
    """
    half = stride / 2
    step_y = stride / row
    step_x = stride / line

    # sub-cell centres along each axis, shifted so the cell centre is the origin
    xs = (np.arange(1, line + 1) - 0.5) * step_x - half
    ys = (np.arange(1, row + 1) - 0.5) * step_y - half

    grid_x, grid_y = np.meshgrid(xs, ys)

    return np.vstack((grid_x.ravel(), grid_y.ravel())).T
# shift the meta-anchors to every cell centre to get all anchor points
def shift(shape, stride, anchor_points):
    """Replicate the per-cell anchor offsets over a grid of cell centres.

    Args:
        shape: ``(rows, cols)`` of the grid; index 0 is used for y, index 1 for x.
        stride: spacing between neighbouring cell centres.
        anchor_points: array of ``(x, y)`` offsets, shape ``(A, 2)``.

    Returns:
        numpy array of shape ``(K * A, 2)`` with ``K = rows * cols``; the ``A``
        anchors of one cell are stored consecutively.
    """
    # centres of the grid cells along each axis
    centres_x = (np.arange(0, shape[1]) + 0.5) * stride
    centres_y = (np.arange(0, shape[0]) + 0.5) * stride

    grid_x, grid_y = np.meshgrid(centres_x, centres_y)
    cell_centres = np.vstack((grid_x.ravel(), grid_y.ravel())).T

    num_anchors = anchor_points.shape[0]
    num_cells = cell_centres.shape[0]

    # broadcast (1, A, 2) against (K, 1, 2) to obtain every cell/anchor pair
    offsets = anchor_points.reshape((1, num_anchors, 2))
    centres = cell_centres.reshape((num_cells, 1, 2))
    combined = offsets + centres

    return combined.reshape((num_cells * num_anchors, 2))

# this class generates all reference points on all pyramid levels
class AnchorPoints(nn.Module):
    """Generate the reference (anchor) points for an input image.

    Args:
        pyramid_levels: list of pyramid levels; defaults to ``[3, 4, 5, 6, 7]``.
        strides: list with one stride per pyramid level; defaults to
            ``2 ** level`` for every level.
        row: number of anchor rows per cell.
        line: number of anchor columns per cell.

    Raises:
        ValueError: if ``strides`` is given with a different length than
            ``pyramid_levels``.
    """

    def __init__(self, pyramid_levels=None, strides=None, row=3, line=3):
        super(AnchorPoints, self).__init__()

        if pyramid_levels is None:
            self.pyramid_levels = [3, 4, 5, 6, 7]
        else:
            self.pyramid_levels = pyramid_levels

        if strides is None:
            self.strides = [2 ** x for x in self.pyramid_levels]
        else:
            # Previously an explicit `strides` argument was dropped, leaving
            # self.strides undefined and forward() failing with AttributeError.
            if len(strides) != len(self.pyramid_levels):
                raise ValueError('strides must contain one entry per pyramid level')
            self.strides = strides

        self.row = row
        self.line = line

    def forward(self, image):
        """Return all anchor points for ``image`` as a ``(1, N, 2)`` float32 tensor.

        Only ``image.shape[2:]`` is read. The result is placed on the current
        CUDA device whenever CUDA is available, otherwise on the CPU.
        """
        image_shape = np.array(image.shape[2:])
        # per-level grid size: ceil(image_shape / 2 ** level)
        image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels]

        # start from an empty (0, 2) array so that an empty level list still works
        per_level = [np.zeros((0, 2)).astype(np.float32)]
        # get reference points for each level
        for idx, p in enumerate(self.pyramid_levels):
            anchor_points = generate_anchor_points(2**p, row=self.row, line=self.line)
            per_level.append(shift(image_shapes[idx], self.strides[idx], anchor_points))

        all_anchor_points = np.concatenate(per_level, axis=0)
        all_anchor_points = np.expand_dims(all_anchor_points, axis=0)
        anchors = torch.from_numpy(all_anchor_points.astype(np.float32))
        # send reference points to device
        # NOTE(review): the target device follows CUDA availability, not the
        # device of `image`; confirm this is intended for CPU inference on a
        # machine that has a GPU.
        if torch.cuda.is_available():
            return anchors.cuda()
        return anchors
        
class SetCriterion_Crowd(nn.Module):
    """Loss module: matches predictions to targets, then evaluates each configured loss."""

    def __init__(self, num_classes, matcher, weight_dict, eos_coef, losses):
        """ Create the criterion.
        Parameters:
            num_classes: number of object categories, omitting the special no-object category
            matcher: module able to compute a matching between targets and proposals
            weight_dict: dict containing as key the names of the losses and as values their relative weight.
            eos_coef: relative classification weight applied to the no-object category
            losses: list of all the losses to be applied. See get_loss for list of available losses.
        """
        super().__init__()
        self.num_classes = num_classes
        self.matcher = matcher
        self.weight_dict = weight_dict
        self.eos_coef = eos_coef
        self.losses = losses

        # per-class weights for cross entropy; class 0 is weighted by eos_coef
        class_weights = torch.ones(self.num_classes + 1)
        class_weights[0] = self.eos_coef
        self.register_buffer('empty_weight', class_weights)

    def loss_labels(self, outputs, targets, indices, num_points):
        """Classification loss (weighted cross entropy).

        Every target dict must contain the key "labels". Predictions that were
        not matched get class 0 as their target. ``num_points`` is not used.
        """
        assert 'pred_logits' in outputs
        logits = outputs['pred_logits']

        matched_positions = self._get_src_permutation_idx(indices)
        matched_labels = torch.cat(
            [tgt["labels"][tgt_ids] for tgt, (_, tgt_ids) in zip(targets, indices)]
        )

        # start with class 0 everywhere, then write the matched labels
        label_map = torch.full(logits.shape[:2], 0,
                               dtype=torch.int64, device=logits.device)
        label_map[matched_positions] = matched_labels

        # cross_entropy expects the class axis at position 1
        ce = F.cross_entropy(logits.transpose(1, 2), label_map, self.empty_weight)
        return {'loss_ce': ce}

    def loss_points(self, outputs, targets, indices, num_points):
        """Point loss: summed squared error over matched pairs, divided by ``num_points``.

        Every target dict must contain the key 'point'.
        """
        assert 'pred_points' in outputs
        matched_positions = self._get_src_permutation_idx(indices)
        predicted = outputs['pred_points'][matched_positions]
        expected = torch.cat(
            [tgt['point'][tgt_ids] for tgt, (_, tgt_ids) in zip(targets, indices)], dim=0
        )

        squared_error = F.mse_loss(predicted, expected, reduction='none')

        return {'loss_point': squared_error.sum() / num_points}

    def _get_src_permutation_idx(self, indices):
        # permute predictions following indices
        batch_parts = []
        src_parts = []
        for sample_no, (src, _) in enumerate(indices):
            batch_parts.append(torch.full_like(src, sample_no))
            src_parts.append(src)
        return torch.cat(batch_parts), torch.cat(src_parts)

    def _get_tgt_permutation_idx(self, indices):
        # permute targets following indices
        batch_parts = []
        tgt_parts = []
        for sample_no, (_, tgt) in enumerate(indices):
            batch_parts.append(torch.full_like(tgt, sample_no))
            tgt_parts.append(tgt)
        return torch.cat(batch_parts), torch.cat(tgt_parts)

    def get_loss(self, loss, outputs, targets, indices, num_points, **kwargs):
        """Dispatch to the loss function registered under the name ``loss``."""
        loss_map = {
            'labels': self.loss_labels,
            'points': self.loss_points,
        }
        assert loss in loss_map, f'do you really want to compute {loss} loss?'
        loss_fn = loss_map[loss]
        return loss_fn(outputs, targets, indices, num_points, **kwargs)

    def forward(self, outputs, targets):
        """ This performs the loss computation.
        Parameters:
             outputs: dict of tensors, see the output specification of the model for the format
             targets: list of dicts, such that len(targets) == batch_size.
                      The expected keys in each dict depends on the losses applied, see each loss' doc
        """
        # keep only the two entries the matcher and the losses read
        preds = {'pred_logits': outputs['pred_logits'], 'pred_points': outputs['pred_points']}

        matching = self.matcher(preds, targets)

        # total number of target points, averaged over processes and clamped to >= 1
        total_points = sum(len(t["labels"]) for t in targets)
        total_points = torch.as_tensor(
            [total_points], dtype=torch.float, device=next(iter(preds.values())).device
        )
        if is_dist_avail_and_initialized():
            torch.distributed.all_reduce(total_points)
        normalizer = torch.clamp(total_points / get_world_size(), min=1).item()

        computed = {}
        for loss_name in self.losses:
            computed.update(self.get_loss(loss_name, preds, targets, matching, normalizer))

        return computed

In [78]:
# Disabled scratch experiment kept as a string literal (nothing below runs):
# it chains backbone -> Decoder -> regression/classification heads by hand and
# assembles the same {'pred_logits', 'pred_points'} dict the model returns.
'''
features = backbone(samples)
fpn = Decoder(96,192,384,1408).to(device)
features_fpn = fpn(features)
batch_size = features[0].shape[0]
# run the regression and classification branch
regression_module = RegressionModel(num_features_in=256, num_anchor_points=4).to(device)
regression = regression_module(features_fpn[1]) * 64 # 8x
classification_module = ClassificationModel(num_features_in=256, num_classes=2, num_anchor_points=4).to(device)
classification = classification_module(features_fpn[1])
anchor_points = model.anchor_points(samples).repeat(batch_size, 1, 1)
# decode the points as prediction
output_coord = regression + anchor_points
output_class = classification
outputs = {'pred_logits': output_class, 'pred_points': output_coord}
'''

"\nfeatures = backbone(samples)\nfpn = Decoder(96,192,384,1408).to(device)\nfeatures_fpn = fpn(features)\nbatch_size = features[0].shape[0]\n# run the regression and classification branch\nregression_module = RegressionModel(num_features_in=256, num_anchor_points=4).to(device)\nregression = regression_module(features_fpn[1]) * 64 # 8x\nclassification_module = ClassificationModel(num_features_in=256, num_classes=2, num_anchor_points=4).to(device)\nclassification = classification_module(features_fpn[1])\nanchor_points = model.anchor_points(samples).repeat(batch_size, 1, 1)\n# decode the points as prediction\noutput_coord = regression + anchor_points\noutput_class = classification\noutputs = {'pred_logits': output_class, 'pred_points': output_coord}\n"

In [3]:
# the definition of the P2PNet model
class New_P2PNet(nn.Module):
    """P2PNet-style point predictor built on an externally supplied backbone and neck.

    Args:
        backbone: module producing the feature maps consumed by ``neck``; it is
            always run under ``torch.no_grad()``.
        neck: module that fuses the backbone features; element 1 of its output
            feeds both prediction heads.
        row: number of anchor rows per cell.
        line: number of anchor columns per cell.
    """

    def __init__(self, backbone, neck, row=2, line=2):
        super().__init__()
        self.backbone = backbone
        self.num_classes = 2
        # the number of anchor points per feature-map cell
        anchors_per_cell = row * line

        # both heads expect 128 input channels
        self.regression = RegressionModel(
            num_features_in=128,
            num_anchor_points=anchors_per_cell,
        )
        self.classification = ClassificationModel(
            num_features_in=128,
            num_classes=self.num_classes,
            num_anchor_points=anchors_per_cell,
        )

        # a single pyramid level (4) is used for the anchor points
        self.anchor_points = AnchorPoints(pyramid_levels=[4,], row=row, line=line)

        self.neck = neck

    def forward(self, samples: NestedTensor):
        """Return ``{'pred_logits': ..., 'pred_points': ...}`` for a batch of images."""
        # get the backbone features
        # The backbone is frozen: it runs without gradient tracking, so its
        # weights are not updated.
        with torch.no_grad():
            features = self.backbone(samples)
        fused = self.neck(features)

        batch_size = features[1].shape[0]
        head_input = fused[1]

        # run the regression and classification branch
        # regression outputs are scaled by 64 (the original comment tags this
        # "8x"; its meaning is not evident here -- TODO confirm)
        offsets = self.regression(head_input) * 64
        logits = self.classification(head_input)
        anchors = self.anchor_points(samples).repeat(batch_size, 1, 1)

        # decode the points as prediction: anchor position plus predicted offset
        coords = offsets + anchors

        return {'pred_logits': logits, 'pred_points': coords}

In [4]:
def get_yolo_args_parser(add_help=True):
    """Build the argument parser holding the YOLOv6 training options.

    Args:
        add_help: forwarded to ``argparse.ArgumentParser``.

    Returns:
        argparse.ArgumentParser with the options listed below.
    """
    yolo_parser = argparse.ArgumentParser(description='YOLOv6 PyTorch Training', add_help=add_help)

    # (flag, add_argument keyword arguments), registered in this order.
    option_table = [
        ('--data-path', dict(default='./data/coco.yaml', type=str, help='dataset path')),
        ('--conf-file', dict(default='./configs/yolov6s.py', type=str, help='experiment description file')),
        ('--img-size', dict(type=int, default=640, help='train, val image size (pixels)')),
        ('--batch-size', dict(default=32, type=int, help='total batch size for all GPUs')),
        ('--epochs', dict(default=400, type=int, help='number of total epochs to run')),
        ('--workers', dict(default=8, type=int, help='number of data loading workers (default: 8)')),
        ('--device', dict(default='0', type=str, help='cuda device, i.e. 0 or 0,1,2,3 or cpu')),
        ('--noval', dict(action='store_true', help='only evaluate in final epoch')),
        ('--check-images', dict(action='store_true', help='check images when initializing datasets')),
        ('--check-labels', dict(action='store_true', help='check label files when initializing datasets')),
        ('--output-dir', dict(default='./runs/train', type=str, help='path to save outputs')),
        ('--name', dict(default='exp', type=str, help='experiment name, save to output_dir/name')),
        ('--dist_url', dict(type=str, default="tcp://127.0.0.1:8888")),
        ('--gpu_count', dict(type=int, default=0)),
        ('--local_rank', dict(type=int, default=-1, help='DDP parameter, do not modify')),
    ]
    for flag, spec in option_table:
        yolo_parser.add_argument(flag, **spec)

    return yolo_parser

def check_and_init(args):
    '''Prepare the output directory, load the config, pick the device and seed the RNGs.

    Reads ``args.output_dir``, ``args.name``, ``args.conf_file``, ``args.device``
    and ``args.rank``; sets ``args.save_dir``; writes the arguments to
    ``<save_dir>/args.yaml``.

    Returns:
        (cfg, device): the loaded Config object and the selected device.
    '''
    # output directory: <output_dir>/<name>
    save_dir = osp.join(args.output_dir, args.name)
    args.save_dir = save_dir
    os.makedirs(save_dir, exist_ok=True)

    # experiment configuration
    cfg = Config.fromfile(args.conf_file)

    # device selection
    device = select_device(args.device)

    # seed depends on the process rank; deterministic mode only when rank is -1
    seed = 1 + args.rank
    set_random_seed(seed, deterministic=(args.rank == -1))

    # persist the arguments (including save_dir) next to the run outputs
    args_file = osp.join(save_dir, 'args.yaml')
    save_yaml(vars(args), args_file)

    return cfg, device

from yolov6.utils.envs import get_envs, select_device, set_random_seed
from yolov6.utils.config import Config
from yolov6.utils.events import LOGGER, save_yaml
import os.path as osp

# Parse the YOLOv6 options; only the namespace returned by parse_known_args()
# is kept, so unrecognised argv entries are ignored.
yolo_parser = get_yolo_args_parser()
yolo_args = yolo_parser.parse_known_args()[0]
# get_envs() supplies the three values stored as rank, local_rank and
# world_size (this overwrites the parsed --local_rank).
yolo_args.rank, yolo_args.local_rank, yolo_args.world_size = get_envs()
# NOTE: this rebinds the notebook-level `device` used by other cells.
yolo_cfg, device = check_and_init(yolo_args)

from yolov6.models.yolo import build_model
def get_model(args, cfg, nc, device):
    """Build the YOLOv6 model and optionally load pretrained weights.

    Args:
        args: parsed arguments (not used here; kept for interface compatibility).
        cfg: config object; ``cfg.model.pretrained`` selects the weights to load.
        nc: number of classes passed to the model builder.
        device: device passed to the model builder and used as ``map_location``.

    Returns:
        The constructed model, with the pretrained state dict applied when
        ``cfg.model.pretrained`` is truthy.
    """
    # Imported locally because the notebook-level name `build_model` is bound
    # to two different functions (`models.build_model` and
    # `yolov6.models.yolo.build_model`), depending on which cell ran last.
    from yolov6.models.yolo import build_model as build_yolo_model

    model = build_yolo_model(cfg, nc, device)
    weights = cfg.model.pretrained
    if weights:  # finetune if pretrained model is set
        # `load_state_dict` is not imported in the visible cells of this
        # notebook, so this branch used to raise NameError.
        from yolov6.utils.checkpoint import load_state_dict

        LOGGER.info(f'Loading state_dict from {weights} for fine-tuning...')
        model = load_state_dict(weights, model, map_location=device)
    LOGGER.info('Model: {}'.format(model))
    return model

# Build the YOLOv6 model with nc=2; build() below reuses its backbone and neck.
yolo_model = get_model(yolo_args, yolo_cfg, 2, device)

from models.matcher import build_matcher_crowd

def build(args, training):
    """Assemble New_P2PNet from the YOLOv6 backbone/neck and, for training, its criterion.

    Reads the notebook-level ``yolo_model``.

    Args:
        args: namespace providing ``row`` and ``line``; when ``training`` is
            true also ``point_loss_coef``, ``eos_coef`` and whatever
            ``build_matcher_crowd`` reads.
        training: when false only the model is returned.

    Returns:
        ``model`` if ``training`` is false, otherwise ``(model, criterion)``.
    """
    # treats persons as a single class
    num_classes = 1

    model = New_P2PNet(yolo_model.backbone, yolo_model.neck, args.row, args.line)
    if not training:
        return model

    # NOTE(review): the key 'loss_points' differs from the 'loss_point' key that
    # SetCriterion_Crowd.loss_points returns; how the weights are consumed is
    # not visible here -- confirm the point-loss weight is actually applied.
    loss_weights = {'loss_ce': 1, 'loss_points': args.point_loss_coef}
    loss_names = ['labels', 'points']
    crowd_matcher = build_matcher_crowd(args)
    criterion = SetCriterion_Crowd(
        num_classes,
        matcher=crowd_matcher,
        weight_dict=loss_weights,
        eos_coef=args.eos_coef,
        losses=loss_names,
    )

    return model, criterion

Using 1 GPU for training... 
Model: Model(
  (backbone): EfficientRep(
    (stem): RepVGGBlock(
      (nonlinearity): ReLU(inplace=True)
      (se): Identity()
      (rbr_dense): Sequential(
        (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      )
      (rbr_1x1): Sequential(
        (conv): Conv2d(3, 32, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      )
    )
    (ERBlock_2): Sequential(
      (0): RepVGGBlock(
        (nonlinearity): ReLU(inplace=True)
        (se): Identity()
        (rbr_dense): Sequential(
          (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        )
        (rbr_1x1): Sequential(
          

In [5]:
def get_args_parser():
    """Build the option parser for P2PNet training.

    NOTE: this redefines the evaluation-time ``get_args_parser`` from an
    earlier cell of the notebook; whichever cell ran last wins.

    The parser is created with ``add_help=False`` so that it can be passed to
    another ``ArgumentParser`` through ``parents=[...]``.

    Returns:
        argparse.ArgumentParser with the optimisation, model, matcher, loss,
        dataset and bookkeeping options listed below.
    """
    parser = argparse.ArgumentParser('Set parameters for training P2PNet', add_help=False)

    # * Optimisation
    parser.add_argument('--lr', default=1e-4, type=float)
    parser.add_argument('--lr_backbone', default=1e-5, type=float)
    parser.add_argument('--batch_size', default=16, type=int)
    parser.add_argument('--weight_decay', default=1e-4, type=float)
    parser.add_argument('--epochs', default=4500, type=int)
    parser.add_argument('--lr_drop', default=1500, type=int)
    parser.add_argument('--clip_max_norm', default=0.1, type=float,
                        help='gradient clipping max norm')

    # Model parameters
    parser.add_argument('--frozen_weights', type=str, default=None,
                        help="Path to the pretrained model. If set, only the mask head will be trained")

    # * Backbone
    parser.add_argument('--backbone', default='vgg16_bn', type=str,
                        help="Name of the convolutional backbone to use")

    # * Matcher
    parser.add_argument('--set_cost_class', default=1, type=float,
                        help="Class coefficient in the matching cost")

    parser.add_argument('--set_cost_point', default=0.05, type=float,
                        help="L1 point coefficient in the matching cost")

    # * Loss coefficients
    parser.add_argument('--point_loss_coef', default=0.0002, type=float)

    parser.add_argument('--eos_coef', default=0.5, type=float,
                        help="Relative classification weight of the no-object class")
    parser.add_argument('--row', default=2, type=int,
                        help="row number of anchor points")
    parser.add_argument('--line', default=2, type=int,
                        help="line number of anchor points")

    # dataset parameters
    parser.add_argument('--dataset_file', default='SHHA')
    # NOTE(review): the default is a machine-specific absolute path.
    parser.add_argument('--data_root', default='/home/ding/Datasets/ShanghaiTech_Crowd_Counting_Dataset/part_A_final',
                        help='path where the dataset is')

    parser.add_argument('--output_dir', default='./logs',
                        help='path where to save, empty for no saving')
    parser.add_argument('--checkpoints_dir', default='./ckpt',
                        help='path where to save checkpoints, empty for no saving')
    parser.add_argument('--tensorboard_dir', default='./runs',
                        help='path where to save, empty for no saving')

    parser.add_argument('--seed', default=42, type=int)
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start_epoch', default=0, type=int, metavar='N',
                        help='start epoch')
    parser.add_argument('--eval', action='store_true')
    parser.add_argument('--num_workers', default=8, type=int)
    # The help text used to claim a default of 5 epochs while the default is 10.
    parser.add_argument('--eval_freq', default=10, type=int,
                        help='frequency of evaluation, default setting is evaluating in every 10 epoch')
    parser.add_argument('--gpu_id', default=0, type=int, help='the gpu used for training')

    return parser

# Parse the training options; only the namespace returned by
# parse_known_args() is kept, so unrecognised argv entries are ignored.
# NOTE: this rebinds the notebook-level `parser` and `args` used by other cells.
parser = argparse.ArgumentParser('P2PNet training and evaluation script', parents=[get_args_parser()])
args = parser.parse_known_args()[0]

In [6]:
import scipy

# load an image and its ground-truth point annotations
def load_data(img_gt_path, train):
    """Load one image together with its annotated points.

    Args:
        img_gt_path: ``(img_path, gt_path)`` pair; ``gt_path`` is a MATLAB
            ``.mat`` annotation file.
        train: unused; kept so existing callers keep working.

    Returns:
        ``(img, points)``: ``img`` is an RGB ``PIL.Image``; ``points`` is the
        array stored at ``image_info[0][0][0][0][0]`` of the annotation file
        (indexed elsewhere in this file as column 0 = x, column 1 = y).

    Raises:
        FileNotFoundError: if OpenCV cannot read ``img_path``.
    """
    # import the submodule explicitly: a bare `import scipy` is not guaranteed
    # to make `scipy.io` available
    import scipy.io

    img_path, gt_path = img_gt_path
    # cv2.imread signals failure by returning None rather than raising
    bgr = cv2.imread(img_path)
    if bgr is None:
        raise FileNotFoundError('cannot read image: {}'.format(img_path))
    # OpenCV decodes to BGR; convert to RGB before handing the pixels to PIL
    img = Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
    # load ground truth points
    data = scipy.io.loadmat(gt_path)
    points = data['image_info'][0][0][0][0][0]

    return img, points

# random crop augmentation
def random_crop(img, den, num_patch=4, patch_size=128):
    """Cut ``num_patch`` random square patches out of an image and its points.

    Args:
        img: channel-first image of shape (C, H, W); a torch tensor or a
            numpy array.
        den: numpy array of shape (N, 2) with point coordinates, column 0 = x
            and column 1 = y. It is not modified.
        num_patch: number of patches to sample.
        patch_size: side length of each square patch in pixels.

    Returns:
        ``(result_img, result_den)``: ``result_img`` is a numpy array of shape
        (num_patch, C, patch_size, patch_size); ``result_den`` is a list with
        one (n_i, 2) array per patch holding the points that fall inside it,
        expressed relative to the patch's top-left corner.

    Raises:
        ValueError: if the image is smaller than ``patch_size`` in either
            spatial dimension.
    """
    half_h = patch_size
    half_w = patch_size
    # use .shape so both torch tensors and numpy arrays are accepted
    img_h, img_w = img.shape[1], img.shape[2]
    if img_h < half_h or img_w < half_w:
        raise ValueError(
            'image of size {}x{} is smaller than the {}x{} crop'.format(img_h, img_w, half_h, half_w))

    result_img = np.zeros([num_patch, img.shape[0], half_h, half_w])
    result_den = []
    # crop num_patch for each image
    for i in range(num_patch):
        start_h = random.randint(0, img_h - half_h)
        start_w = random.randint(0, img_w - half_w)
        end_h = start_h + half_h
        end_w = start_w + half_w
        # copy the cropped rect
        result_img[i] = img[:, start_h:end_h, start_w:end_w]
        # keep the points inside the half-open window [start, end): a point lying
        # exactly on end_w / end_h would map to coordinate patch_size, outside the patch
        idx = (den[:, 0] >= start_w) & (den[:, 0] < end_w) & (den[:, 1] >= start_h) & (den[:, 1] < end_h)
        # boolean indexing returns a copy, so the shift below leaves `den` untouched
        record_den = den[idx]
        # shift the coordinates into the patch frame
        record_den[:, 0] -= start_w
        record_den[:, 1] -= start_h

        result_den.append(record_den)

    return result_img, result_den

In [7]:
from torch.utils.data import Dataset

# dataset loader for the ShanghaiTech part A directory layout
class SHHA(Dataset):
    """Image / point-annotation dataset read from ``<data_root>/{train_data,test_data}``.

    Each split directory is expected to contain ``images/<name>.<ext>`` and
    ``ground_truth/GT_<name>.mat``.

    Args:
        data_root: dataset root directory.
        transform: optional callable applied to the loaded PIL image. The
            training path calls ``img.shape`` and ``img.unsqueeze`` on its
            result, so in training mode it must return a channel-first tensor.
        train: read ``train_data`` and enable random scaling when True,
            otherwise read ``test_data``.
        patch: in training mode, return random crops (see ``random_crop``)
            instead of the full image.
        flip: in training mode, mirror the sample horizontally with
            probability 0.5.
    """

    def __init__(self, data_root, transform=None, train=False, patch=False, flip=False):
        self.root_path = data_root
        img_dir = os.path.join(self.root_path, 'train_data' if train else 'test_data')
        image_names = sorted(os.listdir(os.path.join(img_dir, 'images')))

        self.img_list = [os.path.join(img_dir, 'images', name) for name in image_names]
        # image path -> path of its ground-truth .mat file
        self.img_map = {
            img_path: os.path.join(img_dir, 'ground_truth', 'GT_' + name.split('.')[0] + '.mat')
            for img_path, name in zip(self.img_list, image_names)
        }
        # number of samples
        self.nSamples = len(self.img_list)

        self.transform = transform
        self.train = train
        self.patch = patch
        self.flip = flip

    def __len__(self):
        return self.nSamples

    def __getitem__(self, index):
        """Return ``(img, target)`` for one sample.

        ``img`` is a float tensor: (num_patch, C, H, W) when training with
        ``patch=True``, otherwise (C, H, W). ``target`` is a list with one dict
        per patch (a single dict for a full image) holding ``'point'``,
        ``'image_id'`` and ``'labels'`` tensors.
        """
        # valid indices stop at len(self) - 1
        assert index < len(self), 'index range error'

        img_path = self.img_list[index]
        gt_path = self.img_map[img_path]
        # load image and ground truth
        img, point = load_data((img_path, gt_path), self.train)
        # apply the pre-processing transform
        if self.transform is not None:
            img = self.transform(img)

        if self.train:
            # data augmentation -> random scale
            scale_range = [0.7, 1.3]
            min_size = min(img.shape[1:])
            scale = random.uniform(*scale_range)
            # scale the image and points, unless the short side would drop to 128 or below
            if scale * min_size > 128:
                # same computation as the deprecated upsample_bilinear()
                img = torch.nn.functional.interpolate(
                    img.unsqueeze(0), scale_factor=scale,
                    mode='bilinear', align_corners=True).squeeze(0)
                point *= scale

        if self.train and self.patch:
            # random crop augmentation: one point array per patch
            img, point = random_crop(img, point)
            point = [torch.Tensor(p) for p in point]
        else:
            # a full image is handled as a single "patch" so that exactly one
            # target dict is built for it (training without patches included)
            point = [point]

        img = torch.Tensor(img)

        # random horizontal flip; works for both (C, H, W) and (N, C, H, W)
        if self.train and self.flip and random.random() > 0.5:
            width = img.shape[-1]
            img = torch.flip(img, dims=[-1])
            for p in point:
                p[:, 0] = width - p[:, 0]

        # pack up related infos
        image_id = int(os.path.basename(img_path).split('.')[0].split('_')[-1])
        target = []
        for p in point:
            target.append({
                'point': torch.Tensor(p),
                # build the id as an integer tensor directly (no float round-trip)
                'image_id': torch.tensor([image_id], dtype=torch.long),
                'labels': torch.ones(p.shape[0], dtype=torch.long),
            })

        return img, target
    
def loading_data(data_root):
    """Create the SHHA training and validation datasets for ``data_root``.

    Both datasets share one pre-processing pipeline (tensor conversion followed
    by per-channel normalisation). The training set additionally enables patch
    cropping and flipping.

    Returns:
        ``(train_set, val_set)``
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    preprocess = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(mean=channel_mean, std=channel_std),
    ])

    training_dataset = SHHA(data_root, train=True, transform=preprocess, patch=True, flip=True)
    validation_dataset = SHHA(data_root, train=False, transform=preprocess)
    return training_dataset, validation_dataset

In [8]:
import argparse
import datetime
import random
import time
from pathlib import Path

import torch
from torch.utils.data import DataLoader, DistributedSampler

from engine import *
import os
from tensorboardX import SummaryWriter
import warnings
warnings.filterwarnings('ignore')

# Select the GPU for this process.
# NOTE(review): this only has an effect if CUDA has not been initialised yet in this kernel.
os.environ["CUDA_VISIBLE_DEVICES"] = '{}'.format(args.gpu_id)
# create the logging directory if needed; open() does not create parent directories
if args.output_dir:
    os.makedirs(args.output_dir, exist_ok=True)
run_log_name = os.path.join(args.output_dir, 'run_log.txt')

# start a fresh log ("w" truncates an existing file) and back up the arguments,
# each record on its own line
with open(run_log_name, "w") as log_file:
    log_file.write('Eval Log %s\n' % time.strftime("%c"))
    log_file.write("{}\n".format(args))
print(args)
# use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# fix the seeds so initialisation and augmentation are repeatable between runs
torch.manual_seed(args.seed)
np.random.seed(args.seed)
random.seed(args.seed)

# get the P2PNet model
model, criterion = build(args, training=True)
# move to the selected device
model.to(device)
criterion.to(device)

# no distributed wrapper is used in this notebook, so both names refer to the same module
model_without_ddp = model

n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('number of params:', n_parameters)
# use different optimisation params for different parts of the model:
# backbone parameters get args.lr_backbone, everything else the optimizer default args.lr
param_dicts = [
    {"params": [p for n, p in model_without_ddp.named_parameters() if "backbone" not in n and p.requires_grad]},
    {
        "params": [p for n, p in model_without_ddp.named_parameters() if "backbone" in n and p.requires_grad],
        "lr": args.lr_backbone,
    },
]
# Adam is used by default
# NOTE(review): args.weight_decay is parsed but not passed to the optimizer — confirm this is intended
optimizer = torch.optim.Adam(param_dicts, lr=args.lr)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_drop)
# create the training and validation set
train_set, val_set = loading_data(args.data_root)
# create the samplers: shuffled for training, in order for validation
sampler_train = torch.utils.data.RandomSampler(train_set)
sampler_val = torch.utils.data.SequentialSampler(val_set)

batch_sampler_train = torch.utils.data.BatchSampler(
    sampler_train, args.batch_size, drop_last=True)
# the dataloader for training
data_loader_train = DataLoader(train_set, batch_sampler=batch_sampler_train,
                               collate_fn=utils.collate_fn_crowd, num_workers=args.num_workers)

# validation runs one image at a time
data_loader_val = DataLoader(val_set, 1, sampler=sampler_val,
                                drop_last=False, collate_fn=utils.collate_fn_crowd, num_workers=args.num_workers)

# continue to train the previous model: fall back to the notebook's default checkpoint
# only when no --resume path was given, so an explicit choice is respected
if not args.resume:
    args.resume = '/home/ding/P2PNet/ckpt/best_mae.pth'
# NOTE: torch.load unpickles the file; only load checkpoints from trusted sources
if args.frozen_weights is not None:
    checkpoint = torch.load(args.frozen_weights, map_location='cpu')
    # NOTE(review): no other cell in this notebook touches a `.detr` attribute on the
    # model — presumably a leftover; confirm before using --frozen_weights
    model_without_ddp.detr.load_state_dict(checkpoint['model'])
# resume the weights and, when the checkpoint contains it, the training state
if args.resume:
    checkpoint = torch.load(args.resume, map_location='cpu')
    model_without_ddp.load_state_dict(checkpoint['model'])
    if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint:
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1

print("Start training")
start_time = time.time()
# evaluation history: one entry per evaluation run
mae = []
mse = []
# torch.save does not create missing directories
if args.checkpoints_dir:
    os.makedirs(args.checkpoints_dir, exist_ok=True)
# the logger writer
writer = SummaryWriter(args.tensorboard_dir)


def _append_log(*messages):
    """Append each message to the run log on its own line."""
    with open(run_log_name, "a") as log_file:
        for message in messages:
            log_file.write(message + "\n")


step = 0
# training starts here
try:
    for epoch in range(args.start_epoch, args.epochs):
        t1 = time.time()
        stat = train_one_epoch(
            model, criterion, data_loader_train, optimizer, device, epoch,
            args.clip_max_norm)

        # record the training states after every epoch
        _append_log("loss/loss@{}: {}".format(epoch, stat['loss']),
                    "loss/loss_ce@{}: {}".format(epoch, stat['loss_ce']))
        writer.add_scalar('loss/loss', stat['loss'], epoch)
        writer.add_scalar('loss/loss_ce', stat['loss_ce'], epoch)

        t2 = time.time()
        epoch_summary = '[ep %d][lr %.7f][%.2fs]' % (epoch, optimizer.param_groups[0]['lr'], t2 - t1)
        print(epoch_summary)
        _append_log(epoch_summary)
        # change lr according to the scheduler
        lr_scheduler.step()
        # save the latest weights every epoch together with the optimizer / scheduler /
        # epoch entries that the resume code looks for, so training can really continue
        checkpoint_latest_path = os.path.join(args.checkpoints_dir, 'latest.pth')
        torch.save({
            'model': model_without_ddp.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch,
        }, checkpoint_latest_path)
        # run evaluation every eval_freq epochs, skipping epoch 0
        if epoch % args.eval_freq == 0 and epoch != 0:
            t1 = time.time()
            result = evaluate_crowd_no_overlap(model, data_loader_val, device)
            t2 = time.time()

            mae.append(result[0])
            mse.append(result[1])
            best_mae = np.min(mae)
            # print the evaluation results
            print('=======================================test=======================================')
            print("mae:", result[0], "mse:", result[1], "time:", t2 - t1, "best mae:", best_mae, )
            print('=======================================test=======================================')
            # record the evaluation results
            _append_log("mae:{}, mse:{}, time:{}, best mae:{}".format(result[0], result[1], t2 - t1, best_mae),
                        "metric/mae@{}: {}".format(step, result[0]),
                        "metric/mse@{}: {}".format(step, result[1]))
            writer.add_scalar('metric/mae', result[0], step)
            writer.add_scalar('metric/mse', result[1], step)
            step += 1

            # save the best model of this run; the current result is already in `mae`,
            # so it is the best exactly when it equals the minimum (a tolerance would
            # let a slightly worse model overwrite the best checkpoint)
            if result[0] <= best_mae:
                checkpoint_best_path = os.path.join(args.checkpoints_dir, 'best_mae.pth')
                # weights only: this file is meant for evaluation / initialisation
                torch.save({
                    'model': model_without_ddp.state_dict(),
                }, checkpoint_best_path)
finally:
    # flush pending TensorBoard events even when training is interrupted
    writer.close()
# total time for training
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))

Namespace(lr=0.0001, lr_backbone=1e-05, batch_size=16, weight_decay=0.0001, epochs=4500, lr_drop=1500, clip_max_norm=0.1, frozen_weights=None, backbone='vgg16_bn', set_cost_class=1, set_cost_point=0.05, point_loss_coef=0.0002, eos_coef=0.5, row=2, line=2, dataset_file='SHHA', data_root='/home/ding/Datasets/ShanghaiTech_Crowd_Counting_Dataset/part_A_final', output_dir='./logs', checkpoints_dir='./ckpt', tensorboard_dir='./runs', seed=42, resume='', start_epoch=0, eval=False, num_workers=8, eval_freq=10, gpu_id=0)
number of params: 17802896
Start training
Averaged stats: lr: 0.000100  loss: 0.1777 (0.1916)  loss_ce: 0.1777 (0.1916)  loss_ce_unscaled: 0.1777 (0.1916)  loss_point_unscaled: 50.6025 (72.0744)
[ep 0][lr 0.0001000][3.56s]
Averaged stats: lr: 0.000100  loss: 0.1907 (0.1901)  loss_ce: 0.1907 (0.1901)  loss_ce_unscaled: 0.1907 (0.1901)  loss_point_unscaled: 51.5557 (70.4763)
[ep 1][lr 0.0001000][3.21s]
Averaged stats: lr: 0.000100  loss: 0.1849 (0.1845)  loss_ce: 0.1849 (0.1845) 

KeyboardInterrupt: 

In [101]:
# Rebuild the model and call its sub-modules one by one so the intermediate shapes
# can be inspected in the following cells.
# NOTE(review): `samples` is not defined in this cell; it must already exist in the
# kernel — presumably an image batch on `device`; confirm before re-running.
model, criterion = build(args, training=True)
model.to(device)
features = model.backbone(samples)
features_fpn = model.neck(features)

# batch dimension of the first backbone feature map
batch_size = features[0].shape[0]
# run the regression and classification branch on the second neck output
regression = model.regression(features_fpn[1]) * 64 # 8x
classification = model.classification(features_fpn[1])
# one copy of the anchor grid per batch element
anchor_points = model.anchor_points(samples).repeat(batch_size, 1, 1)

In [102]:
for i in features_fpn:
    print(i.shape)

torch.Size([1, 64, 96, 128])
torch.Size([1, 128, 48, 64])
torch.Size([1, 256, 24, 32])


In [103]:
regression.shape

torch.Size([1, 12288, 2])

In [104]:
classification.shape

torch.Size([1, 12288, 2])

In [105]:
anchor_points.shape

torch.Size([1, 12288, 2])

In [93]:
for i in features_fpn:
    print(i.shape)

torch.Size([1, 64, 96, 128])
torch.Size([1, 128, 48, 64])
torch.Size([1, 256, 24, 32])


In [55]:
# Presumably the predicted coordinates are the anchor positions plus the regressed offsets.
# Both tensors must have the same size along dim 1; the error recorded below came from
# a 3072 vs 49152 mismatch.
output_coord = regression + anchor_points

RuntimeError: The size of tensor a (3072) must match the size of tensor b (49152) at non-singleton dimension 1

In [30]:
import warnings
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn

from yolov6.layers.common import DetectBackend

# Honour the device chosen earlier (a string or a torch.device), but only use CUDA when
# it is actually available. The device *type* is compared because `torch.device != 'cpu'`
# against a plain string is always True, which made the original check a no-op.
# NOTE: `device` must already exist in the kernel when this cell runs.
cuda = torch.device(device).type != 'cpu' and torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')

weights = '/home/ding/P2PNet/yolov6/yolov6t.pt'
# NOTE: this rebinds `model`, replacing the model created by earlier cells; the weights
# are loaded on the CPU regardless of `device`
model = DetectBackend(weights, device='cpu')

Loading checkpoint from /home/ding/P2PNet/yolov6/yolov6t.pt

Fusing model...


In [34]:
os.path.join(img_file, img_names[0])

'/home/ding/Datasets/test_video/2_img/img_1.jpg'

In [50]:
os.getcwd()

'/home/ding/P2PNet'

In [49]:
os.chdir('../')

In [42]:
model

DetectBackend()

In [76]:
cfg

Config (path: ./configs/yolov6s.py): {'model': {'type': 'YOLOv6s', 'pretrained': None, 'depth_multiple': 0.33, 'width_multiple': 0.5, 'backbone': {'type': 'EfficientRep', 'num_repeats': [1, 6, 12, 18, 6], 'out_channels': [64, 128, 256, 512, 1024]}, 'neck': {'type': 'RepPAN', 'num_repeats': [12, 12, 12, 12], 'out_channels': [256, 128, 128, 256, 256, 512]}, 'head': {'type': 'EffiDeHead', 'in_channels': [128, 256, 512], 'num_layers': 3, 'begin_indices': 24, 'anchors': 1, 'out_indices': [17, 20, 23], 'strides': [8, 16, 32], 'iou_type': 'siou'}}, 'solver': {'optim': 'SGD', 'lr_scheduler': 'Cosine', 'lr0': 0.01, 'lrf': 0.01, 'momentum': 0.937, 'weight_decay': 0.0005, 'warmup_epochs': 3.0, 'warmup_momentum': 0.8, 'warmup_bias_lr': 0.1}, 'data_aug': {'hsv_h': 0.015, 'hsv_s': 0.7, 'hsv_v': 0.4, 'degrees': 0.0, 'translate': 0.1, 'scale': 0.5, 'shear': 0.0, 'flipud': 0.0, 'fliplr': 0.5, 'mosaic': 1.0, 'mixup': 0.0}}

In [85]:
from yolov6.models.efficientrep import EfficientRep
from yolov6.models.reppan import RepPANNeck
def build_network(config, channels, num_classes):
    """Build the EfficientRep backbone and RepPAN neck described by ``config``.

    Args:
        config: configuration object exposing ``model.depth_multiple``,
            ``model.width_multiple``, ``model.backbone.{num_repeats,out_channels}``
            and ``model.neck.{num_repeats,out_channels}``.
        channels: number of input image channels passed to the backbone.
        num_classes: unused here (no detection head is built); kept so the
            call signature stays unchanged.

    Returns:
        ``(backbone, neck)``
    """
    depth_mul = config.model.depth_multiple
    width_mul = config.model.width_multiple
    num_repeat_backbone = config.model.backbone.num_repeats
    channels_list_backbone = config.model.backbone.out_channels
    num_repeat_neck = config.model.neck.num_repeats
    channels_list_neck = config.model.neck.out_channels
    # scale the repeat counts by the depth multiplier (never below 1); counts of 1 or less are kept as-is
    num_repeat = [(max(round(i * depth_mul), 1) if i > 1 else i) for i in (num_repeat_backbone + num_repeat_neck)]
    # scale the channel counts by the width multiplier, rounded up to a multiple of 8
    channels_list = [make_divisible(i * width_mul, 8) for i in (channels_list_backbone + channels_list_neck)]

    # backbone and neck receive the same concatenated (backbone + neck) lists
    backbone = EfficientRep(
        in_channels=channels,
        channels_list=channels_list,
        num_repeats=num_repeat
    )

    neck = RepPANNeck(
        channels_list=channels_list,
        num_repeats=num_repeat
    )

    return backbone, neck

def make_divisible(x, divisor):
    """Round ``x`` up to the nearest multiple of ``divisor``.

    Args:
        x: value to round (int or float).
        divisor: non-zero step the result must be a multiple of.

    Returns:
        The smallest multiple of ``divisor`` that is >= ``x``.
    """
    # imported here so the function does not depend on `math` having been brought
    # into the notebook namespace by some other cell or star-import
    import math

    return math.ceil(x / divisor) * divisor

In [86]:
backbone, neck = build_network(cfg, channels=3, num_classes=2)

In [92]:
# Run the backbone and neck of whatever `model` currently is in the kernel.
# NOTE(review): `model` and `samples` come from earlier cells and execution order matters —
# confirm which model is bound before relying on the shapes printed below.
features =  model.backbone(samples)
fpn = model.neck(features)

In [94]:
for i in fpn:
    print(i.shape)

torch.Size([1, 64, 128, 240])
torch.Size([1, 128, 64, 120])
torch.Size([1, 256, 32, 60])
