In [1]:
# Darknet部分

In [29]:
# 熟悉一遍4.train.py的parse_model_config函数
def parse_model_config(path):
    """Parses the yolo-v3 layer configuration file and returns module definitions.

    The file is read line by line. A line of the form ``[name]`` starts a new
    block; every following ``key=value`` line is stored in that block until the
    next header. Blank lines and lines whose first non-blank character is ``#``
    are ignored.

    Args:
        path: Path of the ``.cfg`` file to read.

    Returns:
        A list with one dict per block, in file order. Each dict has a
        ``'type'`` entry (the header name) plus the block's keys. All parsed
        values are kept as stripped strings; ``'convolutional'`` blocks are
        pre-seeded with ``'batch_normalize': 0`` so the key always exists.

    Raises:
        ValueError: If a non-header line contains no ``=``.
        IndexError: If a ``key=value`` line appears before the first header.
    """
    # The context manager closes the file even if reading fails.
    with open(path, 'r') as cfg_file:
        raw_lines = cfg_file.read().split('\n')

    module_defs = []
    for raw_line in raw_lines:
        # Strip BEFORE filtering so indented comments and whitespace-only
        # lines are skipped instead of being parsed as key/value pairs.
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue

        if line.startswith('['):  # This marks the start of a new block
            module_defs.append({})
            # Drop the surrounding brackets and any padding before the closing one.
            module_defs[-1]['type'] = line[1:-1].rstrip()
            if module_defs[-1]['type'] == 'convolutional':
                module_defs[-1]['batch_normalize'] = 0
        else:
            # Split on the first '=' only, so values may themselves contain '='.
            key, value = line.split("=", 1)
            module_defs[-1][key.rstrip()] = value.strip()

    return module_defs

# Absolute path of the YOLOv3 layer configuration on the author's machine;
# point it at a local copy of yolov3.cfg before running this cell elsewhere.
config_path = '/home/lsc/a409/users/lisuicheng/Machine_learning/PyTorch-YOLOv3/config/yolov3.cfg'
# Parse the cfg into a list with one dictionary per block.
module_defs = parse_model_config(config_path)
# Bare expression: the notebook displays the parsed definitions as the cell output.
module_defs

[{'type': 'net',
  'batch': '16',
  'subdivisions': '1',
  'width': '416',
  'height': '416',
  'channels': '3',
  'momentum': '0.9',
  'decay': '0.0005',
  'angle': '0',
  'saturation': '1.5',
  'exposure': '1.5',
  'hue': '.1',
  'learning_rate': '0.001',
  'burn_in': '1000',
  'max_batches': '500200',
  'policy': 'steps',
  'steps': '400000,450000',
  'scales': '.1,.1'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '32',
  'size': '3',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '64',
  'size': '3',
  'stride': '2',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '32',
  'size': '1',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '64',
  'size': '3',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'shortcut', 'from': '-3', 'activation

In [30]:
import numpy as np
# Wrap the list of block dictionaries in a NumPy array purely for inspection.
module_defs_np = np.array(module_defs)
# Display the array shape, its first two elements and the Python type of one element.
module_defs_np.shape, module_defs_np[0], module_defs_np[1], type(module_defs_np[1]) 

((108,),
 {'type': 'net',
  'batch': '16',
  'subdivisions': '1',
  'width': '416',
  'height': '416',
  'channels': '3',
  'momentum': '0.9',
  'decay': '0.0005',
  'angle': '0',
  'saturation': '1.5',
  'exposure': '1.5',
  'hue': '.1',
  'learning_rate': '0.001',
  'burn_in': '1000',
  'max_batches': '500200',
  'policy': 'steps',
  'steps': '400000,450000',
  'scales': '.1,.1'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '32',
  'size': '3',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 dict)

In [41]:
def _layer_filters(output_filters, layer_idx):
    """Return the channel count produced by the layer that `layer_idx` refers to."""
    # output_filters[0] holds the network input channels, so the output of
    # module k is stored at position k + 1. Negative (relative) references
    # already line up with Python's negative indexing and are used unchanged.
    if layer_idx >= 0:
        return output_filters[layer_idx + 1]
    return output_filters[layer_idx]


def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs

    Args:
        module_defs: List of block dictionaries as produced by
            parse_model_config. The first entry must be the hyper-parameter
            block. NOTE: that first entry is removed from the list in place
            (``pop(0)``), so after the call the caller's list contains only
            the layer blocks.

    Returns:
        A tuple ``(hyperparams, module_list, output_filters)``:
        hyperparams is the popped first block, module_list is an
        ``nn.ModuleList`` with one ``nn.Sequential`` per remaining block, and
        output_filters is a list whose element 0 is the input channel count
        and whose element k + 1 is the channel count recorded for block k.
    """
    hyperparams = module_defs.pop(0)
    output_filters = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()
    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()
        # Blocks that do not set `filters` themselves (maxpool, upsample, yolo,
        # unknown types) record the channel count of the previous entry. This
        # also keeps `filters` defined when such a block comes first.
        filters = output_filters[-1]

        if module_def["type"] == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])

            kernel_size = int(module_def["size"])
            # "Same"-style padding when the cfg asks for padding, otherwise none.
            pad = (kernel_size - 1) // 2 if int(module_def["pad"]) else 0
            modules.add_module(
                "conv_%d" % i,
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    # The convolution gets a bias term only when it is NOT
                    # followed by batch normalisation.
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module("batch_norm_%d" % i, nn.BatchNorm2d(filters))
            if module_def["activation"] == "leaky":
                modules.add_module("leaky_%d" % i, nn.LeakyReLU(0.1))

        elif module_def["type"] == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                # Pad one column on the right and one row at the bottom first.
                padding = nn.ZeroPad2d((0, 1, 0, 1))
                modules.add_module("_debug_padding_%d" % i, padding)
            maxpool = nn.MaxPool2d(
                kernel_size=kernel_size,
                stride=stride,
                padding=int((kernel_size - 1) // 2),
            )
            modules.add_module("maxpool_%d" % i, maxpool)

        elif module_def["type"] == "upsample":
            upsample = nn.Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
            modules.add_module("upsample_%d" % i, upsample)

        elif module_def["type"] == "route":
            layers = [int(x) for x in module_def["layers"].split(",")]
            # The recorded channel count is the sum over all referenced layers.
            filters = sum(_layer_filters(output_filters, layer_i) for layer_i in layers)
            modules.add_module("route_%d" % i, EmptyLayer())

        elif module_def["type"] == "shortcut":
            # The recorded channel count is that of the referenced layer.
            filters = _layer_filters(output_filters, int(module_def["from"]))
            modules.add_module("shortcut_%d" % i, EmptyLayer())

        elif module_def["type"] == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors: flat "w,h,w,h,..." list -> (w, h) pairs -> masked subset.
            # The comprehension variables are named so they never shadow the loop index `i`.
            anchor_values = [int(x) for x in module_def["anchors"].split(",")]
            anchor_pairs = [
                (anchor_values[k], anchor_values[k + 1]) for k in range(0, len(anchor_values), 2)
            ]
            anchors = [anchor_pairs[idx] for idx in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_height = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_height)
            modules.add_module("yolo_%d" % i, yolo_layer)
        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    # Changed from the upstream function: output_filters is returned as well.
    return hyperparams, module_list, output_filters

In [42]:
class EmptyLayer(nn.Module):
    """Stateless stand-in module registered for 'route' and 'shortcut' blocks."""

    def __init__(self):
        # Only the base-class set-up is required; the layer adds nothing of its own.
        nn.Module.__init__(self)


class YOLOLayer(nn.Module):
    """Detection layer.

    Decodes a raw feature map into bounding boxes, an objectness score and
    per-class scores. Called without targets it returns the decoded
    predictions; called with targets it returns the loss and its components.

    Args:
        anchors: Sequence of ``(width, height)`` anchor pairs used by this layer.
        num_classes: Number of object classes.
        img_dim: Input image size; the stride is computed as ``img_dim / grid size``.
    """

    def __init__(self, anchors, num_classes, img_dim):
        super(YOLOLayer, self).__init__()
        self.anchors = anchors
        self.num_anchors = len(anchors)
        self.num_classes = num_classes
        # 4 box values + 1 confidence value + one score per class.
        self.bbox_attrs = 5 + num_classes
        self.image_dim = img_dim
        self.ignore_thres = 0.5
        self.lambda_coord = 1
        # Example values seen with yolov3.cfg:
        # anchors [(30, 61), (62, 45), (59, 119)], 3 anchors, 80 classes, 85 attrs, image_dim 416

        # Mean reduction is the default; the deprecated `size_average=True`
        # argument requested exactly that and is therefore dropped.
        self.mse_loss = nn.MSELoss()  # Coordinate loss
        self.bce_loss = nn.BCELoss()  # Confidence loss
        self.ce_loss = nn.CrossEntropyLoss()  # Class loss

    def forward(self, x, targets=None):
        """Decode the feature map `x`; compute the loss when `targets` is given.

        Args:
            x: Feature map that can be viewed as
                ``(batch, num_anchors, bbox_attrs, grid, grid)``,
                e.g. ``torch.Size([1, 255, 13, 13])``.
            targets: Ground-truth tensor forwarded to ``build_targets``, or
                None for inference.

        Returns:
            With targets: the tuple ``(loss, loss_x, loss_y, loss_w, loss_h,
            loss_conf, loss_cls, recall, precision)`` where ``loss`` is a
            tensor and the other entries are Python numbers.
            Without targets: a tensor of shape
            ``(batch, num_anchors * grid * grid, 5 + num_classes)`` holding
            boxes scaled by the stride, confidence and class scores.
        """
        nA = self.num_anchors
        nB = x.size(0)
        nG = x.size(2)
        stride = self.image_dim / nG  # e.g. 416 / 13 = 32

        # Tensors for cuda support
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

        # (nB, nA * attrs, nG, nG) -> (nB, nA, attrs, nG, nG) -> (nB, nA, nG, nG, attrs)
        prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()

        # Get outputs. Note that `x` is rebound to the decoded centre-x values here.
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.
        # x, y, w, h, pred_conf are (nB, nA, nG, nG); pred_cls is (nB, nA, nG, nG, num_classes)

        # Calculate offsets for each grid cell
        grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)
        grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor)
        # Anchors expressed in grid units instead of pixels.
        scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])
        anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
        anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))
        # grid_x / grid_y are (1, 1, nG, nG); scaled_anchors is (nA, 2); anchor_w / anchor_h are (1, nA, 1, 1)

        # Add offset and scale with anchors. Every slot of the freshly
        # allocated buffer is overwritten below; shape is (nB, nA, nG, nG, 4).
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + grid_x
        pred_boxes[..., 1] = y.data + grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

        # Training
        if targets is not None:

            if x.is_cuda:
                self.mse_loss = self.mse_loss.cuda()
                self.bce_loss = self.bce_loss.cuda()
                self.ce_loss = self.ce_loss.cuda()

            # NOTE(review): build_targets is not defined in the visible part of
            # this notebook; it must be in scope before this branch is used.
            nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
                pred_boxes=pred_boxes.cpu().data,
                pred_conf=pred_conf.cpu().data,
                pred_cls=pred_cls.cpu().data,
                target=targets.cpu().data,
                anchors=scaled_anchors.cpu().data,
                num_anchors=nA,
                num_classes=self.num_classes,
                grid_size=nG,
                ignore_thres=self.ignore_thres,
                img_dim=self.image_dim,
            )

            nProposals = int((pred_conf > 0.5).sum().item())
            recall = float(nCorrect / nGT) if nGT else 1
            # Guard against ZeroDivisionError when no prediction exceeds the
            # 0.5 confidence threshold; precision is reported as 0.0 then.
            precision = float(nCorrect / nProposals) if nProposals else 0.0

            # Handle masks
            mask = Variable(mask.type(ByteTensor))
            conf_mask = Variable(conf_mask.type(ByteTensor))

            # Handle target variables
            tx = Variable(tx.type(FloatTensor), requires_grad=False)
            ty = Variable(ty.type(FloatTensor), requires_grad=False)
            tw = Variable(tw.type(FloatTensor), requires_grad=False)
            th = Variable(th.type(FloatTensor), requires_grad=False)
            tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
            tcls = Variable(tcls.type(LongTensor), requires_grad=False)

            # Get conf mask where gt and where there is no gt
            conf_mask_true = mask
            conf_mask_false = conf_mask - mask

            # Mask outputs to ignore non-existing objects
            loss_x = self.mse_loss(x[mask], tx[mask])
            loss_y = self.mse_loss(y[mask], ty[mask])
            loss_w = self.mse_loss(w[mask], tw[mask])
            loss_h = self.mse_loss(h[mask], th[mask])
            loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + self.bce_loss(
                pred_conf[conf_mask_true], tconf[conf_mask_true]
            )
            # tcls is indexed per class along its last axis; argmax over that
            # axis turns it into the class indices that CrossEntropyLoss expects.
            loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))
            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            return (
                loss,
                loss_x.item(),
                loss_y.item(),
                loss_w.item(),
                loss_h.item(),
                loss_conf.item(),
                loss_cls.item(),
                recall,
                precision,
            )

        else:
            # Not in training phase (targets is None): return the predictions.
            output = torch.cat(
                (
                    pred_boxes.view(nB, -1, 4) * stride,
                    pred_conf.view(nB, -1, 1),
                    pred_cls.view(nB, -1, self.num_classes),
                ),
                -1,
            )
            return output


In [44]:
# -*- coding: utf-8 -*-

from __future__ import division

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

from PIL import Image

# from utils.parse_config import *
# from utils.utils import build_targets
from collections import defaultdict

import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Absolute path of the YOLOv3 layer configuration on the author's machine;
# point it at a local copy of yolov3.cfg before running this cell elsewhere.
config_path = '/home/lsc/a409/users/lisuicheng/Machine_learning/PyTorch-YOLOv3/config/yolov3.cfg'
# Re-parse the cfg: create_modules() pops the first entry off the list it
# receives, so it must be handed a freshly parsed list.
module_defs = parse_model_config(config_path)

# Build the layer modules from the parsed block definitions.
hyperparams, module_list, output_filters= create_modules(module_defs)
# Bare expression: the notebook displays the three results as the cell output.
hyperparams, module_list, output_filters



({'type': 'net',
  'batch': '16',
  'subdivisions': '1',
  'width': '416',
  'height': '416',
  'channels': '3',
  'momentum': '0.9',
  'decay': '0.0005',
  'angle': '0',
  'saturation': '1.5',
  'exposure': '1.5',
  'hue': '.1',
  'learning_rate': '0.001',
  'burn_in': '1000',
  'max_batches': '500200',
  'policy': 'steps',
  'steps': '400000,450000',
  'scales': '.1,.1'},
 ModuleList(
   (0): Sequential(
     (conv_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
     (batch_norm_0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (leaky_0): LeakyReLU(negative_slope=0.1)
   )
   (1): Sequential(
     (conv_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
     (batch_norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (leaky_1): LeakyReLU(negative_slope=0.1)
   )
   (2): Sequential(
     (conv_2): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 

In [39]:
# Quick check of the `bias=not bn` expression passed to nn.Conv2d in create_modules:
# with batch normalisation switched on (bn = 1) the expression evaluates to False.
bn = 1
bias=not bn
bias

False

# nn.Conv2d
CLASS torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
[1] - kernel_size
[2] - stride - 步长
[3] - padding - 每一维补零的数量
[4] - dilation - 控制 kernel 点之间的空间距离(the spacing between the kernel points). 带孔卷积(atrous conv)
[5] - groups - 控制 inputs 和 outputs 间的关联性(分组). 其中，要求 in_channels 和 out_channels 必须都可以被 groups 整除.

# nn.BatchNorm2d(out_channels)
详细的见programming/python_modules_of_YOLOv3/Pytorch_YOLOv3_Darknet中的演示

# nn.LeakyReLU(0.1)

# nn.Sequential()
modules = nn.Sequential()

用法

modules.add_module(
                "conv_%d" % i,
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    bias=not bn,
                ),
            )
            
modules.add_module("batch_norm_%d" % i, nn.BatchNorm2d(filters))

# nn.Upsample
详细的见programming/python_modules_of_YOLOv3/Pytorch_YOLOv3_Darknet中的演示


# nn.ModuleList()
普通list中的子module并不能被主module所识别，而ModuleList中的子module能够被主module所识别。这意味着如果用list保存子module，将无法调整其参数，因其未加入到主module的参数中。

除ModuleList之外还有ParameterList，其是一个可以包含多个parameter的类list对象。在实际应用中，使用方式与ModuleList类似。

ModuleList可以包含很多个nn.Sequential()

module_list = nn.ModuleList()
module_list.append(modules) # modules是nn.Sequential()

In [51]:
# shortcut 和 route 都是空层 不知道为什么这样



In [53]:
# yolo
# Each yolo layer uses a different `mask`, i.e. a different subset of the shared anchor list.
# The same steps are run once per mask instead of being copy-pasted.
for mask_spec in ('0,1,2', '3,4,5'):
    module_def = {'type': 'yolo',
      'mask': mask_spec,
      'anchors': '10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326',
      'classes': '80',
      'num': '9',
      'jitter': '.3',
      'ignore_thresh': '.7',
      'truth_thresh': '1',
      'random': '1'}

    # Indices of the anchors this layer is responsible for.
    anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
    print(anchor_idxs)
    # Extract anchors: flat list of ints ...
    anchors = [int(x) for x in module_def["anchors"].split(",")]
    print(anchors)
    # ... grouped into (width, height) pairs ...
    anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
    print(anchors)
    # ... reduced to the pairs selected by the mask.
    anchors = [anchors[i] for i in anchor_idxs]
    print(anchors)
    num_classes = int(module_def["classes"])
    img_height = int(hyperparams["height"])
    print(num_classes, img_height)
    # Define detection layer
    yolo_layer = YOLOLayer(anchors, num_classes, img_height)

[0, 1, 2]
[10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119), (116, 90), (156, 198), (373, 326)]
[(10, 13), (16, 30), (33, 23)]
80 416
[3, 4, 5]
[10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119), (116, 90), (156, 198), (373, 326)]
[(30, 61), (62, 45), (59, 119)]
80 416


In [None]:
# 再仔细看一下yolo层的分布
# 请看programming/python_modules_of_YOLOv3/PPytoch_YOLOv3_YOLOLayer中的演示