In [1]:
!pip3 install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl
!pip3 install torchvision

Collecting torch==0.3.0.post4 from http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl
[?25l  Downloading http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl (592.3MB)
[K     |████████████████████████████████| 592.3MB 1.1MB/s 
[31mERROR: torchvision 0.3.0 has requirement torch>=1.1.0, but you'll have torch 0.3.0.post4 which is incompatible.[0m
[31mERROR: fastai 1.0.55 has requirement torch>=1.0.0, but you'll have torch 0.3.0.post4 which is incompatible.[0m
Installing collected packages: torch
  Found existing installation: torch 1.1.0
    Uninstalling torch-1.1.0:
      Successfully uninstalled torch-1.1.0
Successfully installed torch-0.3.0.post4
Collecting torch>=1.1.0 (from torchvision)
[?25l  Downloading https://files.pythonhosted.org/packages/69/60/f685fb2cfb3088736bafbc9bdbb455327bdc8906b606da9c9a81bae1c81e/torch-1.1.0-cp36-cp36m-manylinux1_x86_64.whl (676.9MB)
[K     |████████████████████████████████| 676.9MB 1

In [58]:
import torch
print(torch.__version__)

1.1.0


In [0]:
from __future__ import division
import numpy as np
import cv2

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from util import *
from Blocks import EmptyLayer, DetectionLayer

In [0]:
def parse_cfg(filepath):
    """
    Creating blocks from a cfg file
    input: the file path of .cfg
    output: a list of blocks of the model
    """
    cfg_file = open(filepath, 'r')
    lines = cfg_file.read().split('\n')
    lines = [l for l in lines if len(l) > 0]
    lines = [l for l in lines if l[0] != '#']
    lines = [l.rstrip().lstrip() for l in lines]

    block = {}
    blocks = []

    for line in lines:
        if line[0] == '[':
            if len(block) != 0:
                blocks.append(block)
                block = {}
            block['type'] = line[1:-1].rstrip()
        else:
            key, val = line.split('=')
            block[key.rstrip()] = val.lstrip()
    blocks.append(block)                            # for the last one

    return blocks

In [0]:
def create_modules(blocks):
    """
    By taking out the blocks information one by one, create modules accordingly.
    Input: a list of dictionaries of the block attributes
    Output: a module list populated by the modules for convolutional, upsampling, route and yolo layers
    """
    net_setting = blocks[0]
    module_list = nn.ModuleList()
    input_filter = 3
    output_filters = []

    for idx, block in enumerate(blocks[1:]):
        module = nn.Sequential()

        # ConvBlock
        if block['type'] == 'convolutional':
            activation = block['activation']

            try:
                batch_normalize = int(block['batch_normalize'])
                bias = False
            except:
                batch_normalize = 0
                bias = True

            filters = int(block['filters'])
            kernel_size = int(block['size'])
            stride = int(block['stride'])
            padding = int(block['pad'])

            if padding:
                pad = (kernel_size - 1) // 2  # SAME
            else:
                pad = 0                       # VALID

            # ConvLayer
            conv_layer = nn.Conv2d(in_channels = input_filter, out_channels = filters,
                                    kernel_size = kernel_size, stride = stride, padding = pad, bias = bias)
            module.add_module('conv_{}'.format(idx), conv_layer)

            # BNLayer
            if batch_normalize:
                bn_layer = nn.BatchNorm2d(filters)
                module.add_module('batch_norm_{}'.format(idx), bn_layer)

            # Activation layer
            if activation:
                act_layer = nn.LeakyReLU(.1, inplace = True)
                module.add_module('leaky_{}'.format(idx), act_layer)

        # Upsampling
        elif block['type'] == 'upsample':
            stride = int(block['stride'])
            upsample = nn.Upsample(scale_factor = 2, mode = 'bilinear')
            module.add_module('upsample_{}'.format(idx), upsample)

        # Skip connection
        elif block['type'] == 'shortcut':
            from_layer = int(block['from'])
            skip_layer = EmptyLayer()
            module.add_module('shortcut_{}'.format(idx), skip_layer)

        # Route Layer
        elif block['type'] == 'route':
            layer_nums = block['layers'].split(',')
            start = int(layer_nums[0])

            try:
                end = int(layer_nums[1])
            except:
                end = 0

            route_layer = EmptyLayer()
            module.add_module("route_{}".format(idx), route_layer)

            # Copying the filters from the its previous layer
            if end == 0:
                filters = output_filters[start]
            else:
                filters = output_filters[start] + output_filters[end]

        # Detection Layer
        elif block['type'] == 'yolo':
            mask = [int(m) for m in block['mask'].split(',')]

            anchors = [int(a) for a in block['anchors'].split(',')]
            anchors = [(anchors[2*i], anchors[2*i+1]) for i in range(9)]
            anchors = [anchors[i] for i in mask]

            detect_layer = DetectionLayer(anchors)
            module.add_module('detection_{}'.format(idx), detect_layer)

        module_list.append(module)
        input_filter = filters
        output_filters.append(filters)

    return (net_setting, module_list)

In [0]:
class Darknet(nn.Module):

    def __init__(self, blocks, net_setting, module_list):
        super(Darknet, self).__init__()
        self.blocks = blocks
        self.net_setting = net_setting
        self.module_list = module_list

    def forward(self, x, CUDA):
        blocks = self.blocks[1:]
        outputs = {}                # for caching the layers for EmptyLayers
        temp = 0

        for idx, block in enumerate(blocks):

            type = block['type']
            if (type == 'convolutional') or (type == 'upsample'):
                x = self.module_list[idx](x)
                outputs[idx] = x

            elif type == 'shortcut':
                n = int(block['from'])
                x = outputs[idx-1] + outputs[idx+n]
                outputs[idx] = x

            elif type == 'route':
                layers = [int(i) for i in block['layers'].split(',')]
                if len(layers) == 1:
                    x = outputs[idx + layers[0]]
                else:
                    x_1 = outputs[idx + layers[0]]
                    x_2 = outputs[layers[1]]
                    x = torch.cat((x_1, x_2), 1)
                outputs[idx] = x

            elif type == 'yolo':
                # get the attibutes
                anchors = self.module_list[idx][0].anchors
                im_size = int(self.net_setting['height'])
                n_classes = int(block['classes'])
                outputs[idx] = x

                # Convert the feature map into 2D tensor
                x = x.data
                x = predict_transform(x, im_size, anchors, n_classes, CUDA)

                if temp == 0:
                    detections = x
                    temp = 1
                else:
                    detections = torch.cat((detections, x), 1)

        return detections

## Understanding the structure of `blocks` and `module_list` 

In [32]:
# blocks
blocks = parse_cfg('yolo_v3.cfg')

for b in blocks[85:88]: 
  print(b)

{'type': 'convolutional', 'batch_normalize': '1', 'filters': '256', 'size': '1', 'stride': '1', 'pad': '1', 'activation': 'leaky'}
{'type': 'upsample', 'stride': '2'}
{'type': 'route', 'layers': '-1, 61'}


In [57]:
# module list
net_setting, module_list = create_modules(blocks)
print(module_list)

ModuleList(
  (0): Sequential(
    (conv_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (batch_norm_0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (leaky_0): LeakyReLU(negative_slope=0.1, inplace)
  )
  (1): Sequential(
    (conv_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (batch_norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (leaky_1): LeakyReLU(negative_slope=0.1, inplace)
  )
  (2): Sequential(
    (conv_2): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (batch_norm_2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (leaky_2): LeakyReLU(negative_slope=0.1, inplace)
  )
  (3): Sequential(
    (conv_3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (batch_norm_3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running

In [56]:
print(net_setting)

{'type': 'net', 'batch': '64', 'subdivisions': '16', 'width': '416', 'height': '416', 'channels': '3', 'momentum': '0.9', 'decay': '0.0005', 'angle': '0', 'saturation': '1.5', 'exposure': '1.5', 'hue': '.1', 'learning_rate': '0.001', 'burn_in': '1000', 'max_batches': '500200', 'policy': 'steps', 'steps': '400000,450000', 'scales': '.1,.1'}


### Detection Layer

In [162]:
layers = module_list[3]
layers

Sequential(
  (conv_3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (batch_norm_3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (leaky_3): LeakyReLU(negative_slope=0.1, inplace)
)

In [161]:
blocks[4]

{'type': 'convolutional', 'batch_normalize': '1', 'filters': '64', 'size': '3', 'stride': '1', 'pad': '1', 'activation': 'leaky'}

In [165]:
print("Conv Layer:", layers[0])
print("BN Layer:", layers[1])

Conv Layer: Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
BN Layer: BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)


In [198]:
# The number of weights in Conv Layer
len(layers[0].weight)

64

In [199]:
# The number of bias in BN Layer
len(layers[1].bias)

64

In [43]:
module_list[82]

Sequential(
  (detection_82): DetectionLayer()
)

In [46]:
module_list[82][0]

DetectionLayer()

As the outcome is the `DetectionLayer`class, it contains `anchors` as an attibute. 

In [54]:
module_list[82][0].anchors

[(116, 90), (156, 198), (373, 326)]

## Testing

In [0]:
def load_test_data(img_file):
    img = cv2.imread('ex.jpg')
    img = cv2.resize(img, (416, 416))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.transpose((2, 0, 1))       # Channel - hight - width
    img = img[None, :, :, :]/255.0
    img = Variable(torch.from_numpy(img).float(), requires_grad = True)
    return img

In [157]:
img = load_test_data('ex.jpg')
model = Darknet(blocks, net_setting, module_list)

pred = model.forward(img, torch.cuda.is_available())
print(pred)

  "See the documentation of nn.Upsample for details.".format(mode))


tensor([[[ 16.5786,  15.9309, 112.9327,  ...,   0.4985,   0.5016,   0.5413],
         [ 15.9393,  16.8631, 154.8435,  ...,   0.5021,   0.5374,   0.5287],
         [ 16.9896,  15.9145, 395.9537,  ...,   0.4997,   0.4942,   0.4988],
         ...,
         [411.9830, 411.9883,   9.6338,  ...,   0.4991,   0.5365,   0.5457],
         [412.4276, 412.0823,  15.6602,  ...,   0.4955,   0.5147,   0.5197],
         [411.9772, 412.1264,  34.2431,  ...,   0.4971,   0.4991,   0.5054]]])


In [158]:
pred.size()

torch.Size([1, 10647, 85])

## Load the pretrained weight

In [171]:
!wget https://pjreddie.com/media/files/yolov3.weights

--2019-07-28 14:13:13--  https://pjreddie.com/media/files/yolov3.weights
Resolving pjreddie.com (pjreddie.com)... 128.208.4.108
Connecting to pjreddie.com (pjreddie.com)|128.208.4.108|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 248007048 (237M) [application/octet-stream]
Saving to: ‘yolov3.weights’


2019-07-28 14:13:20 (36.0 MB/s) - ‘yolov3.weights’ saved [248007048/248007048]



In [201]:
fp = open('yolov3.weights', 'rb')
header = np.fromfile(fp, dtype = np.int32, count = 5)
print(header)

[       0        2        0 32013312        0]


In [202]:
# fp = open('yolov3.weights', 'rb')
weights = np.fromfile(fp, dtype = np.float32)
print(weights)

[-4.316885   -0.7578076  -2.1098018  ... -0.03373355 -0.04969028
 -0.11880384]


Let's fetch the weight of the first batch normalization layer for pratice.

In [203]:
layers = module_list[1]
print("ConvLayer: ", layers[0])
print("BNLayer: ", layers[1])
print("Activation: ", layers[2])

ConvLayer:  Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
BNLayer:  BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
Activation:  LeakyReLU(negative_slope=0.1, inplace)


In [204]:
# The number of weights in the BNLayer
a = len(layers[1].bias)
print(a)

64


In [206]:
pos = 0
bn_bias = weights[pos:pos+a]   
print("The bias of BN layer:\n", bn_bias)

The bias of BN layer:
 [-4.316885   -0.7578076  -2.1098018   1.7402638   1.4071269  -3.0952053
 -0.38860837  0.75603795  1.982805    1.2893223   0.652888    2.6263633
  2.3013082  -2.04827    -3.7340226  -2.0467598   3.845535   -1.0419698
 -0.30135924 -0.35420752 -3.5354283  -2.628548    0.74821305  0.3917957
  2.3627155  -1.7907225   2.5973146  -0.34963462 -2.6972923  -2.688753
  0.99702346 -0.20098142  2.6223972   1.3536469   1.6286678   0.99905145
 -1.0922571   2.6365428   0.9870672   1.6019626   0.8613433  -1.3214242
  1.1380545   1.0997647   1.7826979   1.5409452   2.5057783   1.8394158
  0.9597077   1.506216    1.1141889   1.1447448   1.7145146  -2.1283774
 -1.0026759  -1.4917758   0.65230376  1.4994049   1.6686112   1.1225682
  2.2276077   2.1827738   1.1739247   1.1685042 ]


In [207]:
pos += a
bn_weight = weights[pos:pos+a]   
print("The weight of BN layer:\n", bn_weight)

The weight of BN layer:
 [-0.5719402  -0.7738392  -0.28336832  0.39322987  0.00780761 -0.6109912
 -0.16133149 -0.22186707 -0.29902607  0.11704189 -0.00553402  0.01755599
 -0.02504491  0.00425338  0.42962    -0.44072402 -0.2520025  -0.21482663
 -0.00248128 -0.2174843  -0.78819096  0.4381482  -0.06644736  0.11193772
  0.47471324 -0.00680234  0.05487546 -0.00235039  0.5519605  -0.60440457
  0.1623201  -0.01310652  0.08165007  0.14167316  0.02880523  0.07606807
  0.01272001  0.10170069  0.01465652  0.29286936  0.13619897  0.10157995
  0.00747671  0.01324554  0.12384079  0.01671677  0.05822505  0.05930388
  0.12105063  0.01601543  0.00481599  0.0299536   0.15219504  0.05451044
  0.16748613  0.28054863  0.1239534   0.0021424   0.04058676  0.00720256
  0.07339441  0.09430981  0.2655725   0.00556735]


In [208]:
pos += a
bn_mean = weights[pos:pos+a]   
print("The mean of BN layer:\n", bn_mean)

The mean of BN layer:
 [-0.07398386  0.17927837 -0.03000734  0.09115071 -0.01687618  0.09801032
  0.00271729 -0.72746915  0.02753209 -0.0435802  -0.10812147 -0.02765798
  0.03745255 -0.14916518  0.06877749  0.04023559 -0.41197103  0.06216228
  0.0263346   0.11036139 -0.07168365  0.06076885  0.05067024  0.03663862
 -0.00710725 -0.44482297  0.01447257 -0.16561401 -0.13958108 -0.07555092
 -0.02653021 -0.25811696 -0.0265929  -0.02875255  0.03781441 -0.05215133
 -0.18111701 -0.15509231  0.10516036 -0.03052539 -0.26097408  0.06879067
  0.04111101  0.08765321  0.00564741 -0.21097177 -0.17506115  0.03373048
 -0.09253465 -0.24226649  0.10729069 -0.03975431  0.01255679  0.02010574
  0.09200411  0.01070675 -0.09049273 -0.20219928 -0.11879097 -0.06624901
 -0.08976902 -0.13811569 -0.00273665  0.1540454 ]


In [209]:
pos += a
bn_var = weights[pos:pos+a]   
print("The var of BN layer:\n", bn_var)

The var of BN layer:
 [ 0.23836261  0.10741682 -0.18967193  0.11737569  0.15239213 -0.27533895
 -0.16242985 -0.01306018  0.09999298  0.22160274  0.16180561 -0.21416326
  0.09505861  0.13041629 -0.34165373 -0.21131764 -0.04622072 -0.5134506
  0.42515525  0.02908342 -0.07077545  1.0736752   0.6742873  -0.508841
  0.21633276 -0.01211012 -0.36854535  0.09555301 -0.37258154 -0.11152145
  0.6298196   0.24631211 -0.40302163 -0.00870701 -0.17198852 -0.11540813
  0.2767226  -0.1285944  -0.073783    0.37037814  0.10376796 -0.32020456
  0.01049588 -0.20812213 -0.43619505 -1.5966752  -0.54551464  0.05627443
 -0.06868107  0.11647671  0.6183516   1.1793071   0.6681737   0.02183359
  0.19343184  0.03098689 -0.02533171  0.13192055 -0.09908673 -0.02646758
 -0.04517018 -0.1017449   0.5415385   1.4532353 ]


Now we're going to load these weights to our model accordingly. 

In [217]:
bn_layer = layers[1]
bn_layer.bias.data.size()
bn_bias = torch.from_numpy(bn_bias)
bn_bias.view_as(bn_layer.bias.data)

tensor([-4.3169, -0.7578, -2.1098,  1.7403,  1.4071, -3.0952, -0.3886,  0.7560,
         1.9828,  1.2893,  0.6529,  2.6264,  2.3013, -2.0483, -3.7340, -2.0468,
         3.8455, -1.0420, -0.3014, -0.3542, -3.5354, -2.6285,  0.7482,  0.3918,
         2.3627, -1.7907,  2.5973, -0.3496, -2.6973, -2.6888,  0.9970, -0.2010,
         2.6224,  1.3536,  1.6287,  0.9991, -1.0923,  2.6365,  0.9871,  1.6020,
         0.8613, -1.3214,  1.1381,  1.0998,  1.7827,  1.5409,  2.5058,  1.8394,
         0.9597,  1.5062,  1.1142,  1.1447,  1.7145, -2.1284, -1.0027, -1.4918,
         0.6523,  1.4994,  1.6686,  1.1226,  2.2276,  2.1828,  1.1739,  1.1685])