## 1. Read in the different layers of YOLO from the .cfg file

In [3]:
import torch 
import torch.nn as nn
import torch.functional as F
from torch.autograd import Variable
import numpy as np

### The read_lines_cfg function:

    This function simply returns all the lines in the cfg file after some preprocessing. 
    The preprocessing steps include the following :
    
    1. Removing the leading (lstrip()) and trailing (rstrip()) whitespaces.
    2. Appending a line only if it is not a comment("begins with a '#'")
    3. Appending a line only if it  not empty.
     

In [175]:
def read_lines_cfg(file):
    #opening given file in read mode
    with open(file=file, mode='r') as yolo:
        lines = [(line.lstrip()).rstrip() for line in yolo if (line[0] != '#' and not line is '\n')]
        return lines
cfg_lines = read_lines_cfg('yolo_v3/yolov3_cfg/yolov3.cfg')

### Store each layer separately in the "layers" list
    
    We now need to store the information about each layer in a dictionary. The dictionary of info
    about each layer is then added to a list. This shall be later used to construct the yolo 
    network according to the specifications laid out in the respective dictionary of each layer.

In [176]:
def get_layers(lines_list):
    layer = {}
    layers = []
    for line in lines_list:
        #check for new layer.
        if line[0] == "[":
            #This condition is to prevent an empty dictionary from being added 
            #before adding the first block.
            if len(layer) != 0:
                layers.append(layer)
                layer = {}
            #Get the value inside square brackts that tells the type of layer we are dealing with.
            layer_type = line[1:-1]
            layer["layer_type"] = layer_type
        else:
            #getting the attribute of the layer_type and value for each atrribute.
            attrib, val = line.split("=")
            #removing the trailing and leading whitespaces for the key and value respectively. 
            layer[attrib.rstrip()] = val.lstrip()
    return layers

In [177]:
layers_list = get_layers(cfg_lines)

## 2. Code up the building blocks

### These class definitions will be later used to construct the YOLO v3 network

    The Convolutional Layer from the cfg file could also comprise of batchNorm and activation layers as well. 
    Therefore we need a function which creates a module containing the convolutional, the batchNorm and the 
    activation layer. The nn.Sequential class would be the module containing the layers, and it ensures the
    sequential execution of the layers that it encapsulates.

## 2.1 Building-Block Functions for "Convolution" and "UpSample"

    PyTorch already has inbuilt support for Convolution and Upsample Layers. 
    Therefore we dont need to create classes for these layers since they already 
    have class definitions which inherit from nn.Module. 
    
    We simply need to create functions that create and return a "layer module"
    for the respective layer, tailored to the specification mentioned in the 
    dictionary for that layer.

### 2.1.1 Function to create the convolutional Module

    1. The function takes in the dictionary containing information about the particular layer.
    This includes:
        a. Values of number of filters, kernel_size, stride.
        b. Whether the conv "module" has a BatchNorm Layer.
    
    2. The function also takes in the number of filters in the previous layer. This is the 
       value that the "in_channels" attribute of the nn.Conv2D method takes in.
    
    3. Index which indicates the position of the given convolutional layer among the 
       layers of the yolo net in chronological order.
      

In [234]:
def conv_module(info_dict, prev_filters, index):
    filters = int(info_dict["filters"])
    kernel_size = int(info_dict["size"])
    stride = int(info_dict["stride"])
    
    #for every convolution layer in yolo-tiny v3, pad = 1 and this implies "same" padding
    """
    "same" padding : the amount of padding applied so that the height and width
                     of the output feature map (resulting from the conv2D operation)
                     is the same as the input height and width
    """
    
    #the value of padding that accomplishes this task (can be derived from simple convolution arithmetic)
    padding = (kernel_size - 1) // 2
    
    #batchNorm already contains a bias term by definition. 
    #Therefore, bias is set to false if conv layer contains a batchNorm layer.
    #It's almost like english!
    bias = False if "batch_normalize" in info_dict else True
    
    #creating the conv_layer
#     print(prev_filters, filters, kernel_size, stride, padding, bias)
    conv_layer = nn.Conv2d(prev_filters, filters, kernel_size=kernel_size, stride=stride, padding=padding, bias=bias)
    #let us now initialize an nn.Sequential class object
    #that will store all the layers we created into this module
    conv_module = nn.Sequential()
    conv_module.add_module("Conv_Layer{}".format(index), conv_layer)
    
    #check for batchNorm since every convolution layer does not have a batchNorm layer
    #We can do this by checking the bias for the convolutional layer.
    if not bias:
        batchNorm_layer = nn.BatchNorm2d(filters)
        conv_module.add_module("BatchNorm_Layer{}".format(index), batchNorm_layer)
    
    #activation for the conv layer could be either "linear" or "leaky relu". 
    #the following condition checks for this
    if info_dict["activation"] == "leaky":
        activation_layer = nn.LeakyReLU(negative_slope=0.1, inplace=True)
        conv_module.add_module("Activation_Layer{}".format(index), activation_layer)
    
    return (filters, conv_module)

### 2.1.2 Function to create the upsample module

In [181]:
def upsample_module(info_dict, index):
    
    scale_factor = info_dict["stride"]
    upsample_layer = nn.Upsample(scale_factor=scale_factor, mode="bilinear")
    
    #instantiate an object of nn.Sequential class to encapsulate the upsample layer
    upsample_module = nn.Sequential()
    upsample_module.add_module("Upsample_Layer{}".format(index), upsample_layer)
    
    return upsample_module

### 2.1.3 Function to create the shortcut module

In [287]:
def shortcut_module(info_dict, index):
    
    shortcut_from = info_dict["from"]
    shortcut_layer = SHORTCUT_layer(index, shortcut_from)
    shortcut_module = nn.Sequential()
    shortcut_module.add_module("Shorcut_Layer{}".format(index), shortcut_layer)

    return shortcut_module

In [286]:
class SHORTCUT_layer(nn.Module):
    def __init__(self, index, shortcut_from):
        super(SHORTCUT_layer, self).__init__()
        self.index = index
        self.shortcut_from = shortcut_from
    
    def forward(self, output_tracker):
        return output_tracker[self.index - 1] + output_tracker[self.index + self.shortcut_from]

## 2.2 Building-Block Classes for Route and YOLO 

### 2.2.1 Route Block:
    Here we wish to perform the following tasks:
        
        1. Create a function to count the number of filters that the route block concatenates
        2. Create a class with route block functionality.

In [267]:
"""
route_filters_count takes in:
    1. info_dict - info_dict for the route block
    2. index - the position of the route block int the net
    3. output_filters - list that keeps track of the number of output filters from all blocks
"""
def route_filters_count(info_dict, index, output_filters):
    num_filters = 0
    layers = info_dict["layers"].split(",")
    
    #since the route module could have either one or two layers to route,
    #the following try block will check for this.
    try:
        layer0, layer1 = int(layers[0]), int(layers[1])
    except:
        layer0, layer1 = int(layers[0]), 0
    
    print(layer0, layer1)
        
    #values for layer1 is always zero for yolo-v3 tiny and layer2 is either >=0
    #the following is only for the purpose of appending to the output_filters list.
    
    #these cases dont arise in yolo-tiny v3, but could possibly arise in yolo v3.
    if layer0 > 0:
        layer0 = layer0 - index
       
    if layer1 > 0:
        layer1 = layer1 - index
        
    #check if layers contains only one value    
    if layer1 < 0:
        num_filters = output_filters[index + layer0] + output_filters[index + layer1]
    
    else:
        num_filters = output_filters[index + layer0]

    return (num_filters, layer0, layer1)

#### The ROUTE layer class def
    The route layer class needs to be defined by us, since PyTorch does not provide
    support for the route layer. The route layer is initialized with the values
    of layer0 and layer1(if any). The forward method, finds the ouput that the 
    route block has to route from previous layers.
    
    To accomplish this, the outputs of different layers in the forward pass
    are tracked in the output_tracker. This output_tracker is passed 
    when finding which outputs to route, (and possibly concatenate). 
    
    The output that is to be routed is returned.

In [268]:
class ROUTE_layer(nn.Module):
    def __init__(self, index, layer0, layer1=None):
        super(ROUTE_layer, self).__init__()
        self.index = index
        self.layer0 = layer0
        if layer1 is None:
            self.len_layers = 1
            pass
        else:
            self.len_layers = 2
            self.layer1 = layer1
    
    def forward(self, output_tracker):
        if self.len_layers == 1:
            x = output_tracker[self.index + layer0]
        else:
            #joining the two maps along the depth dimension using torch's cat function 
            x = torch.cat((output_tracker[self.index+self.layer0], output_tracker[self.index+self.layer1]), 1)
        return x

#### Creating a ROUTE building block
    We now bring to together the function that counts the number of output filters and the 
    Class that instantiates the route layer. The following function first finds the number
    of filters, and layers values for the route layer. These "layers" values are first used to
    create an object of the route layer. 
    
    This route layer is bundled into a route module with the nn.Sequential class and is returned
    to the module_list building function along with the number of filters.

In [269]:
def route_module(info_dict, index, output_filters):
    
    
    filters, layer0, layer1 = route_filters_count(info_dict, index, output_filters)
    if layer1 == 0:
        route_layer = ROUTE_layer(index, layer0)
    else:
        route_layer = ROUTE_layer(index, layer0, layer1)
    
    route_module = nn.Sequential()
    route_module.add_module("Route_Module{}".format(index), route_layer)
    return (filters, route_module)

### 2.2.2  YOLO Block:

    Following along the lines of the Route Block, we will:
        1. Create a function to choose the anchors to use for the YOLO block
        2. Create a class for the YOLO Block that inherits from nn.Module

In [270]:
def get_yolo_anchors(info_dict, index):
    #get the mask values from info_dict
    mask = [int(x) for x in info_dict["mask"].split(",")]

    anchors = [int(x) for x in info_dict["anchors"].split(",")]
    masked_anchors = []
    for position in mask:
        #the tuple of anchor values we need are at [2*position] and [2*position  + 1]
        #Eg, if mask = 0, we need a tuple with values at [2*0] and [2*0 + 1] = [0,1]
        masked_anchors.append((anchors[2*position], anchors[2*position + 1]))
    
    print(index, masked_anchors)
    return masked_anchors    

#### YOLO_layer layer definition

In [271]:
class YOLO_layer(nn.Module):
    def __init__(self, masked_anchors):
        super(YOLO_layer, self).__init__()
        self.masked_anchors = masked_anchors

#### The function that brings together get_yolo_anchors and YOLO_layer

In [272]:
def yolo_module(info_dict, index):
    
    yolo_layer = YOLO_layer(get_yolo_anchors(info_dict, index))
    yolo_module = nn.Sequential()
    yolo_module.add_module("YOLO_layer{}".format(index), yolo_layer)
    
    return yolo_module

## 3. Bringing Section 2 together

    We can now define a function that creates an nn.ModuleList that stores all nn.Module objects.
    These nn.Module objects would contain the building blocks that we defined earlier.
    The function accepts the layers_list that we created in section 1.

In [288]:
def create_module_list(layers_list):
    module_list = nn.ModuleList() #stores all the nn.Module objects 
    prev_filters = 3 #since we start out with color images
    output_filters = []
    
    #the info about the neural net itself is contained in the first dictionary 
    #of the layers_list, and is not really a "layer"
    yolo_info = layers_list[0]
    
    for index, layer_dict in enumerate(layers_list[1:]):
        
        if layer_dict["layer_type"] == "convolutional":
            filters, module = conv_module(layer_dict, prev_filters, index)
        
        elif layer_dict["layer_type"] == "upsample":
            module = upsample_module(layer_dict, index)
        
        elif layer_dict["layer_type"] == "route":
            filters, module = route_module(layer_dict, index, output_filters)
        
        elif layer_dict["layer_type"] == "shortcut":
            module = shortcut_module(layer_dict, index)
            
        elif layer_dict["layer_type"] == "yolo":
            module = yolo_module(layer_dict, index)

        else:
            continue
        module_list.append(module)        
        prev_filters = filters
        output_filters.append(filters)
#     print(prev_filters)
    print(output_filters)
    return (yolo_info, module_list)

In [289]:
info, mod_list=create_module_list(layers_list)

82 [(116, 90), (156, 198), (373, 326)]
-4 0
-1 61
94 [(30, 61), (62, 45), (59, 119)]
-4 0
-1 36
[32, 64, 32, 64, 64, 128, 64, 128, 128, 64, 128, 128, 256, 128, 256, 256, 128, 256, 256, 128, 256, 256, 128, 256, 256, 128, 256, 256, 128, 256, 256, 128, 256, 256, 128, 256, 256, 512, 256, 512, 512, 256, 512, 512, 256, 512, 512, 256, 512, 512, 256, 512, 512, 256, 512, 512, 256, 512, 512, 256, 512, 512, 1024, 512, 1024, 1024, 512, 1024, 1024, 512, 1024, 1024, 512, 1024, 1024, 512, 1024, 512, 1024, 512, 1024, 255, 255, 512, 256, 256, 768, 256, 512, 256, 512, 256, 512, 255, 255, 256, 128, 128, 384, 128, 256, 128, 256, 128, 256, 255]


In [291]:
# info
# mod_list

## 4. Building the YOLO net

        We will now use the module_list that we constructed in section 3 to build up the
        class for the YOLO network. We would also need to workout the forward pass for the
        YOLO net. 
        
        The net itself inherits nn.Module class, like every one of its individual
        layers. This makes sense, since the YOLO net can be thought of as a huge layer/function.
        That is indeed the case, since any neural net is a composition of functions.

In [None]:
class YOLOv3(nn.Module):
    def __init__(self, config_filename):
        super(YOLOv3, self).__init__()
        self.layers_list = read_lines_cfg(config_filename)
        self.mod_list = create_module_list(self.layers_list)
    
    def forward(self, x, cuda):
        output_tracker = {}
        
        for index, layer in enumerate(self.layers_list[1:]):
            
            if layer["type"] == "convolutional":
                out = self.module_list[index](x)
                
            elif layer["type"] == "upsample":
                out = self.module_list[index](x)
            
            elif layer["type"] == "route":
                out = self.module_list[index](output_tracker)
            
            elif layer["type"] == "shortcut":
                

## 4. Function to transform the output

In [252]:
len(mod_list)

106

In [254]:
# for index, layer in enumerate(layers_list):
#     if layer["layer_type"] == "convolutional":
#     if layer["layer_type"] == "yolo":
#         anchors = [int(x) for x in layer["anchors"].split(",")]
#         print(anchors)
#         print(layer["layers"].split(","))
#         print(index, layer["size"])