### Normalization

1. Normalization techniques can decrease your model’s training time by a huge factor. 

2. It normalizes each feature so that they maintains the contribution of every feature, as some feature has higher numerical value than others. This way our network can be unbiased(to higher value features).

3. It reduces Internal Covariate Shift. It is the change in the distribution of network activations due to the change in network parameters during training. To improve the training, we seek to reduce the internal covariate shift.

4. Optimization occurs faster as exploding/diminishing gradients are reduced.

5. It provides regularization also which is mostly an unintended side-effect.

#### Batch Normalization
Normalization across the minibatch. To learn the shift and scale that might be useful we add the $\gamma, \beta$ respectively.
Calculated params:

mini-batch mean, $\mu_{B} = \frac{1}{m} \sum^{m}_{i=1} x_{i}$

mini-batch var, $\sigma^{2}_{B} = \frac{1}{m}\sum^{m}_{i=1} (x_{i}-\mu_{B})^{2}$

normalize, $\widehat{x}_{i} = \frac{x_{i}-\mu{B}}{\sqrt{\sigma^{2}_{B}+\epsilon}}$

scale and shift, $y_{i} = \gamma\widehat{x_{i}}+\beta = BN_{\gamma,\beta(x_{i})}$


In [1]:
import tensorflow as tf
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from IPython.display import display
from seaborn import color_palette
import cv2
_BATCH_NORM_DECAY = 0.9
_BATCH_NORM_EPSILON = 1e-05
_LEAKY_RELU = 0.1
_ANCHORS = [(10, 13), (16, 30), (33, 23),
            (30, 61), (62, 45), (59, 119),
            (116, 90), (156, 198), (373, 326)]
_MODEL_SIZE = (416, 416)

In [2]:
# batch norm
def batch_norm(inputs, training, data_format):
    return tf.layers.batch_normalization(inputs=inputs, 
                                         axis=1 if data_format == 'channels_first' else 3,
                                         momentum=_BATCH_NORM_DECAY,
                                         epsilon=_BATCH_NORM_EPSILON,
                                         scale=True, training=training
                                        )


def fixed_padding(inputs, kernel_size, data_format):
    pad_total = kernel_size - 1
    pad_beg = pad_total//2
    pad_end = pad_total - pad_beg
    
    if data_format == 'channels_first':
        padded_inputs = tf.pad(inputs,[[0,0],[0,0],
                                      [pad_beg, pad_end],
                                      [pad_beg, pad_end]])
    else:
        padded_inputs = tf.pad(inputs,[[0,0],[pad_beg, pad_end],
                                      [pad_beg, pad_end], [0,0]])
    return padded_inputs

In [3]:
t = tf.constant([[1, 2, 3], [4, 5, 6]])
paddings = tf.constant([[1, 1], [2, 2]])
# paddings constants describe the amount of padding, here
# [[top_padding, bottom_padding],[left_padding, right_padding]]

In [4]:
sess = tf.Session()
with sess.as_default():
    print(t.eval())
    print(paddings.eval())
    print(tf.pad(t, paddings, "CONSTANT").eval())
#    print(tf.pad(t, paddings, "REFLECT").eval())
    print(tf.pad(t, paddings, "SYMMETRIC").eval())
    print(paddings.get_shape().as_list())
    print(tf.reshape(paddings, [-1,1]).eval())  
    x = tf.range(10, dtype=tf.float32)
    y = tf.range(10, dtype=tf.float32)
    x_offset, y_offset = tf.meshgrid(x,y)
    x_offset = tf.reshape(x_offset, (-1,1))
    y_offset = tf.reshape(y_offset, (-1,1))
    # changes rows to columns, columns remain columns
    
    x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
    #print(x_y_offset.eval())
    a = tf.constant([[1,2,3],[4,5,6]], tf.int32)
    b = tf.constant([2,2], tf.int32)
    print(tf.tile(a,b).eval())
    [print(i.eval()) for i in tf.unstack(a, axis=1)]
    B = tf.constant([[2, 20, 30, 3, 6], [3, 11, 16, 1, 8],
                 [14, 45, 23, 5, 27]])
    d3 = tf.constant([[[1,2],[3,4]],[[4,3],[9,8]],[[9,8],[0,10]]])
    
    #this calculates max index along all cols in the row
    print(tf.argmax(B, axis=0).eval())
    
    #this calculates max index along all rows in each col
    print(tf.argmax(B, axis=1).eval())
    print(d3.eval(), 'shape',tf.shape(d3).eval())
    print(tf.argmax(d3, axis=0).eval())
    print(tf.argmax(d3, axis=1).eval())
    print(tf.argmax(d3, axis=2).eval())
    print('comp section')
    xs = tf.constant([1,2,3,4,5,6,7], dtype=tf.int32)
    print(tf.boolean_mask(xs,xs>5).eval())



[[1 2 3]
 [4 5 6]]
[[1 1]
 [2 2]]
[[0 0 0 0 0 0 0]
 [0 0 1 2 3 0 0]
 [0 0 4 5 6 0 0]
 [0 0 0 0 0 0 0]]
[[2 1 1 2 3 3 2]
 [2 1 1 2 3 3 2]
 [5 4 4 5 6 6 5]
 [5 4 4 5 6 6 5]]
[2, 2]
[[1]
 [1]
 [2]
 [2]]
[[1 2 3 1 2 3]
 [4 5 6 4 5 6]
 [1 2 3 1 2 3]
 [4 5 6 4 5 6]]
[1 4]
[2 5]
[3 6]
[2 2 0 2 2]
[2 2 1]
[[[ 1  2]
  [ 3  4]]

 [[ 4  3]
  [ 9  8]]

 [[ 9  8]
  [ 0 10]]] shape [3 2 2]
[[2 2]
 [1 2]]
[[1 1]
 [1 1]
 [0 1]]
[[1 1]
 [0 0]
 [0 1]]
comp section
[6 7]


In [5]:
def conv2d_fixed_padding(inputs, filters, kernel_size,  data_format, strides=1):
    if strides>1:
        inputs = fixed_padding(inputs, kernel_size, data_format)
    return tf.layers.conv2d(inputs=inputs, 
                            filters=filters, 
                            kernel_size=kernel_size,
                            strides=strides,
                           padding=('SAME' if strides == 1 else 'VALID'),
                           use_bias=False, data_format=data_format)

### Feature extraction: Darknet-53
It does not has the layers required for classification at the end i.e.  Avgpool, Connected, softmax since its just used for feature extraction

In [6]:
def darknet53_residual_block(inputs, filters, training, data_format, strides=1):
    shortcut = inputs

    inputs = conv2d_fixed_padding(
        inputs, filters=filters, kernel_size=1, strides=strides,
        data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)

    inputs = conv2d_fixed_padding(
        inputs, filters= 2 * filters, kernel_size=3, strides=strides,
        data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)

    inputs += shortcut

    return inputs
    

In [7]:
def darknet53(inputs, training, data_format):
    inputs = conv2d_fixed_padding(inputs, filters=32, kernel_size=3, data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    
    inputs = conv2d_fixed_padding(inputs, filters=64, kernel_size=3, strides=2, data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format= data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    
    inputs = darknet53_residual_block(inputs, filters=32, training=training, data_format=data_format)
    inputs =  conv2d_fixed_padding(inputs, filters=128, kernel_size=3, strides=2, data_format=data_format)
    inputs = batch_norm(inputs, training=training,data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    
    
    for k in range(2):
    
        inputs = darknet53_residual_block(inputs, filters=64, training=training, data_format=data_format)
    
        
    inputs = conv2d_fixed_padding(inputs, filters=256, kernel_size=3, strides=2, data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
        
    
        
    
    for _ in range(8):
        
        inputs = darknet53_residual_block(inputs, filters=128, training=training, data_format=data_format)
        
    route1 = inputs
        
    inputs = conv2d_fixed_padding(inputs, filters=512, kernel_size=3, strides=2, data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    
    for _ in range(8):
        inputs = darknet53_residual_block(inputs, filters=256, training=training, data_format=data_format)
        
    route2 = inputs
        
    inputs = conv2d_fixed_padding(inputs, filters=1024, kernel_size=3, strides=2, data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    
    for _ in range(4):
        inputs = darknet53_residual_block(inputs, filters=512, training=training, data_format=data_format)

    return route1, route2, inputs       
    

#### 1x1 convolution layer usage

A 1×1 convolutional layer can be used that offers a channel-wise pooling, often called feature map pooling or a projection layer. 

This simple technique can be used for dimensionality reduction, decreasing the number of feature maps whilst retaining their salient features. 

It can also be used directly to create a one-to-one projection of the feature maps to pool features across channels or to increase the number of feature maps, such as after traditional pooling layers.

In [8]:
def yolo_convolution_block(inputs, filters, training, data_format):
    
    inputs = conv2d_fixed_padding(inputs, filters=filters, kernel_size=1, data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    
    inputs = conv2d_fixed_padding(inputs, filters= 2 * filters, kernel_size=3, data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    
    inputs = conv2d_fixed_padding(inputs, filters=filters, kernel_size=1, data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    
    inputs = conv2d_fixed_padding(inputs, filters=2 * filters, kernel_size=3, data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    
    inputs = conv2d_fixed_padding(inputs, filters=filters, kernel_size=1, data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    
    route = inputs
    
    inputs = conv2d_fixed_padding(inputs, filters= 2 * filters, kernel_size=3, data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
    
    return route, inputs

### DETECTION LAYERS

In [9]:
def yolo_layers(inputs, n_classes, anchors, img_size, data_format):
    n_anchors = len(anchors)
    inputs = tf.layers.conv2d(inputs, filters= n_anchors * (5+n_classes), kernel_size=1, 
                             strides=1, use_bias=True, data_format=data_format)
    print('yolo first layer working')
    shape = inputs.get_shape().as_list()
    grid_shape = shape[2:4] if data_format == 'channels_first' else shape[1:3]
    
    if data_format == 'channels_first':
        inputs = tf.reshape(inputs, [0,2,3,1])
    
    
    inputs = tf.reshape(inputs, [-1, n_anchors*grid_shape[0]*grid_shape[1], 5+n_classes])
    strides = (img_size[0]//grid_shape[0], img_size[1]//grid_shape[1])
    print('reshaping done')
    box_centers, box_shapes, confidence, classes = tf.split(inputs, [2,2,1,n_classes], axis=-1)
    print('splitting done')
    x = tf.range(grid_shape[0], dtype=tf.float32)
    y = tf.range(grid_shape[1], dtype=tf.float32)
    x_offset, y_offset = tf.meshgrid(x,y)
    x_offset = tf.reshape(x_offset, (-1,1))
    y_offset = tf.reshape(y_offset, (-1,1))
    x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
    x_y_offset = tf.tile(x_y_offset, [1, n_anchors])
    x_y_offset = tf.reshape(x_y_offset, [1, -1, 2])
    box_centers = tf.nn.sigmoid(box_centers)
    box_centers = (box_centers + x_y_offset) * strides
    
    anchors = tf.tile(anchors, [grid_shape[0] * grid_shape[1],1])
    box_shapes = tf.exp(box_shapes) * tf.to_float(anchors)
    confidence = tf.nn.sigmoid(confidence)
    classes = tf.nn.sigmoid(classes)
    inputs = tf.concat([box_centers, box_shapes, confidence, classes], axis=-1)
    
    return inputs
    
    

In [10]:
def upsample(inputs, out_shape, data_format):
    """
    
    inputs: tensor of the inputs
    out_shape: the shape to which the inputs are to be shaped
   
    """
    if data_format == 'channels_first':
        inputs = tf.transpose(inputs, [0,2,3,1])
        new_height = out_shape[3]
        new_width = out_shape[2]
    
    else:
        new_height = out_shape[2]
        new_width = out_shape[1]
    
    """
    changes the dimensions of the image based on the new dimensions 
    provided
    """
    inputs = tf.image.resize_nearest_neighbor(inputs, (new_height, new_width))
    
    if data_format == 'channel_first':
        inputs = tf.transpose(inputs, [0,3,1,2])
        
    return inputs
        

### Non-max suppression

In [11]:
def build_boxes(inputs):
    
    center_x, center_y, width, height, confidence,classes = tf.split(inputs,[1,1,1,1,1,-1], axis=-1)
    
    top_left_x = center_x - width/2
    top_left_y = center_y - height/2
    bottom_right_x = center_x + width/2
    bottom_right_y = center_y + height/2
    
    boxes = tf.concat([top_left_x, top_left_y, bottom_right_x, bottom_right_y, confidence, classes], axis=-1)
    
    return boxes

def non_max_suppression(inputs, n_classes, max_output_size, iou_threshold, confidence_threshold):
    
    batch = tf.unstack(inputs)
    boxes_dicts = []
    
    for boxes in batch:
        boxes = tf.boolean_mask(boxes, boxes[:,4] > confidence_threshold)
        classes = tf.argmax(boxes[:, 5:], axis=-1)
        boxes_dict = {}
        for cls in range(n_classes):
            mask = tf.equal(boxes[:,5], cls)
            mask_shape = mask.get_shape()
            
            if mask_shape != 0:
                class_boxes = tf.boolean_mask(boxes,  mask)
                boxes_coords, boxes_conf_scores,_ = tf.split(class_boxes, [4,1,-1], axis=-1)
                boxes_conf_scores = tf.reshape(boxes_conf_scores, [-1])
                
                # prunes away boxes with high intersection-over-union score
                indices = tf.image.non_max_suppression(boxes_coords, 
                                                       boxes_conf_scores, 
                                                       max_output_size,
                                                       iou_threshold)
                class_boxes = tf.gather(class_boxes,indices)
                boxes_dict[cls] = class_boxes[:,:5]
        boxes_dicts.append(boxes_dict)
    return boxes_dicts

In [12]:
class Yolo_v3:
    
    def __init__(self, n_classes, model_size, max_output_size, 
                 iou_threshold, confidence_threshold, data_format=None):
        if not data_format:
            if tf.test.is_built_with_cuda():
                data_format = 'channels_first'
            else:
                data_format = 'channels_last'
                
        self.n_classes = n_classes
        self.model_size = model_size
        self.max_output_size = max_output_size
        self.iou_threshold = iou_threshold
        self.confidence_threshold = confidence_threshold
        self.data_format = data_format
        
    def __call__(self, inputs, training):
        with tf.variable_scope('yolo_v3_model'):
            if self.data_format ==  'channels_first':
                inputs = tf.transpose(inputs, [0,3,1,2])
            
            inputs = inputs/255
            
            
            # -------------------------------sec_1-------------------------------
            # running the darknet53 to extract features from multiple sections of nn
            route1, route2, inputs = darknet53(inputs, 
                                               training=training,
                                               data_format=self.data_format
                                              )
            
            
            
            route, inputs = yolo_convolution_block(inputs,
                                                  training=training,
                                                   filters=512,
                                                  data_format=self.data_format)
            print('completed out 1')
            # result0
            detect1 = yolo_layers(inputs, 
                                  n_classes=self.n_classes,
                                  anchors=_ANCHORS[6:9],
                                  img_size=self.model_size,
                                  data_format=self.data_format
                                 )
            print('completed out detect1')
            
            inputs = conv2d_fixed_padding(route, filters=256, kernel_size=1,
                                         data_format=self.data_format)
            inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
            upsample_size = route2.get_shape().as_list()
            
            
            # size of the inputs decreses as we go deeper in the network
            # we upsample it for concatenation with the result of the 
            # intermediary stages
            inputs = upsample(inputs, 
                              out_shape=upsample_size,
                              data_format=self.data_format)
            axis = 1 if self.data_format == 'channels_first' else 3
            
            # concating the inputs and intermediary result
            inputs = tf.concat([inputs, route2], axis=axis)
            
            route, inputs = yolo_convolution_block(inputs,
                                                   filters=256,
                                                   training=training,
                                                   data_format=self.data_format)
           
            # result1
            detect2 = yolo_layers(inputs, n_classes=self.n_classes,
                                 anchors = _ANCHORS[3:6],
                                 img_size = self.model_size,
                                 data_format=self.data_format)
            
            inputs = conv2d_fixed_padding(route, filters=128, 
                                          kernel_size=1,
                                          data_format=self.data_format)
            inputs = batch_norm(inputs, training=training, data_format=self.data_format)
            inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)
            upsample_size = route1.get_shape().as_list()
            inputs = upsample(inputs, out_shape=upsample_size, data_format=self.data_format)
            inputs = tf.concat([inputs, route1], axis=axis)
            route, inputs = yolo_convolution_block(inputs, 
                                                   filters=128,
                                                   training=training,
                                                   data_format=self.data_format
                                                  )
            # result2
            detect3 = yolo_layers(inputs, n_classes,
                                anchors=_ANCHORS[0:3],
                                img_size=self.model_size,
                                data_format=self.data_format)
            
            
            # combining final results
            """
            
            Detection layers

            Yolo has 3 detection layers, 
            that detect on 3 different scales using respective anchors. 
            For each cell in the feature map the detection layer 
            predicts n_anchors * (5 + n_classes) values using 1x1 convolution. 
            For each scale we have n_anchors = 3. 5 + n_classes means that 
            respectively to each of 3 anchors we are going to predict 
            4 coordinates of the box, its confidence score 
            (the probability of containing an object) and class probabilities.

            """
            inputs = tf.concat([detect1, detect2, detect3], axis=1)
            inputs = build_boxes(inputs)
            
            boxes_dicts = non_max_suppression(inputs,
                                              n_classes=self.n_classes,
                                              max_output_size=self.max_output_size,
                                              iou_threshold=self.iou_threshold,
                                              confidence_threshold=self.confidence_threshold)
            
            return boxes_dicts
            

In [13]:
def load_images(img_names, model_size):
    imgs = []

    for img_name in img_names:
        img = Image.open(img_name)
        img = img.resize(size=model_size)
        img = np.array(img, dtype=np.float32)
        img = np.expand_dims(img, axis=0)
        imgs.append(img)

    imgs = np.concatenate(imgs)

    return imgs

In [14]:
def load_class_names(file_name):
    with open(file_name, 'r') as f:
        class_names = f.read().splitlines()
    return class_names

def draw_boxes(img_names, boxes_dicts, class_names, model_size):
    
    colors = ((np.array(color_palette("hls",80))*255)).astype(np.uint8)
    for num, img_name, boxes_dict in zip(range(len(img_names)), img_names, boxes_dicts):
        img = Image.open(img_name)
        draw = ImageDraw(img)
        font = ImageFont.truetype(font='../input/futur.ttf',
                                 size= (img.size[0]+img.size[1])//100)
        resize_factor = (img.size[0]/model_size[0], img.size[1].model_size[1])
        
        for cls in range(len(class_names)):
            boxes = boxes_dict[cls]
            
            if np.size(boxes) != 0:
                color = colors[cls]
                
                for box in boxes:
                    xy, confidence = box[:4], box[4]
                    xy = [xy[i]*resize_factor[i%2] for i in range(4)]
                    x0,y0 = xy[0], xy[1]
                    thickness = (img.size[0]+ img.size[1])//200
                    
                    for t in np.linsapce(0, 1, thickness):
                        xy[0], xy[1] = xy[0]+t , xy[1]+t
                        xy[2], xy[3] = xy[2]-t, xy[3]-t
                        
                        draw.rectangle(xy, outline=tuple(color))
                    
                    text = '{} {:.1f}%'.format(class_names[cls], confidence*100)
                    text_size = draw.textsize(test, font=font)
                    draw.rectangle(
                        [x0, y0 - text_size[1], x[0]+text_size[0], y0],
                        fill=tuple(color))
                    
                    draw.text((x0,y0-text_size[1]), text, fill='black', font=font)
        display(img)
                        
                    

In [15]:
a = np.arange(9).reshape(3,3)
b = np.arange(9).reshape(3,3)
print(b.shape)
# expanding dimensions is required for concatenation
# without it the elements/axes are concatenated of a tensor
# not the tensors themselves
a1 = np.expand_dims(a, axis=0)
b1 = np.expand_dims(b, axis=0)
np.concatenate([a1,b1])
eg_str = 'a\nb\nc\n'
eg_str.splitlines() #breaks at newlines

(3, 3)


['a', 'b', 'c']

In [48]:
def load_weights(variables, file_name):

    with open(file_name, "rb") as f:
        # Skip first 5 values containing irrelevant info
        np.fromfile(f, dtype=np.int32, count=5)
        weights = np.fromfile(f, dtype=np.float32)

        assign_ops = []
        ptr = 0

        # Load weights for Darknet part.
        # Each convolution layer has batch normalization.
        for i in range(52):
            conv_var = variables[5 * i]
            gamma, beta, mean, variance = variables[5 * i + 1:5 * i + 5]
            batch_norm_vars = [beta, gamma, mean, variance]

            for var in batch_norm_vars:
              
                shape = var.shape.as_list()
                num_params = np.prod(shape)
                var_weights = weights[ptr:ptr + num_params].reshape(shape)
                ptr += num_params
                assign_ops.append(tf.assign(var, var_weights))

            shape = conv_var.shape.as_list()
          
            num_params = np.prod(shape)
            var_weights = weights[ptr:ptr + num_params].reshape(
                (shape[3], shape[2], shape[0], shape[1]))
            var_weights = np.transpose(var_weights, (2, 3, 1, 0))
            ptr += num_params
            assign_ops.append(tf.assign(conv_var, var_weights))

        # Loading weights for Yolo part.
        # 7th, 15th and 23rd convolution layer has biases and no batch norm.
        
        
        ranges = [range(0, 6), range(6, 13), range(13, 20)]
        unnormalized = [6, 13, 20]
        print(variables)
        for j in range(3):
            for i in ranges[j]:
                print('i: %s, j:%s' %(i,j))
                current = 52 * 5 + 5 * i + j * 2
                
                conv_var = variables[current]
                gamma, beta, mean, variance =  \
                    variables[current + 1:current + 5]
                batch_norm_vars = [beta, gamma, mean, variance]
                
                for var in batch_norm_vars:
                    shape = var.shape.as_list()
                    num_params = np.prod(shape)
                    var_weights = weights[ptr:ptr + num_params].reshape(shape)
                    ptr += num_params
                    assign_ops.append(tf.assign(var, var_weights))
                
                shape = conv_var.shape.as_list()
                num_params = np.prod(shape)
                print('%s' % shape)
                var_weights = weights[ptr:ptr + num_params].reshape(
                    (shape[3], shape[2], shape[0], shape[1]))
                print('reaching here')
                var_weights = np.transpose(var_weights, (2, 3, 1, 0))
                ptr += num_params
                assign_ops.append(tf.assign(conv_var, var_weights))
                
            bias = variables[52 * 5 + unnormalized[j] * 5 + j * 2 + 1]
            shape = bias.shape.as_list()
            num_params = np.prod(shape)
            var_weights = weights[ptr:ptr + num_params].reshape(shape)
            ptr += num_params
            assign_ops.append(tf.assign(bias, var_weights))

            conv_var = variables[52 * 5 + unnormalized[j] * 5 + j * 2]
            shape = conv_var.shape.as_list()
            num_params = np.prod(shape)
            var_weights = weights[ptr:ptr + num_params].reshape(
                (shape[3], shape[2], shape[0], shape[1]))
            var_weights = np.transpose(var_weights, (2, 3, 1, 0))
            ptr += num_params
            assign_ops.append(tf.assign(conv_var, var_weights))

    return assign_ops

In [49]:
import os
path = '/home/iamtheuserofthis/untagged_data/yolo_test'
image_paths= []

for file in os.listdir(path):
    image_paths.append(os.path.join(path, file))

In [50]:
tf.reset_default_graph()
batch_size = len(image_paths)
batch = load_images(image_paths, model_size=_MODEL_SIZE)
class_names = load_class_names(os.path.join('/home/iamtheuserofthis/untagged_data/','coco.names'))
n_classes = len(class_names)
max_output_size = 10
iou_threshold = 0.5
confidence_threshold = 0.5

model = Yolo_v3(n_classes=n_classes, model_size=_MODEL_SIZE,
               max_output_size=max_output_size,
               iou_threshold=iou_threshold,
               confidence_threshold=confidence_threshold)

inputs = tf.placeholder(tf.float32, [batch_size, 416, 416, 3])
detections = model(inputs, training=False)

yolo_weights = '/home/iamtheuserofthis/python_workspace/data_for_ml/yolov3.weights'
model_vars = tf.global_variables(scope='yolo_v3_model')
assign_ops = load_weights(model_vars, yolo_weights)

completed out 1
yolo first layer working
reshaping done
splitting done
completed out detect1
yolo first layer working
reshaping done
splitting done
yolo first layer working
reshaping done
splitting done
i: 0, j:0
[1, 1, 1024, 512]
reaching here
i: 1, j:0
[3, 3, 512, 1024]
reaching here
i: 2, j:0
[1, 1, 1024, 512]
reaching here
i: 3, j:0
[3, 3, 512, 1024]
reaching here
i: 4, j:0
[1, 1, 1024, 512]
reaching here
i: 5, j:0
[3, 3, 512, 1024]
reaching here
i: 6, j:1
[1, 1, 512, 256]
reaching here
i: 7, j:1
[256]


IndexError: list index out of range

In [None]:
model