**Implement VGG using TF**

In [9]:
# Load data
import os
import tensorflow as tf
import numpy as np
from math import ceil

# load weights from https://dl.dropboxusercontent.com/u/50333326/vgg16.npy
path = '/home/ariel/DL/tensorflow/tutorials/'
vgg16_npy_path = os.path.join(path, 'vgg16.npy')            
data_dict = np.load(vgg16_npy_path, encoding='latin1').item()

# print keys in dictionary - 
# here it prints all the layers names in the VGG net
name = 'conv1_1'
print 'keys in layers = ', data_dict.keys(), '\n'
# tensorflow: weights are [height, width, in_channels, out_channels]
print 'weights shape in conv1_1', data_dict[name][0].shape, '\n'
print 'bias shape in conv1_1', data_dict[name][1].shape, '\n'


keys in layers =  ['conv5_1', 'fc6', 'conv5_3', 'fc7', 'fc8', 'conv5_2', 'conv4_1', 'conv4_2', 'conv4_3', 'conv3_3', 'conv3_2', 'conv3_1', 'conv1_1', 'conv1_2', 'conv2_2', 'conv2_1'] 

weights shape in conv1_1 (3, 3, 3, 64) 

bias shape in conv1_1 (64,) 



**The Conv Block in TF**
In the `_fc2conv()` block, each fully connected layer is converted into a convolutional one.
For 'fc8', the last FC block, the layer can be fine-tuned to a number of classes that is lower than the
original number of classes (= 1000).
For the moment, the new number of classes == original number of classes == 1000.
Two methods may be used to convert the original number of classes to a new number of classes:
1. averaging - see https://github.com/MarvinTeichmann/tensorflow-fcn/blob/master/fcn8_vgg.py
2. fine tuning - see https://github.com/fastai/courses/blob/master/deeplearning1/nbs/vgg16.py

Both methods should be tried experimentally, and the better one selected.

Another note: I'm not using weight decay yet - I still need to experiment with it.

In [22]:
def _max_pool(bottom, name):
    """Apply 2x2 max pooling with stride 2 and 'SAME' padding to `bottom`.

    Args:
      bottom: input tensor handed to tf.nn.max_pool.
      name: name given to the pooling op.
    Returns:
      The pooled tensor.
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(bottom, ksize=window, strides=step,
                          padding='SAME', name=name)

def get_weights(name):
    """Create the variable "weights" initialised from the stored filter of a layer.

    Args:
      name: key into the module-level `data_dict`; entry [0] of that layer
            is used as the initial value.
    Returns:
      The variable returned by tf.get_variable, with the stored array's shape.
    """
    kernel = data_dict[name][0]
    return tf.get_variable(
        name="weights",
        initializer=tf.constant_initializer(value=kernel, dtype=tf.float32),
        shape=kernel.shape)

def get_biases(name):
    """Create the variable "biases" initialised from the stored biases of a layer.

    Args:
      name: key into the module-level `data_dict`; entry [1] of that layer
            is used as the initial value.
    Returns:
      The variable returned by tf.get_variable, with the stored array's shape.
    """
    offsets = data_dict[name][1]
    return tf.get_variable(
        name="biases",
        initializer=tf.constant_initializer(value=offsets, dtype=tf.float32),
        shape=offsets.shape)

# convert FC layer into Convolution layer
def _fc2conv(bottom, name, num_classes = None):
    """Run a stored fully connected layer as a convolution over `bottom`.

    The layer's weights from `data_dict` are reshaped into a convolution
    filter, convolved with stride 1 and 'SAME' padding, the stored biases are
    added and a ReLU is applied (for every layer, 'fc8' included).

    Args:
      bottom: input tensor.
      name: layer key in `data_dict`; also used as the variable scope.
            'fc6' and 'fc7' have dedicated filter shapes, any other name is
            treated as 'fc8' (all 1000 classes).
      num_classes: accepted but currently not used.
    Returns:
      The activated output tensor.
    """
    # tf weights: [kernel_rows, kernel_cols, input, output]
    kernel_shapes = {
        'fc6': [7, 7, 512, 4096],
        'fc7': [1, 1, 4096, 4096],
    }

    with tf.variable_scope(name):
        # anything that is not fc6/fc7 falls back to the fc8 layout
        shape = kernel_shapes.get(name, [1, 1, 4096, 1000])
        kernel = data_dict[name][0].reshape(shape)

        # filter variable initialised with the reshaped weights
        filt = tf.get_variable(
            name="weights",
            initializer=tf.constant_initializer(value=kernel, dtype=tf.float32),
            shape=shape)

        # conv -> + bias -> relu
        conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
        return tf.nn.relu(tf.nn.bias_add(conv, get_biases(name)))

def _conv_layer(bottom, name):
    """Build one convolution layer: relu(conv2d(bottom, weights) + biases).

    Args:
      bottom: input tensor.
      name: layer key in `data_dict`; also used as the variable scope.
    Returns:
      The activated output tensor.
    """
    with tf.variable_scope(name):
        # variables are created in the same order as before: weights, then biases
        kernel, offsets = get_weights(name), get_biases(name)

        # stride 1 in every dimension, 'SAME' padding
        activation = tf.nn.conv2d(bottom, kernel, [1, 1, 1, 1], padding='SAME')
        activation = tf.nn.bias_add(activation, offsets)
        return tf.nn.relu(activation)
    
def get_deconv_filter(f_shape):
    """Create the variable "up_filter" holding a bilinear interpolation kernel.

    The spatial part of the filter is a separable bilinear kernel; it is
    copied onto the "diagonal" channel pairs (i, i) for i in
    range(f_shape[2]), every other channel pair stays zero.

    Fix over the previous version: the extent of axis 1 is now read from
    f_shape[1] (it used f_shape[0]), and each axis gets its own scale/centre,
    so non-square kernels no longer raise IndexError or stay partly zero.
    Square kernels produce exactly the same values as before.

    Args:
      f_shape: filter shape, indexed as [axis0, axis1, f_shape[2], f_shape[3]].
    Returns:
      The variable returned by tf.get_variable, of shape `f_shape`.
    """
    def _bilinear_profile(size):
        # 1-D bilinear weights for a kernel of `size` taps
        factor = ceil(size / 2.0)
        center = (2 * factor - 1 - factor % 2) / (2.0 * factor)
        return 1 - np.abs(np.arange(size) / float(factor) - center)

    # bilinear[x, y] = profile0[x] * profile1[y]
    bilinear = np.outer(_bilinear_profile(f_shape[0]),
                        _bilinear_profile(f_shape[1]))

    weights = np.zeros(f_shape)
    for i in range(f_shape[2]):
        weights[:, :, i, i] = bilinear

    init = tf.constant_initializer(value=weights, dtype=tf.float32)
    var = tf.get_variable(name="up_filter", initializer=init, shape=weights.shape)
    return var

# currently - no l2 regularization   
def _variable_with_weight_decay(shape, stddev, wd):
    """Create the variable 'weights' drawn from a truncated normal distribution.

    Weight decay is currently switched off: `wd` is accepted but not used,
    and no L2 loss is added to any collection.

    Args:
      shape: list of ints
      stddev: standard deviation of the truncated Gaussian
      wd: intended multiplier for an L2Loss weight decay; ignored for now.
    Returns:
      Variable Tensor
    """
    # Disabled L2 regularisation, kept for later experiments:
    #     collection_name = tf.GraphKeys.REGULARIZATION_LOSSES
    #     if wd and (not tf.get_variable_scope().reuse):
    #         weight_decay = tf.multiply(
    #             tf.nn.l2_loss(var), wd, name='weight_loss')
    #         tf.add_to_collection(collection_name, weight_decay)
    #     _variable_summaries(var)
    return tf.get_variable(
        'weights',
        shape=shape,
        initializer=tf.truncated_normal_initializer(stddev=stddev))


def _bias_variable(shape, constant=0.0):
    """Create the variable 'biases' with every entry set to `constant`.

    Args:
      shape: shape of the bias variable.
      constant: initial value used for all entries (default 0.0).
    Returns:
      The variable returned by tf.get_variable.
    """
    return tf.get_variable(
        name='biases',
        shape=shape,
        initializer=tf.constant_initializer(constant))

def _score_layer(bottom, name, num_classes):
    """Build a 1x1 convolution that maps `bottom` to `num_classes` channels.

    The weights are freshly initialised (truncated normal) with a standard
    deviation that depends on the layer name; biases start at 0. No
    activation is applied.

    Args:
      bottom: input tensor; its 4th dimension must have a static size.
      name: one of "score_fr", "score_pool4", "score_pool3"; also used as
            the variable scope.
      num_classes: number of output channels.
    Returns:
      The output tensor (convolution + bias).
    Raises:
      ValueError: if `name` is not one of the supported layer names
                  (previously this failed later with an unbound `stddev`).
    """
    # get number of input channels
    in_features = bottom.get_shape()[3].value

    if name == "score_fr":
        # He initialization scheme. 2.0 forces float division: under
        # Python 2, `2 / in_features` is integer division and yields 0.
        stddev = (2.0 / in_features) ** 0.5
    elif name == "score_pool4":
        stddev = 0.001
    elif name == "score_pool3":
        stddev = 0.0001
    else:
        raise ValueError(
            "unsupported score layer name %r; expected 'score_fr', "
            "'score_pool4' or 'score_pool3'" % (name,))

    with tf.variable_scope(name):
        shape = [1, 1, in_features, num_classes]

        # Passed along for when weight decay gets enabled in the helper.
        w_decay = 5e-4

        # Apply convolution
        weights = _variable_with_weight_decay(shape, stddev, w_decay)
        conv = tf.nn.conv2d(bottom, weights, [1, 1, 1, 1], padding='SAME')

        # Apply bias
        conv_biases = _bias_variable([num_classes], constant=0.0)
        bias = tf.nn.bias_add(conv, conv_biases)

        return bias
        
def _upscore_layer(bottom, shape, num_classes, name, ksize=4, stride=2):
    """Upsample `bottom` with a transposed convolution using a bilinear filter.

    Args:
      bottom: input tensor; its 4th dimension must have a static size.
      shape: target shape whose first three entries give the output's
             batch/rows/cols, or None to derive them from `bottom` as
             (size - 1) * stride + 1.
      num_classes: number of output channels.
      name: variable scope for the layer.
      ksize: spatial size of the (square) filter.
      stride: upsampling stride used for both spatial dimensions.
    Returns:
      The output tensor of tf.nn.conv2d_transpose.
    """
    with tf.variable_scope(name):
        in_features = bottom.get_shape()[3].value

        if shape is not None:
            out_shape = [shape[0], shape[1], shape[2], num_classes]
        else:
            # Derive the output size from the input tensor itself
            bottom_shape = tf.shape(bottom)
            out_rows = ((bottom_shape[1] - 1) * stride) + 1
            out_cols = ((bottom_shape[2] - 1) * stride) + 1
            out_shape = [bottom_shape[0], out_rows, out_cols, num_classes]

        # filter layout: [rows, cols, output channels, input channels]
        up_filter = get_deconv_filter([ksize, ksize, num_classes, in_features])
        return tf.nn.conv2d_transpose(bottom, up_filter, out_shape,
                                      strides=[1, stride, stride, 1],
                                      padding='SAME')

**The network**

In [27]:
# Clear the default graph (all nodes and variables) so that re-running this
# cell starts from scratch.
tf.reset_default_graph()

# Build a synthetic test image: a linear ramp from -0.1 to 0.5 (not a real photo).
IMAGE_SIZE = 224
img_shape = (1, IMAGE_SIZE, IMAGE_SIZE, 3) # tf format (N,H,W,C)
img = np.linspace(-0.1, 0.5, num=np.prod(img_shape)).reshape(img_shape)

# Subtract the per-channel ImageNet mean; the (1,1,3) array broadcasts over N, H, W.
vgg_mean = np.array([103.939, 116.779, 123.68], dtype=np.float32).reshape((1,1,3)) # BGR
img = img - vgg_mean

# tf handle to img (stored as a float32 variable)
tf_img = tf.Variable(img, name="img", dtype=tf.float32)

# VGG16 network (13 conv layers + fc6/fc7/fc8 run as convolutions)

# block 1
_conv1_1 = _conv_layer(tf_img, 'conv1_1')
_conv1_2 = _conv_layer(_conv1_1, 'conv1_2')
_pool1   = _max_pool(_conv1_2, 'pool1')

# block 2
_conv2_1 = _conv_layer(_pool1, 'conv2_1')
_conv2_2 = _conv_layer(_conv2_1, 'conv2_2')
_pool2   = _max_pool(_conv2_2, 'pool2')

# block 3
_conv3_1 = _conv_layer(_pool2, 'conv3_1')
_conv3_2 = _conv_layer(_conv3_1, 'conv3_2')
_conv3_3 = _conv_layer(_conv3_2, 'conv3_3')
_pool3   = _max_pool(_conv3_3, 'pool3')

# block 4
_conv4_1 = _conv_layer(_pool3, 'conv4_1')
_conv4_2 = _conv_layer(_conv4_1, 'conv4_2')
_conv4_3 = _conv_layer(_conv4_2, 'conv4_3')
_pool4   = _max_pool(_conv4_3, 'pool4')

# block 5
_conv5_1 = _conv_layer(_pool4, 'conv5_1')
_conv5_2 = _conv_layer(_conv5_1, 'conv5_2')
_conv5_3 = _conv_layer(_conv5_2, 'conv5_3')
_pool5   = _max_pool(_conv5_3, 'pool5')

# fully connected layers, applied as convolutions (fc8 keeps all 1000 classes)
_fc6 = _fc2conv(_pool5, 'fc6')
_fc7 = _fc2conv(_fc6, 'fc7')
_fc8 = _fc2conv(_fc7, 'fc8')

# Upsample the fc8 output to the spatial size of pool4.
# NOTE(review): fc8 has 1000 channels but num_classes is 20; the bilinear
# filter is only non-zero on channel pairs (i, i) for i < 20, so it looks like
# only the first 20 fc8 channels contribute here - confirm this is intended.
_upscore2 = _upscore_layer(_fc8, 
                           tf.shape(_pool4), 
                           num_classes = 20, 
                           name = 'upscore2')

# 1x1 scoring convolution on pool4 (randomly initialised weights)
_score_pool4 = _score_layer(_pool4, 
                            name = 'score_pool4', 
                            num_classes=20)

# fuse the upsampled scores with the pool4 scores (element-wise sum)
_fuse_pool4 = tf.add(_upscore2, _score_pool4)


# NOTE(review): the session is never closed in this cell - presumably it is
# kept open for later cells; confirm, or wrap in `with tf.Session() as sess:`.
sess = tf.Session()
# NOTE(review): tf.initialize_all_variables() is deprecated in later TF 1.x
# releases in favour of tf.global_variables_initializer().
sess.run(tf.initialize_all_variables())
res = sess.run(_fuse_pool4) 
# the recorded run printed shape (1, 14, 14, 20)
print res.shape
print res

(1, 14, 14, 20)
[[[[ 2.04171395 -2.56970525 -1.77781034 ...,  0.89347905 -2.68317986
    -1.61154878]
   [-0.24284291 -1.85051465 -1.00107169 ..., -0.31923321 -2.58583021
    -0.92440915]
   [-0.09194654 -0.99017757 -0.66342604 ...,  0.1794512  -1.93524683
    -0.75973552]
   ..., 
   [-0.0527007  -0.90517104 -0.69214511 ...,  0.17268394 -1.88305366
    -0.75407839]
   [ 0.1254195  -0.67680436 -0.80938649 ...,  0.38627341 -2.18109536
    -1.06300688]
   [ 1.67231965 -1.30818033 -2.20197105 ...,  0.55524033 -0.92248863
    -1.77833533]]

  [[-0.01622808 -0.74701178 -0.81194186 ...,  0.86736315 -1.27415359
    -1.35166657]
   [-0.6100679   0.19764349 -0.30677283 ...,  0.47503155 -0.73820388
    -0.74768996]
   [-0.26006109  0.38557193 -0.20660688 ...,  0.31468454 -0.45800251
    -0.56117243]
   ..., 
   [-0.39624178  0.451644   -0.17920198 ...,  0.44634181 -0.39602423
    -0.50785339]
   [-0.58567691  0.52538747 -0.30106038 ...,  0.91013157 -0.27931437
    -0.85501492]
   [ 0.19287968  0