**Implement VGG using TF**

In [1]:
# Load data
import os
import tensorflow as tf
import numpy as np

# load weights from https://dl.dropboxusercontent.com/u/50333326/vgg16.npy
path = '/home/ariel/DL/tensorflow/tutorials/'
vgg16_npy_path = os.path.join(path, 'vgg16.npy')            
data_dict = np.load(vgg16_npy_path, encoding='latin1').item()

# print keys in dictionary - 
# here it prints all the layers names in the VGG net
name = 'conv1_1'
print 'keys in layers = ', data_dict.keys(), '\n'
# tensorflow: weights are [height, width, in_channels, out_channels]
print 'weights shape in conv1_1', data_dict[name][0].shape, '\n'
print 'bias shape in conv1_1', data_dict[name][1].shape, '\n'


keys in layers =  ['conv5_1', 'fc6', 'conv5_3', 'fc7', 'fc8', 'conv5_2', 'conv4_1', 'conv4_2', 'conv4_3', 'conv3_3', 'conv3_2', 'conv3_1', 'conv1_1', 'conv1_2', 'conv2_2', 'conv2_1'] 

weights shape in conv1_1 (3, 3, 3, 64) 

bias shape in conv1_1 (64,) 



**The Conv Block in TF**
in the _fc2conv() block, each fully connected layer is converted into a convolutional one.
For 'fc8', the last fc block you can fine tune a number of classes which is lower than the 
initial number of classes (=1000). 
For the moment, the new number of classes == original number of classes == 1000.
Two methods may be used in order to  convert the original num of classes to a new num of clases:
1. averaging - see https://github.com/MarvinTeichmann/tensorflow-fcn/blob/master/fcn8_vgg.py
2. fine tuning - see https://github.com/fastai/courses/blob/master/deeplearning1/nbs/vgg16.py

both methods must be experimented, the best one must be slected!!!....

In [13]:
def _max_pool(bottom, name):
    pool = tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME', name=name)
    return pool

def get_weights(name):
    init = tf.constant_initializer(value=data_dict[name][0], dtype=tf.float32)
    weights = tf.get_variable(name="weights", initializer=init, shape=data_dict[name][0].shape)
    return weights

def get_biases(name):
    init = tf.constant_initializer(value=data_dict[name][1], dtype=tf.float32)
    biases = tf.get_variable(name="biases", initializer=init, shape=data_dict[name][1].shape)
    return biases

# convert FC layer into Convolution layer
def _fc2conv(bottom, name, num_classes = None):
    with tf.variable_scope(name):# as scope:
        weights = data_dict[name][0]
        if name == 'fc6':        
            shape = [7, 7, 512, 4096] # tf weights: [kernel_rows, kernel_cols, input, output]
            weights = weights.reshape(shape)
            print 'fc6', weights.shape
        elif name == 'fc7':        
            shape = [1, 1, 4096, 4096]
            weights = weights.reshape(shape)
            print 'fc7', weights.shape
        else: # name == 'fc8'
            shape = [1, 1, 4096, 1000]
            weights = weights.reshape(shape) # all 1000 classes
            print 'fc8', weights.shape

        # set weights
        init = tf.constant_initializer(value=weights, dtype=tf.float32)
        filt = tf.get_variable(name="weights", initializer=init, shape=shape)

        # conv
        conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')

        # bias
        conv_biases = get_biases(name)
        bias = tf.nn.bias_add(conv, conv_biases)

        # relu
        bias = tf.nn.relu(bias)

        # return
        return bias

def _conv_layer(bottom, name):
    with tf.variable_scope(name):# as scope:
        # get filter weights
        weights = get_weights(name)
       
        # get filter biases
        biases = get_biases(name)

        # convolve: weights * bottom
        conv = tf.nn.conv2d(bottom, weights, [1, 1, 1, 1], padding='SAME')

        # add biases: weights * bottom + biases
        conv_biases = tf.nn.bias_add(conv, biases)

        # relu: relu(weights * bottom + biases)
        conv_biases_relu = tf.nn.relu(conv_biases)

        return conv_biases_relu

** the network **

In [15]:
# this command clean all nodes in the graph + variables
tf.reset_default_graph()

# load an image
IMAGE_SIZE = 224
img_shape = (1, IMAGE_SIZE, IMAGE_SIZE, 3) # tf format (N,H,W,C)
img = np.linspace(-0.1, 0.5, num=np.prod(img_shape)).reshape(img_shape)

# remove the mean of imagenet
vgg_mean = np.array([103.939, 116.779, 123.68], dtype=np.float32).reshape((1,1,3)) # BGR
img = img - vgg_mean

# tf handle to img
tf_img = tf.Variable(img, name="img", dtype=tf.float32)


_conv1_1 = _conv_layer(tf_img, 'conv1_1')
_conv1_2 = _conv_layer(_conv1_1, 'conv1_2')
_pool1   = _max_pool(_conv1_2, 'pool1')

_conv2_1 = _conv_layer(_pool1, 'conv2_1')
_conv2_2 = _conv_layer(_conv2_1, 'conv2_2')
_pool2   = _max_pool(_conv2_2, 'pool2')

_conv3_1 = _conv_layer(_pool2, 'conv3_1')
_conv3_2 = _conv_layer(_conv3_1, 'conv3_2')
_conv3_3 = _conv_layer(_conv3_2, 'conv3_3')
_pool3   = _max_pool(_conv3_3, 'pool3')

_conv4_1 = _conv_layer(_pool3, 'conv4_1')
_conv4_2 = _conv_layer(_conv4_1, 'conv4_2')
_conv4_3 = _conv_layer(_conv4_2, 'conv4_3')
_pool4   = _max_pool(_conv4_3, 'pool4')

_conv5_1 = _conv_layer(_pool4, 'conv5_1')
_conv5_2 = _conv_layer(_conv5_1, 'conv5_2')
_conv5_3 = _conv_layer(_conv5_2, 'conv5_3')
_pool5   = _max_pool(_conv5_3, 'pool5')

_fc6 = _fc2conv(_pool5, 'fc6')
_fc7 = _fc2conv(_fc6, 'fc7')
_fc8 = _fc2conv(_fc7, 'fc8')

# weights = data_dict['fc6'][0]
# weights = weights.reshape([7, 7, 512, 4096])




sess = tf.Session()
sess.run(tf.initialize_all_variables())
res = sess.run(_fc8) 
print res.shape
print res

fc6 (7, 7, 512, 4096)
fc7 (1, 1, 4096, 4096)
fc8 (1, 1, 4096, 1000)
(1, 7, 7, 1000)
[[[[ 0.          0.09025753  0.         ...,  0.          0.          0.79901946]
   [ 0.          0.07668401  0.         ...,  0.          0.          0.86026871]
   [ 0.          0.07620315  0.         ...,  0.          0.          0.80350524]
   ..., 
   [ 0.          0.1233471   0.         ...,  0.          0.          0.86561912]
   [ 0.          0.14276227  0.         ...,  0.          0.          0.98144484]
   [ 0.          0.01396307  0.         ...,  0.          0.          1.06213295]]

  [[ 0.          0.10429658  0.         ...,  0.          0.          0.76482892]
   [ 0.          0.15433228  0.         ...,  0.          0.          0.8034817 ]
   [ 0.          0.20993349  0.         ...,  0.          0.          0.61662412]
   ..., 
   [ 0.          0.2102727   0.         ...,  0.          0.          0.81622267]
   [ 0.          0.27374965  0.         ...,  0.          0.          0.8795