Implement VGG using Keras

In [1]:
# Load data
import os
import tensorflow as tf
import numpy as np

# Pretrained VGG16 weights; originally downloaded from
# https://dl.dropboxusercontent.com/u/50333326/vgg16.npy
# The directory holding vgg16.npy can be overridden through the VGG16_DIR
# environment variable; the fallback is the original hard-coded location.
path = os.environ.get('VGG16_DIR', '/home/ariel/DL/tensorflow/tutorials/')
vgg16_npy_path = os.path.join(path, 'vgg16.npy')

# The file holds a single pickled dict {layer_name: [weights, bias]}, hence
# the trailing .item(). allow_pickle=True is needed on NumPy >= 1.16.3, where
# loading pickled objects is refused by default.
# NOTE(review): unpickling can execute arbitrary code -- only load a weights
# file obtained from a trusted source.
data_dict = np.load(vgg16_npy_path, encoding='latin1', allow_pickle=True).item()

# print keys in dictionary -
# here it prints all the layers names in the VGG net
name = 'conv1_1'
# Single-argument print(...) produces the same text under Python 2 and 3.
print('keys in layers =  %s \n' % (list(data_dict.keys()),))
# tensorflow: weights are [height, width, in_channels, out_channels]
# The labels are built from `name` so they stay correct if another layer is inspected.
print('weights shape in %s %s \n' % (name, data_dict[name][0].shape))
print('bias shape in %s %s \n' % (name, data_dict[name][1].shape))


keys in layers =  ['conv5_1', 'fc6', 'conv5_3', 'fc7', 'fc8', 'conv5_2', 'conv4_1', 'conv4_2', 'conv4_3', 'conv3_3', 'conv3_2', 'conv3_1', 'conv1_1', 'conv1_2', 'conv2_2', 'conv2_1'] 

weights shape in conv1_1 (3, 3, 3, 64) 

bias shape in conv1_1 (64,) 



**The network**

In [2]:
def extract_data(name, layers=None):
    """Return the kernel geometry and parameters of one convolutional layer.

    Parameters
    ----------
    name : str
        Key of the layer in the weights dictionary, e.g. 'conv1_1'.
    layers : dict, optional
        Mapping ``layer_name -> [weights, bias]``. When omitted, the
        notebook-level ``data_dict`` is used, so existing one-argument calls
        keep working.

    Returns
    -------
    tuple
        ``(nb_filters_out, nb_rows, nb_cols, nb_channels, weight, bias)``,
        where the four sizes are read from ``weight.shape`` interpreted as
        ``[height, width, in_channels, out_channels]``.

    Raises
    ------
    KeyError
        If ``name`` is not a key of the weights dictionary.
    IndexError
        If the layer's weight array has fewer than four dimensions
        (e.g. a fully connected layer), so it is not a convolution kernel.
    """
    if layers is None:
        # Fall back to the weights loaded earlier in the notebook.
        layers = data_dict

    entry = layers[name]
    weight = entry[0]
    bias = entry[1]

    kernel_shape = weight.shape
    if len(kernel_shape) < 4:
        # Same exception type plain indexing would raise, with a usable message.
        raise IndexError(
            "layer %r has weights of shape %r; expected a 4-D convolution "
            "kernel [height, width, in_channels, out_channels]"
            % (name, tuple(kernel_shape)))

    nb_rows = kernel_shape[0]
    nb_cols = kernel_shape[1]
    nb_channels = kernel_shape[2]
    nb_filters_out = kernel_shape[3]
    return nb_filters_out, nb_rows, nb_cols, nb_channels, weight, bias

In [6]:
# build a simple model
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, MaxPooling2D


# Build a deterministic synthetic input image (no image file is read):
# evenly spaced values in [-0.1, 0.5] reshaped to a single 224x224 RGB frame.
IMAGE_SIZE = 224
img_shape = (1, IMAGE_SIZE, IMAGE_SIZE, 3) # tf format (N,H,W,C)
img = np.linspace(-0.1, 0.5, num=np.prod(img_shape)).reshape(img_shape)

# Layout of the convolutional part of the network: each inner tuple is one
# block of 3x3 'same' convolutions with ReLU, followed by a 2x2 max-pooling.
# Layer names are the keys of the pretrained-weights dictionary.
VGG16_CONV_BLOCKS = (
    ('conv1_1', 'conv1_2'),
    ('conv2_1', 'conv2_2'),
    ('conv3_1', 'conv3_2', 'conv3_3'),
    ('conv4_1', 'conv4_2', 'conv4_3'),
    ('conv5_1', 'conv5_2', 'conv5_3'),
)

# the network
model = Sequential()

is_first_layer = True
for block_layers in VGG16_CONV_BLOCKS:
    for layer_name in block_layers:
        nb_filters_out, nb_rows, nb_cols, nb_channels, weight, bias = extract_data(layer_name)

        # Only the first layer of the model takes the image as its input, so
        # only that layer is given input_shape = ROWS x COLS x CHANNELS.
        layer_kwargs = {}
        if is_first_layer:
            layer_kwargs['input_shape'] = (IMAGE_SIZE, IMAGE_SIZE, 3)
            is_first_layer = False

        model.add(Convolution2D(nb_filters_out, # number of output filters
                                nb_rows,        # number of rows in the input kernel
                                nb_cols,        # number of cols in the input kernel
                                border_mode='same',
                                activation='relu', # activation
                                weights=[weight, bias], # initial (pretrained) weights
                                **layer_kwargs))

    # Every block ends with the same 2x2 max-pooling (stride defaults to pool size).
    model.add(MaxPooling2D(pool_size=(2, 2), strides=None, border_mode='same'))

# run on a simple image
res = model.predict(img, batch_size=1)
# Single-argument print(...) produces the same text under Python 2 and 3.
print(res.shape)
print(res)
model.summary()

(1, 7, 7, 512)
[[[[ 0.04152922  0.          0.         ...,  0.          0.76243585  0.        ]
   [ 0.          0.          0.         ...,  0.          0.73776311  0.        ]
   [ 0.          0.          0.         ...,  0.          0.73881972  0.        ]
   ..., 
   [ 0.          0.          0.         ...,  0.          0.72844511  0.        ]
   [ 0.          0.          0.         ...,  0.          0.79345977  0.        ]
   [ 0.          0.          0.         ...,  0.          0.78591818  0.        ]]

  [[ 0.          0.          0.         ...,  0.          0.67036343  0.        ]
   [ 0.          0.          0.         ...,  0.          0.70792282  0.        ]
   [ 0.          0.          0.         ...,  0.          0.72337979  0.        ]
   ..., 
   [ 0.          0.          0.         ...,  0.          0.72094685  0.        ]
   [ 0.06755218  0.          0.         ...,  0.          0.66223222  0.        ]
   [ 0.07043768  0.          0.         ...,  0.          0.691

Comment: perfect match at this point between vgg_keras (this file) and vgg_tf.

TODO: 
1. convert this file into a class as it's done in the following:
    https://github.com/fastai/courses/blob/master/deeplearning1/nbs/vgg16.py
2. build a class that enables chopping the output down from 1000 categories to a few categories, as is done in the link above.
3. Train the model on the LSUN db

