In [4]:
import lasagne
import time
import numpy as np
import pickle
import os
import os.path
import theano
import theano.tensor as T

from lasagne.utils import floatX

import matplotlib.pyplot as plt
%matplotlib inline
%load_ext autoreload
%reload_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# VGG-19, 19-layer model from the paper:
# "Very Deep Convolutional Networks for Large-Scale Image Recognition"
# Original source: https://gist.github.com/ksimonyan/3785162f95cd2d5fee77
# License: non-commercial use only

from lasagne.layers import InputLayer, DenseLayer, NonlinearityLayer
#from lasagne.layers.dnn import Conv2DDNNLayer as ConvLayer
from lasagne.layers import Conv2DLayer as ConvLayer
from lasagne.layers import Pool2DLayer as PoolLayer # ORIG
#from lasagne.layers import MaxPool2DLayer as PoolLayer
from lasagne.nonlinearities import softmax

IMAGE_W = 512
if "NS_IMAGE_W" in os.environ: IMAGE_W = int(os.environ["NS_IMAGE_W"])

# Note: tweaked to use average pooling instead of maxpooling
def build_model():
    """Build the convolutional part of VGG-19 with average pooling.

    The input layer has shape (1, 3, IMAGE_W, IMAGE_W), where IMAGE_W is the
    module-level constant at the time of the call. Five stages follow; each
    stage is a run of 3x3 convolutions with pad=1 and ends with a 2x2
    'average_exc_pad' pooling layer.

    Returns: a dict mapping layer names ('input', 'conv<stage>_<index>',
    'pool<stage>') to the corresponding layer objects.
    """
    # (stage number, filters per conv layer, number of conv layers)
    stage_specs = [
        (1, 64, 2),
        (2, 128, 2),
        (3, 256, 4),
        (4, 512, 4),
        (5, 512, 4),
    ]

    top = InputLayer((1, 3, IMAGE_W, IMAGE_W))
    net = {'input': top}
    for stage, num_filters, num_convs in stage_specs:
        for index in range(1, num_convs + 1):
            top = ConvLayer(top, num_filters, 3, pad=1)
            net['conv%d_%d' % (stage, index)] = top
        top = PoolLayer(top, 2, mode='average_exc_pad')
        net['pool%d' % stage] = top

    return net

In [39]:
def im_to_col(im, psize, n_channels=3, pad=None):
    """Similar to MATLAB's im2col function.

    Args:
    im - a 3-dimensional image of the form <n_channels, height, width>.
    psize - an int specifying the (square) block size to use
    n_channels - the number of channels in im
    pad - optional int. When given, height and width are padded on both
          sides with `pad` zeros via np.pad before patches are extracted.
          NOTE(review): np.pad is a NumPy routine, so with `pad` set `im` is
          presumably expected to be a NumPy array rather than a symbolic
          Theano tensor - confirm before passing a symbolic input.

    Returns: a symbolic 5-tensor of the form <patch_id_i, patch_id_j,
           n_channels, psize, psize>.
    """
    assert im.ndim == 3, "im must have dimension 3."

    if pad is not None:
        im = np.pad(im, [(0, 0), (pad, pad), (pad, pad)], 'constant',
                    constant_values=0)
        # Check against the documented contract instead of a hard-coded 3 so
        # that non-RGB images can be padded as well.
        assert im.shape[0] == n_channels, \
            "im must have n_channels channels."

    # Number of patch positions along each spatial axis (stride 1, no border).
    out_h = im.shape[1] - psize + 1
    out_w = im.shape[2] - psize + 1

    res = T.zeros((n_channels, psize * psize, out_h, out_w))

    # One filter per pixel position inside a patch: the rows of the identity
    # matrix reshaped to psize x psize each contain a single 1, so every
    # output map of the convolution holds one pixel of every patch.
    filts = T.reshape(T.eye(psize * psize, psize * psize),
                      (psize * psize, psize, psize))
    filts = T.shape_padleft(filts).dimshuffle((1, 0, 2, 3))

    # Channels are processed independently, each as a 1x1xHxW image.
    for i in range(n_channels):
        cur_slice = T.shape_padleft(im[i], n_ones=2)
        res = T.set_subtensor(res[i], T.nnet.conv.conv2d(cur_slice, filts)[0])

    # <channel, pixel, i, j> -> <channel, i, j, pixel> -> split the pixel axis
    # into psize x psize -> <i, j, channel, psize, psize>.
    return res.dimshuffle((0, 2, 3, 1)).reshape(
        (n_channels, out_h, out_w, psize, psize)).dimshuffle((1, 2, 0, 3, 4))

#
# Set up net, initialize weights.
# 16x16 is the smallest acceptable image size
# There are 64 filters/kernels in conv1_1
#
NUM_K = 64
IMAGE_W = 16  # build_model() reads this module-level value for the input size
net = build_model()

# NOTE(review): pickle.load can execute arbitrary code - only load a weights
# file that comes from a trusted source.
# The file is opened in binary mode and closed as soon as it has been read.
with open('vgg19_normalized.pkl', 'rb') as weights_file:
    values = pickle.load(weights_file)['param values']
lasagne.layers.set_all_param_values(net['pool5'], values)

#
# https://github.com/Lasagne/Recipes/issues/9
# Flip filter when using Conv2D instead of Conv2DDNNLayer
# Depending on how the net was trained you flip, or not
# Not flipping is not a true convolution but is a bit faster, hence it's used for DNN
#
FLIP_FILTER = True
if FLIP_FILTER:
    for layer_name, layer in net.items():
        if layer_name.startswith('conv'):
            # Reverse both spatial axes of every kernel.
            layer.W.set_value(np.asarray(layer.W.get_value())[:, :, ::-1, ::-1])


#
# Get the weights and bias of the first conv layer
#
W, b = lasagne.layers.get_all_param_values(net['conv1_1'])
print("\nnet info\n--------")
print("W.shape %s b.shape %s" % (W.shape, b.shape))

#
# Fake photo: a ramp of increasing values, so every pixel is different
# (an all-ones image would be a simpler alternative)
#
test_photo = (np.arange(1 * 3 * IMAGE_W * IMAGE_W)
              .reshape((1, 3, IMAGE_W, IMAGE_W))
              .astype(np.float32) / 10000.)

#
# 1. Calculate the real answer with lasagne
#
# deterministic=True is unnecessary for this layer (no dropout before)
conv_lasagne = lasagne.layers.get_output(net['conv1_1'], test_photo, deterministic=True).eval()

#
# Lasagne output
#
print("\nconv_lasagne\n---------")
print("conv_lasagne.shape %s" % (conv_lasagne.shape,))
print("conv_lasagne[0,2,1] %s" % (conv_lasagne[0, 2, 1],))


#
# 2. Test theano conv2d
# 
# http://lasagne.readthedocs.org/en/latest/modules/layers/conv.html
# Theano’s underlying convolution (theano.tensor.nnet.conv.conv2d()) only supports pad=0 and pad='full'. 
# This layer emulates other modes by cropping a full convolution or explicitly padding the input with zeros.
# Alternative to add_pad is to use border_mode="full" and
# crop_x = 3 // 2; crop_y = 3 // 2; conv_T = conv_T[:, :, crop_x:-crop_x or None, crop_y:-crop_y or None]
#
def add_pad(im, pad=1, const=0):
    """Surround the last two axes of a 4-D array with a constant border.

    Args:
    im - a 4-dimensional array; the first two axes are left untouched.
    pad - border width added before and after each of the last two axes.
    const - the value used to fill the border.

    Returns: the padded array.
    """
    untouched = (0, 0)
    border = (pad, pad)
    widths = [untouched, untouched, border, border]
    return np.pad(im, widths, mode='constant', constant_values=const)

conv_T = T.nnet.conv.conv2d(add_pad(test_photo), W, border_mode='valid', subsample=(1,1)).eval()

# Add bias and apply rectifier
# (Theano/lasagne version: activation = conved + self.b.dimshuffle('x', 0, 'x', 'x'))
conv_T += b[None, :, None, None]
conv_T[conv_T < 0] = 0

print("\nconv_T\n------")
print("conv_T.shape %s" % (conv_T.shape,))
print("conv_T[0,2,1] %s" % (conv_T[0, 2, 1],))


#
# 3. GEMM
#

#
# Each row is a "patch" of size 3x3x3
# http://petewarden.com/2015/04/20/why-gemm-is-at-the-heart-of-deep-learning/
#
test_photo_im2col = im_to_col(test_photo[0], 3, pad=1).eval()
test_photo_gemm = test_photo_im2col.reshape(IMAGE_W * IMAGE_W, 3 * 3 * 3)

#
# reshape weights to len(patch) rows * num_kernels cols
# 64x3x3x3 -> 27x64
# (defined before the report below so the cell runs on a fresh kernel)
#
params_gemm = W.reshape(NUM_K, 3 * 3 * 3).T

print("\nconv_GEMM\n---------")
print("test_photo_im2col.shape %s" % (test_photo_im2col.shape,))
print("test_photo_gemm.shape %s" % (test_photo_gemm.shape,))
print("params_gemm.shape %s" % (params_gemm.shape,))

#
# Do mega dot product (GEMM): one row per patch, one column per kernel
#
conv_GEMM = np.dot(test_photo_gemm, params_gemm)
# Back to the <batch, kernel, height, width> layout of the other two results.
conv_GEMM = np.array([conv_GEMM.T.reshape(NUM_K, IMAGE_W, IMAGE_W)])

#
# Add bias and apply rectifier
#
conv_GEMM += b[None, :, None, None]
conv_GEMM[conv_GEMM < 0] = 0 # rectify

#
# 4. Results
#
print("conv_GEMM[0,2,1] %s" % (conv_GEMM[0, 2, 1],))
print("")
# Each confirmation is printed only after its assertion has actually passed.
assert np.allclose(conv_T, conv_lasagne)
print("assert np.allclose(conv_T, conv_lasagne) True")
assert np.allclose(conv_GEMM, conv_lasagne)
print("assert np.allclose(conv_GEMM, conv_lasagne) True")


net info
--------
W.shape (64, 3, 3, 3) b.shape (64,)

conv_lasagne
---------
conv_lasagne.shape (1, 64, 16, 16)
conv_lasagne[0,2,1] [ 0.00126734  0.00186376  0.00186678  0.0018698   0.00187282  0.00187584
  0.00187886  0.00188188  0.0018849   0.00188792  0.00189094  0.00189396
  0.00189698  0.0019      0.00190302  0.00318206]

conv_T
------
conv_T.shape (1, 64, 16, 16)
conv_T[0,2,1] [ 0.00126734  0.00186376  0.00186678  0.0018698   0.00187282  0.00187584
  0.00187886  0.00188188  0.0018849   0.00188792  0.00189094  0.00189396
  0.00189698  0.0019      0.00190302  0.00318206]

conv_GEMM
---------
test_photo_im2col.shape (16, 16, 3, 3, 3)
test_photo_gemm.shape (256, 27)
params_gemm.shape (27, 64)
conv_GEMM[0,2,1] [ 0.00126734  0.00186376  0.00186678  0.0018698   0.00187282  0.00187584
  0.00187886  0.00188188  0.0018849   0.00188792  0.00189094  0.00189396
  0.00189698  0.0019      0.00190302  0.00318206]

assert np.allclose(conv_T, conv_lasagne) True
assert np.allclose(conv_GEMM, conv