# Fully Convolutional Networks (model initialization)

### download VGGNet and load weights with Chainer

In [1]:
import os.path
import numpy as np

import chainer
from chainer import serializers
from chainer import functions as F
from chainer import links as L
import chainer.functions.caffe

In [2]:
# VGG-16 weight files: the Caffe model fetched from the VGG site, and the
# HDF5 copy written by Chainer's serializer after the first conversion.
path_caffemodel = 'VGG_ILSVRC_16_layers.caffemodel'
path_chainermodel = 'VGG_ILSVRC_16_layers.chainermodel'

In [3]:
if os.path.isfile(path_chainermodel):
    from vggnet16 import VGGNet16
    vggnet = VGGNet16()
    chainer.serializers.load_hdf5(path_chainermodel, vggnet)
else:
    if not os.path.isfile(path_caffemodel):
        ! wget http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel
    vggnet = F.caffe.CaffeFunction(path_caffemodel)
    chainer.serializers.save_hdf5(path_chainermodel, vggnet)

### Initialize the added conv (score) layers with zeros and the deconv layers with bilinear interpolation weights

In [4]:
def bilinear_kernel(ksize):
    """Return a (ksize, ksize) float32 bilinear interpolation kernel.

    The 1-D profile is a triangle centred on the kernel, falling off
    linearly with distance; the 2-D kernel is its outer product with itself.
    For an even ``ksize`` used with ``stride = ksize // 2`` the weights that
    reach any single output pixel sum to 1.
    """
    factor = (ksize + 1) // 2
    # Odd kernels are centred on a pixel, even kernels between two pixels.
    center = factor - 1.0 if ksize % 2 == 1 else factor - 0.5
    profile = 1.0 - np.abs(np.arange(ksize) - center) / factor
    return np.outer(profile, profile).astype(np.float32)


def bilinear_upsampling_weights(n_channels, ksize):
    """Build deconvolution weights that upsample each channel independently.

    Returns a float32 array of shape (n_channels, n_channels, ksize, ksize)
    that is zero everywhere except on the channel diagonal, where every
    [i, i] slice holds the bilinear kernel.
    """
    weights = np.zeros((n_channels, n_channels, ksize, ksize), dtype=np.float32)
    diag = np.arange(n_channels)
    # Only channel i feeds channel i, so classes are not mixed by upsampling.
    weights[diag, diag] = bilinear_kernel(ksize)
    return weights


# Initial weights for the 2x (ksize 4) and 8x (ksize 16) deconvolution layers.
# The previous constant 0.25 fill was a box filter, not bilinear interpolation.
w_up2 = bilinear_upsampling_weights(81, 4)
w_up8 = bilinear_upsampling_weights(81, 16)

In [5]:
class FCN(chainer.Chain):
    """Layer definitions for a fully convolutional network on a VGG-16 trunk.

    The ``conv*`` and ``fc*`` links carry the VGG-16 layer names so their
    parameters can be matched by name; ``fc6`` and ``fc7`` are declared as
    convolutions rather than fully connected layers. On top of the trunk sit
    81-channel 1x1 score layers, all initialized to zero, and bias-free
    deconvolution layers whose initial weights come from the module-level
    ``w_up2`` and ``w_up8`` arrays.

    Only the links are declared here; no forward pass is defined.
    """

    def __init__(self):
        # Name the class explicitly: ``super(self.__class__, self)`` recurses
        # forever as soon as this class is subclassed.
        super(FCN, self).__init__(
            # NOTE(review): pad=100 presumably mirrors the input padding of
            # the reference FCN definition -- confirm before changing.
            conv1_1=L.Convolution2D(3, 64, ksize=3, stride=1, pad=100),
            conv1_2=L.Convolution2D(64, 64, ksize=3, stride=1, pad=1),

            conv2_1=L.Convolution2D(64, 128, ksize=3, stride=1, pad=1),
            conv2_2=L.Convolution2D(128, 128, ksize=3, stride=1, pad=1),

            conv3_1=L.Convolution2D(128, 256, ksize=3, stride=1, pad=1),
            conv3_2=L.Convolution2D(256, 256, ksize=3, stride=1, pad=1),
            conv3_3=L.Convolution2D(256, 256, ksize=3, stride=1, pad=1),

            conv4_1=L.Convolution2D(256, 512, ksize=3, stride=1, pad=1),
            conv4_2=L.Convolution2D(512, 512, ksize=3, stride=1, pad=1),
            conv4_3=L.Convolution2D(512, 512, ksize=3, stride=1, pad=1),

            conv5_1=L.Convolution2D(512, 512, ksize=3, stride=1, pad=1),
            conv5_2=L.Convolution2D(512, 512, ksize=3, stride=1, pad=1),
            conv5_3=L.Convolution2D(512, 512, ksize=3, stride=1, pad=1),

            # Fully connected VGG layers expressed as convolutions.
            fc6=L.Convolution2D(512, 4096, ksize=7, stride=1, pad=0),
            fc7=L.Convolution2D(4096, 4096, ksize=1, stride=1, pad=0),

            # fc7
            # Zero-initialized like the other added score layers below.
            score_fr=L.Convolution2D(
                4096, 81, ksize=1, stride=1, pad=0, bias=0,
                initialW=np.zeros(shape=(81, 4096, 1, 1), dtype=np.float32)),
            upscore2=L.Deconvolution2D(
                81, 81, ksize=4, stride=2, pad=0,
                nobias=True, initialW=w_up2),

            # pool4
            score_pool4=L.Convolution2D(
                512, 81, ksize=1, stride=1, pad=0, bias=0,
                initialW=np.zeros(shape=(81, 512, 1, 1), dtype=np.float32)),
            upscore_pool4=L.Deconvolution2D(
                81, 81, ksize=4, stride=2, pad=0,
                nobias=True, initialW=w_up2),

            # pool3
            score_pool3=L.Convolution2D(
                256, 81, ksize=1, stride=1, pad=0, bias=0,
                initialW=np.zeros(shape=(81, 256, 1, 1), dtype=np.float32)),
            upscore8=L.Deconvolution2D(
                81, 81, ksize=16, stride=8, pad=0,
                nobias=True, initialW=w_up8),
        )

### copy weights of VGGNet to FCN

In [6]:
fcn = FCN()

# Index the FCN layers by name so every VGG layer needs a single lookup
# instead of a scan over all FCN layers.
fcn_layers = {layer.name: layer for layer in fcn.children()}

for src_layer in vggnet.children():
    dst_layer = fcn_layers.get(src_layer.name)
    if dst_layer is None:
        # No same-named layer in the FCN: nothing to copy.
        continue
    try:
        dst_layer.copyparams(src_layer)
    except Exception as e:
        # A failed copy is reported rather than raised. fc6 and fc7 are
        # expected here: their fully connected weights do not fit the
        # convolutional layers and are transferred separately afterwards.
        print(e)

could not broadcast input array from shape (4096,25088) into shape (4096,512,7,7)
could not broadcast input array from shape (4096,4096) into shape (4096,4096,1,1)


In [7]:
# fc6 and fc7 are fully connected in VGG but convolutions in the FCN, so
# their weight matrices are reshaped to the convolution kernel shape.
# The values are copied in place: rebinding `.data` to a reshape view (or
# to the source bias array) would make both models share the same memory.
for layer_name in ('fc6', 'fc7'):
    src_layer = getattr(vggnet, layer_name)
    dst_layer = getattr(fcn, layer_name)
    # Take the target shape from the destination instead of hard-coding it.
    dst_layer.W.data[...] = src_layer.W.data.reshape(dst_layer.W.data.shape)
    dst_layer.b.data[...] = src_layer.b.data

In [8]:
# Write the initialized FCN parameters to disk in HDF5 format.
serializers.save_hdf5('fcn_init.chainermodel', fcn)