# Fine tuning
以下のサイトを参考に
http://qiita.com/tabe2314/items/6c0c1b769e12ab1e2614

In [1]:
#!/usr/bin/env python
from __future__ import print_function
import argparse
import sys; sys.argv=['']; del sys

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import training
from chainer.training import extensions
from chainer import Variable
import copy # test iterator copy


In [22]:
def copy_model(src, dst):
    """Copy parameters from ``src`` into the same-named children of ``dst``.

    Every child of ``src`` is looked up by name in ``dst``. Children that are
    missing in ``dst`` or have a different type are skipped silently. Nested
    chains are processed recursively. A child's parameters are copied only
    when every zipped pair of parameters agrees in name and shape; otherwise
    the child is reported and skipped.

    The destination receives an independent copy of each array, so later
    in-place changes to one model do not leak into the other.

    Args:
        src: Source ``chainer.link.Chain`` to read parameters from.
        dst: Destination ``chainer.link.Chain`` whose parameters are replaced.

    Raises:
        AssertionError: If ``src`` or ``dst`` is not a ``chainer.link.Chain``.
    """
    assert isinstance(src, chainer.link.Chain)
    assert isinstance(dst, chainer.link.Chain)
    for child in src.children():
        if child.name not in dst.__dict__:
            continue
        dst_child = dst[child.name]
        if type(child) != type(dst_child):
            continue
        if isinstance(child, chainer.link.Chain):
            copy_model(child, dst_child)
        if isinstance(child, chainer.link.Link):
            # Pair the parameters positionally; zip stops at the shorter side.
            pairs = list(zip(child.namedparams(), dst_child.namedparams()))
            match = all(
                s_name == d_name and s_param.data.shape == d_param.data.shape
                for (s_name, s_param), (d_name, d_param) in pairs)
            if not match:
                print('Ignore %s because of parameter mismatch' % child.name)
                continue
            for (_, s_param), (_, d_param) in pairs:
                # Copy instead of sharing the buffer with the source model.
                d_param.data = s_param.data.copy()
            print('Copy %s' % child.name)

In [12]:
# Name of the dataset to train on; the loading cell below branches on this value.
dataset = 'cifar10'
# dataset = 'mnist'

# batchsize is the mini-batch size passed to the iterators;
# epochsize is the number of iterations of the outer training loop.
batchsize = 128
epochsize = 300

# Load dataset

In [13]:
# Load the dataset selected by `dataset`. Every branch defines `class_labels`
# (number of target classes) and the `train` / `test` splits.
if dataset == 'cifar10':
    print('Using CIFAR10 dataset.')
    class_labels = 10
    train, test = chainer.datasets.get_cifar10()
elif dataset == 'cifar100':
    print('Using CIFAR100 dataset.')
    class_labels = 100
    train, test = chainer.datasets.get_cifar100()
elif dataset == 'mnist':
    print('Using mnist dataset.')
    class_labels = 10
    train, test = chainer.datasets.get_mnist()
    # Reshape every sample to (1, 28, 28) and keep its label unchanged.
    train = [(image.reshape(1, 28, 28), label) for image, label in train]
    test = [(image.reshape(1, 28, 28), label) for image, label in test]
else:
    # Fail here rather than with a NameError in a later cell.
    raise ValueError('Unknown dataset: {!r}'.format(dataset))

Using CIFAR10 dataset.


In [14]:
# Build mini-batch iterators over the train / test splits
train_iter = chainer.iterators.SerialIterator(train, batchsize)
# The test iterator is created with repeat=False and shuffle=False
test_iter = chainer.iterators.SerialIterator(test, batchsize, repeat=False, shuffle=False)
# Number of test samples; the evaluation code divides the summed metrics by it
testsize = len(test)
testsize

10000

# Load Caffe model

In [3]:
from chainer.links.caffe import CaffeFunction
# Use cPickle when it can be imported and fall back to the standard pickle
# module otherwise. Only ImportError is caught so unrelated failures surface.
try:
    import cPickle as pickle
except ImportError:
    import pickle

# File name of the Caffe model to convert and of the pickle it is cached as.
CAFFE_MODEL_NAME = 'VGG_ILSVRC_16_layers.caffemodel'
OUTPUT_NAME = 'vgg16.pkl'

In [4]:
# One-off conversion: parse the Caffe model and cache it as a pickle, which
# the next cell loads.
vgg = CaffeFunction('/home/komatsu/Downloads/'+CAFFE_MODEL_NAME)
# The context manager guarantees the output file is flushed and closed.
with open('/home/komatsu/work/yosenabe/practices/chainer_pkl/'+OUTPUT_NAME, 'wb') as pkl_file:
    pickle.dump(vgg, pkl_file)


In [4]:
%time
vgg= pickle.load(open('/home/komatsu/work/yosenabe/practices/chainer_pkl/'+OUTPUT_NAME, 'rb'))

In [5]:
# Print every (name, parameter) pair of the loaded model
for it in vgg.namedparams():
    print(it)

(u'/conv1_1/W', <variable W>)
(u'/conv1_1/b', <variable b>)
(u'/conv1_2/W', <variable W>)
(u'/conv1_2/b', <variable b>)
(u'/conv2_1/W', <variable W>)
(u'/conv2_1/b', <variable b>)
(u'/conv2_2/W', <variable W>)
(u'/conv2_2/b', <variable b>)
(u'/conv3_1/W', <variable W>)
(u'/conv3_1/b', <variable b>)
(u'/conv3_2/W', <variable W>)
(u'/conv3_2/b', <variable b>)
(u'/conv3_3/W', <variable W>)
(u'/conv3_3/b', <variable b>)
(u'/conv4_1/W', <variable W>)
(u'/conv4_1/b', <variable b>)
(u'/conv4_2/W', <variable W>)
(u'/conv4_2/b', <variable b>)
(u'/conv4_3/W', <variable W>)
(u'/conv4_3/b', <variable b>)
(u'/conv5_1/W', <variable W>)
(u'/conv5_1/b', <variable b>)
(u'/conv5_2/W', <variable W>)
(u'/conv5_2/b', <variable b>)
(u'/conv5_3/W', <variable W>)
(u'/conv5_3/b', <variable b>)
(u'/fc6/W', <variable W>)
(u'/fc6/b', <variable b>)
(u'/fc7/W', <variable W>)
(u'/fc7/b', <variable b>)
(u'/fc8/W', <variable W>)
(u'/fc8/b', <variable b>)


# Create network

In [15]:
class VGG(chainer.Chain):
    """VGG-style classifier: four blocks of 3x3 convolutions and two linear layers.

    The convolution links are registered as ``conv1_1`` .. ``conv4_3`` and the
    linear links as ``new_fc5`` and ``new_fc6``. Both linear links are
    initialised with ``HeNormal``.
    """

    def __init__(self, class_labels):
        """Create and register every link.

        Args:
            class_labels: Output size of the last linear layer ``new_fc6``.
        """
        def conv3x3(n_in, n_out):
            # All convolutions share kernel size 3, stride 1 and padding 1.
            return L.Convolution2D(n_in, n_out, 3, stride=1, pad=1)

        he_normal = chainer.initializers.HeNormal()
        super(VGG, self).__init__(
            conv1_1=conv3x3(3, 64),
            conv1_2=conv3x3(64, 64),
            conv2_1=conv3x3(64, 128),
            conv2_2=conv3x3(128, 128),
            conv3_1=conv3x3(128, 256),
            conv3_2=conv3x3(256, 256),
            conv3_3=conv3x3(256, 256),
            conv4_1=conv3x3(256, 512),
            conv4_2=conv3x3(512, 512),
            conv4_3=conv3x3(512, 512),
            new_fc5=L.Linear(512*2*2, 1000, initialW=he_normal),
            new_fc6=L.Linear(1000, class_labels, initialW=he_normal),
        )

    def __call__(self, x):
        """Run the forward pass and return the output of ``new_fc6``."""
        conv_blocks = (
            ('conv1_1', 'conv1_2'),
            ('conv2_1', 'conv2_2'),
            ('conv3_1', 'conv3_2', 'conv3_3'),
            ('conv4_1', 'conv4_2', 'conv4_3'),
        )
        h = x
        for block in conv_blocks:
            for layer_name in block:
                h = F.relu(getattr(self, layer_name)(h))
            # Each block is followed by a 2x2 max pooling with stride 2.
            h = F.max_pooling_2d(h, 2, stride=2)

        h = F.relu(self.new_fc5(h))
        return self.new_fc6(h)

In [16]:
# Instantiate the network with the class count of the selected dataset
new_vgg = VGG(class_labels)

# Load from Caffemodel

In [23]:
# First conv1_1 filter of new_vgg, shown for comparison with the values after copy_model
new_vgg['conv1_1'].W.data[0]

array([[[-0.16854778, -0.12473571, -0.13066512],
        [ 0.03298501,  0.09515205,  0.32222724],
        [ 0.02890779,  0.08020072, -0.36099228]],

       [[ 0.21751988,  0.30467194,  0.26698956],
        [ 0.17851426, -0.1768859 , -0.00296475],
        [ 0.02764448,  0.33886912, -0.15486431]],

       [[ 0.0387103 , -0.67247236,  0.09884518],
        [ 0.03921968, -0.13343205, -0.11175998],
        [-0.13195056,  0.16540597, -0.11617035]]], dtype=float32)

In [24]:
# Copy the parameters of matching layers from the Caffe model into new_vgg
copy_model(vgg, new_vgg)

Copy conv1_1
Copy conv1_2
Copy conv2_1
Copy conv2_2
Copy conv3_1
Copy conv3_2
Copy conv3_3
Copy conv4_1
Copy conv4_2
Copy conv4_3


In [26]:
# First conv1_1 filter of the source Caffe model
vgg['conv1_1'].W.data[0]

array([[[ 0.42947057,  0.373467  , -0.06136011],
        [ 0.27476987,  0.03868078, -0.36722335],
        [-0.05746817, -0.26224968, -0.35009676]],

       [[ 0.55037946,  0.44007453, -0.08138704],
        [ 0.34573907,  0.04063221, -0.45350131],
        [-0.05863491, -0.33066967, -0.4850302 ]],

       [[ 0.4800154 ,  0.4085474 , -0.06514555],
        [ 0.31047726,  0.05020237, -0.40338343],
        [-0.05087169, -0.28522751, -0.41851634]]], dtype=float32)

In [25]:
# Same filter in new_vgg; the recorded output equals the source model's values
new_vgg['conv1_1'].W.data[0]

array([[[ 0.42947057,  0.373467  , -0.06136011],
        [ 0.27476987,  0.03868078, -0.36722335],
        [-0.05746817, -0.26224968, -0.35009676]],

       [[ 0.55037946,  0.44007453, -0.08138704],
        [ 0.34573907,  0.04063221, -0.45350131],
        [-0.05863491, -0.33066967, -0.4850302 ]],

       [[ 0.4800154 ,  0.4085474 , -0.06514555],
        [ 0.31047726,  0.05020237, -0.40338343],
        [-0.05087169, -0.28522751, -0.41851634]]], dtype=float32)

# RUN

In [27]:
class Classifier(chainer.Chain):
    """Wrap a predictor and compute softmax cross-entropy loss and accuracy.

    After each call the most recent loss and accuracy are available as
    ``self.loss`` and ``self.accuracy``.
    """

    def __init__(self, predictor):
        """Register ``predictor`` as the child link named ``predictor``."""
        super(Classifier, self).__init__(predictor=predictor)
        # Define the attributes up front so they exist before the first call.
        self.loss = None
        self.accuracy = None

    def clear(self):
        """Reset the cached loss and accuracy."""
        self.loss = None
        self.accuracy = None

    def __call__(self, x, t):
        """Compute the loss of the predictor's output for ``x`` against ``t``.

        Args:
            x: Input batch passed to the predictor.
            t: Target labels for the batch.

        Returns:
            The softmax cross-entropy loss variable.
        """
        self.clear()
        y = self.predictor(x)
        # Keep the loss on the instance; clear() resets it on every call.
        self.loss = F.softmax_cross_entropy(y, t)
        self.accuracy = F.accuracy(y, t)
        chainer.report({'loss': self.loss, 'accuracy': self.accuracy}, self)
        return self.loss


In [29]:
# Set up the model
gpu_id = 0 #  use gpu
# Wrap the network so that calling the model returns the loss
model = Classifier(new_vgg)

# A negative gpu_id skips the transfer to the GPU
if gpu_id >= 0:
    chainer.cuda.get_device(gpu_id).use()  # Make a specified GPU current
    model.to_gpu()  # Copy the model to the GPU

In [30]:
# Set up an Adam optimizer for the model
optimizer = chainer.optimizers.Adam()
optimizer.use_cleargrads()
optimizer.setup(model)
# Weight decay hook, currently disabled
# optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

In [31]:
# Smoke test: push a single mini-batch through the network
# Take one mini-batch from the training iterator
batch = train_iter.next()
xp = model.xp
# Split the batch into an input array and a label array
x = xp.asarray([it[0] for it in batch], dtype=np.float32)
t = xp.asarray([it[1] for it in batch], dtype=np.int32)
print(x[0].shape)
# predict without Classifier
y = model.predictor(x)
loss = F.softmax_cross_entropy(y, t)
model.cleargrads()
loss.backward()
# NOTE: this applies a real parameter update, so the check also modifies the model
optimizer.update()
#print(y.data)
print(loss.data)

(3, 32, 32)
8.82952594757


In [None]:
%%time
from tqdm import tqdm_notebook as tqdm
# from tqdm import tqdm

# run
xp = model.xp

pbar = tqdm(xrange(epochsize), desc='epoch loop')
for epoch in pbar:
    for batch in train_iter:
        # data separation
        x = xp.asarray([it[0] for it in batch], dtype=np.float32)
        t = xp.asarray([it[1] for it in batch], dtype=np.int32)
        # compute grad
        loss = model(x, t)
        model.cleargrads()
        loss.backward()
        optimizer.update()

        # terminate
        if train_iter.is_new_epoch is True:
            break

    # evaluate model   
    sum_loss = 0
    sum_acc = 0
    test_iter_copy = copy.copy(test_iter)
    for test_batch in test_iter_copy:
        # data separation
        x = xp.asarray([it[0] for it in test_batch], dtype=np.float32)
        t = xp.asarray([it[1] for it in test_batch], dtype=np.int32)
        # compute grad
        loss = model(x, t)
        sum_loss += loss.data * len(test_batch)
        sum_acc += model.accuracy.data * len(test_batch)
    mean_loss = sum_loss / testsize
    mean_acc = sum_acc / testsize
#     print(''.format(epoch=epoch))
    print('epoch : {epoch}, Mean loss: {loss}, Mean accuracy: {acc}'.format(epoch=epoch, loss=mean_loss, acc=mean_acc))
    # pbar.set_description('epoch : {epoch}'.format(epoch=epoch))

epoch : 0, Mean loss: 1.19482350349, Mean accuracy: 0.568000018597
epoch : 1, Mean loss: 0.910808622837, Mean accuracy: 0.677100002766
epoch : 2, Mean loss: 0.712756276131, Mean accuracy: 0.760100007057
epoch : 3, Mean loss: 0.64687359333, Mean accuracy: 0.787899971008
epoch : 4, Mean loss: 0.637249410152, Mean accuracy: 0.793699979782
epoch : 5, Mean loss: 0.644981026649, Mean accuracy: 0.793099999428
epoch : 6, Mean loss: 0.635182619095, Mean accuracy: 0.800100028515
epoch : 7, Mean loss: 0.675313711166, Mean accuracy: 0.810100018978
epoch : 8, Mean loss: 0.627174675465, Mean accuracy: 0.807299971581
epoch : 9, Mean loss: 0.679122328758, Mean accuracy: 0.810299992561
epoch : 10, Mean loss: 0.714975953102, Mean accuracy: 0.800499975681
epoch : 11, Mean loss: 0.785280585289, Mean accuracy: 0.812699973583
epoch : 12, Mean loss: 0.814564526081, Mean accuracy: 0.799499988556
epoch : 13, Mean loss: 0.742326855659, Mean accuracy: 0.806999981403
epoch : 14, Mean loss: 0.786305963993, Mean ac

# 以下うまく行かなかった例
Caffemodelのまま，層を変えれば良いと思ったがよくわからないエラーがでて断念した  
また，Caffemodelだとエラー時に何が起こっているかわからないため，
モデルは自分で書いたほうが良い

# Change the last layer for CIFAR-10

In [7]:
class Classifier(chainer.Chain):
    """Classifier wrapper for a ``CaffeFunction`` predictor.

    A class with the same name is defined earlier in this notebook; running
    this cell replaces it.
    """

    def __init__(self, predictor):
        """Register ``predictor`` as the child link named ``predictor``."""
        super(Classifier, self).__init__(predictor=predictor)
        self.loss = None
        self.accuracy = None

    def clear(self):
        """Reset the cached loss and accuracy."""
        self.loss = None
        self.accuracy = None

    def __call__(self, x, t):
        """Compute the softmax cross-entropy loss of the Caffe model on ``x``.

        Args:
            x: Input batch, fed to the Caffe model's ``data`` blob.
            t: Target labels for the batch.

        Returns:
            The softmax cross-entropy loss variable.
        """
        self.clear()
        # CaffeFunction returns a tuple with one variable per requested output,
        # hence the unpacking. The raw ``fc8`` scores are requested because
        # F.softmax_cross_entropy applies the softmax itself; the original
        # asked for ``prob``, presumably the already-normalised softmax output.
        y, = self.predictor(inputs={'data': x}, outputs=['fc8'])
        self.loss = F.softmax_cross_entropy(y, t)
        self.accuracy = F.accuracy(y, t)
        chainer.report({'loss': self.loss, 'accuracy': self.accuracy}, self)
        return self.loss


In [39]:
# Shape of the fc7 weight matrix in the Caffe model
vgg['fc7'].W.data.shape

(4096, 4096)

In [40]:
# Shape of the fc8 weight matrix in the Caffe model
vgg['fc8'].W.data.shape

(1000, 4096)

In [87]:
# Replacement fully connected layers; the last one outputs class_labels values
new_fc6 = L.Linear(512, 1000)
new_fc7 = L.Linear(1000, 1000)
new_fc8 = L.Linear(1000, class_labels)

In [88]:
# Inspect the initial weights of the new fc7 layer
new_fc7.W.data

array([[-0.00398499,  0.04089545,  0.01618574, ...,  0.02687539,
        -0.00602881,  0.00047246],
       [-0.0082648 ,  0.00478303, -0.03781039, ..., -0.01898056,
        -0.01351354, -0.00123579],
       [-0.04749636, -0.06221081,  0.01145141, ...,  0.02494635,
        -0.02649529, -0.0293122 ],
       ..., 
       [-0.01758477,  0.01283393, -0.06990477, ..., -0.00308131,
         0.04999574,  0.04004084],
       [ 0.00892289,  0.01961285, -0.02511998, ...,  0.07210597,
         0.01343249,  0.02732559],
       [ 0.02318517,  0.06497291, -0.00266213, ...,  0.06236425,
        -0.05052614, -0.00730087]], dtype=float32)

In [89]:
# Shape of the new fc8 weight matrix
new_fc8.W.data.shape

(10, 1000)

In [90]:
# Rebind fc6's W and b attributes to the parameters of the new layer.
# NOTE(review): whether the Caffe model picks up parameters swapped this way is unverified.
vgg['fc6'].W = new_fc6.W
vgg['fc6'].b = new_fc6.b

In [91]:
# Rebind fc7's W and b attributes to the parameters of the new layer.
# NOTE(review): vgg's fc7 weight was displayed as (4096, 4096) while new_fc7 is Linear(1000, 1000).
vgg['fc7'].W = new_fc7.W
vgg['fc7'].b = new_fc7.b

In [92]:
# Rebind fc8's W and b attributes to the parameters of the new layer.
vgg['fc8'].W = new_fc8.W
vgg['fc8'].b = new_fc8.b

In [93]:
# Dump the instance attributes of the Caffe model for inspection
vgg.__dict__

{'_children': [u'conv1_1',
  u'conv1_2',
  u'conv2_1',
  u'conv2_2',
  u'conv3_1',
  u'conv3_2',
  u'conv3_3',
  u'conv3_4',
  u'conv4_1',
  u'conv4_2',
  u'conv4_3',
  u'conv4_4',
  u'conv5_1',
  u'conv5_2',
  u'conv5_3',
  u'conv5_4',
  u'fc6',
  u'fc7',
  u'fc8'],
 '_cpu': False,
 '_device_id': 0,
 '_params': [],
 '_persistent': [],
 '_uninitialized_params': {},
 u'conv1_1': <chainer.links.connection.convolution_2d.Convolution2D at 0x7f2acb2a4ad0>,
 u'conv1_2': <chainer.links.connection.convolution_2d.Convolution2D at 0x7f2acb2a4bd0>,
 u'conv2_1': <chainer.links.connection.convolution_2d.Convolution2D at 0x7f2acaf97b10>,
 u'conv2_2': <chainer.links.connection.convolution_2d.Convolution2D at 0x7f2acaf97a10>,
 u'conv3_1': <chainer.links.connection.convolution_2d.Convolution2D at 0x7f2acaf97910>,
 u'conv3_2': <chainer.links.connection.convolution_2d.Convolution2D at 0x7f2acaf97810>,
 u'conv3_3': <chainer.links.connection.convolution_2d.Convolution2D at 0x7f2acaf97710>,
 u'conv3_4': <ch

In [94]:
# Show the model's layer list (name, input blobs, output blobs per the recorded output)
vgg['layers']

[(u'conv1_1', [u'data'], [u'conv1_1']),
 (u'relu1_1', [u'conv1_1'], [u'conv1_1']),
 (u'conv1_2', [u'conv1_1'], [u'conv1_2']),
 (u'relu1_2', [u'conv1_2'], [u'conv1_2']),
 (u'pool1', [u'conv1_2'], [u'pool1']),
 (u'conv2_1', [u'pool1'], [u'conv2_1']),
 (u'relu2_1', [u'conv2_1'], [u'conv2_1']),
 (u'conv2_2', [u'conv2_1'], [u'conv2_2']),
 (u'relu2_2', [u'conv2_2'], [u'conv2_2']),
 (u'pool2', [u'conv2_2'], [u'pool2']),
 (u'conv3_1', [u'pool2'], [u'conv3_1']),
 (u'relu3_1', [u'conv3_1'], [u'conv3_1']),
 (u'conv3_2', [u'conv3_1'], [u'conv3_2']),
 (u'relu3_2', [u'conv3_2'], [u'conv3_2']),
 (u'conv3_3', [u'conv3_2'], [u'conv3_3']),
 (u'relu3_3', [u'conv3_3'], [u'conv3_3']),
 (u'conv3_4', [u'conv3_3'], [u'conv3_4']),
 (u'relu3_4', [u'conv3_4'], [u'conv3_4']),
 (u'pool3', [u'conv3_4'], [u'pool3']),
 (u'conv4_1', [u'pool3'], [u'conv4_1']),
 (u'relu4_1', [u'conv4_1'], [u'conv4_1']),
 (u'conv4_2', [u'conv4_1'], [u'conv4_2']),
 (u'relu4_2', [u'conv4_2'], [u'conv4_2']),
 (u'conv4_3', [u'conv4_2'], [u'c

In [95]:
# Take one mini-batch and split it into an input array and a label array
batch = train_iter.next()
x = xp.asarray([it[0] for it in batch], dtype=np.float32)
t = xp.asarray([it[1] for it in batch], dtype=np.int32)

In [101]:
chainer.cuda.get_device(gpu_id).use()  # Make a specified GPU current
vgg.to_cpu()  # Copy the model back to the CPU

<chainer.links.caffe.caffe_function.CaffeFunction at 0x7f2acaff7d50>

In [97]:
gpu_id = 0 #  use gpu
# NOTE(review): the recorded output of this cell is a ValueError saying the
# link is already registered to another chain under the name predictor.
model = Classifier(vgg)

# A negative gpu_id skips the transfer to the GPU
if gpu_id >= 0:
    chainer.cuda.get_device(gpu_id).use()  # Make a specified GPU current
    model.to_gpu()  # Copy the model to the GPU

ValueError: given link is already registered to another chain by name predictor

# Setup an optimizer

In [20]:
# Create an Adam optimizer and attach it to the model
optimizer = chainer.optimizers.Adam()
optimizer.use_cleargrads()
optimizer.setup(model)

In [21]:
from tqdm import tqdm_notebook as tqdm
# from tqdm import tqdm

# Array module of the model, used to build the mini-batch arrays below.
xp = model.xp

# range (instead of the Python-2-only xrange) keeps this cell runnable on
# both Python 2 and Python 3.
pbar = tqdm(range(epochsize))
for epoch in pbar:
    # --- training: one pass over the training data ---
    for batch in train_iter:
        # Split the batch into inputs and labels
        x = xp.asarray([it[0] for it in batch], dtype=np.float32)
        t = xp.asarray([it[1] for it in batch], dtype=np.int32)
        # Forward pass, clear old gradients, backprop and update
        loss = model(x, t)
        model.cleargrads()
        loss.backward()
        optimizer.update()

        # Leave the inner loop once the iterator flags the start of a new epoch
        if train_iter.is_new_epoch:
            break

    # --- evaluation on the test split ---
    sum_loss = 0
    sum_acc = 0
    # Iterate over a copy so that test_iter itself is not advanced
    test_iter_copy = copy.copy(test_iter)
    for test_batch in test_iter_copy:
        x = xp.asarray([it[0] for it in test_batch], dtype=np.float32)
        t = xp.asarray([it[1] for it in test_batch], dtype=np.int32)
        # Forward pass only; weight each batch by its size before averaging
        loss = model(x, t)
        sum_loss += loss.data * len(test_batch)
        sum_acc += model.accuracy.data * len(test_batch)
    mean_loss = sum_loss / testsize
    mean_acc = sum_acc / testsize
    print('epoch : {epoch}, Mean loss: {loss}, Mean accuracy: {acc}'.format(epoch=epoch, loss=mean_loss, acc=mean_acc))

TypeError: __call__() takes at least 3 arguments (2 given)

In [23]:
# Call the predictor without arguments; the recorded output is a TypeError about missing arguments
model.predictor()

TypeError: __call__() takes at least 3 arguments (2 given)

In [31]:
# NOTE(review): the recorded TypeError mentions '__getitem__' and does not look
# like it came from this statement - presumably a stale output; re-run to confirm.
print()

TypeError: 'instancemethod' object has no attribute '__getitem__'

In [33]:
# NOTE(review): the recorded TypeError about a generator object is presumably
# stale output from an earlier version of this cell - re-run to confirm.
model

TypeError: 'generator' object has no attribute '__getitem__'