# Sources
* https://docs.chainer.org/en/stable/tutorial/basic.html
* https://docs.chainer.org/en/stable/tutorial/gpu.html

## See Also:
* https://github.com/hido/chainer-handson/blob/master/chainer.ipynb


In [1]:
import numpy as np
import cupy
import chainer
from chainer import cuda, Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions

In [2]:
# Forward/Backward Computation
# Demonstrates automatic differentiation of y = x**2 - 2*x + 1 on a
# one-element input, then on a 2x3 input.
x_data = np.array([5], dtype=np.float32)
x = Variable(x_data)
# No backward pass has run yet, so this prints None.
print('x.grad INIT',x.grad)

y = x**2 - 2 * x + 1
print('y.data',y.data)

# Seeding the output gradient by hand is not needed here because y holds a
# single element; the manual form would be:
#   y.grad = np.ones((1,), dtype=np.float32)
y.backward()
print('x.grad',x.grad) # == 2*x - 2, i.e. 8 for x = 5

# Same function, but with the 2*x term kept as a named intermediate z so its
# gradient can be inspected.
z = 2*x
y = x**2 - z + 1
# retain_grad=True is passed so that z.grad is available to print afterwards
# (dy/dz == -1).
y.backward(retain_grad=True)
print('z.grad',z.grad)

# Repeat with a 2x3 input: y now has six elements.
x = Variable(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32))
y = x**2 - 2*x + 1
y.grad = np.ones((2, 3), dtype=np.float32) # must be seeded by hand: y has more than one element
y.backward()
print('y.data',y.data)
print('x.grad',x.grad) # == 2*x - 2, element-wise

x.grad INIT None
y.data [ 16.]
x.grad [ 8.]
z.grad [-1.]
y.data [[  0.   1.   4.]
 [  9.  16.  25.]]
x.grad [[  0.   2.   4.]
 [  6.   8.  10.]]


In [3]:
# Links
# A Linear link built with (3, 2): in the recorded run W prints as a 2x3
# matrix and b as two zeros.
f = L.Linear(3, 2)
print('f.W.data',f.W.data)
print('f.b.data',f.b.data)

# Apply the link to a batch of two 3-element rows; y has shape (2, 2).
x = Variable(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32))
y = f(x)
print('y.data',y.data)

f.cleargrads() # original author's note: if this call is missing, the grad will be NaN
# After cleargrads() the gradient is None; to check, run:
#   print('f.W.grad',f.W.grad)
# y has more than one element, so the output gradient is seeded by hand.
y.grad = np.ones((2, 2), dtype=np.float32)
y.backward()
# With an all-ones output gradient, each row of W.grad is the column-wise sum
# of x: [1+4, 2+5, 3+6] == [5, 7, 9].
print('f.W.grad',f.W.grad)

f.W.data [[-0.25846836  0.14406465  0.23706345]
 [-0.26762658  0.23766576 -1.45790076]]
f.b.data [ 0.  0.]
y.data [[ 0.74085128 -4.16599703]
 [ 1.10883045 -8.62958145]]
f.W.grad [[ 5.  7.  9.]
 [ 5.  7.  9.]]


In [4]:
# Write a model as a chain
class MyChain(Chain):
    """Two stacked linear layers held as the named attributes ``l1`` and ``l2``.

    ``l1`` is ``L.Linear(4, 3)`` and ``l2`` is ``L.Linear(3, 2)``.
    """

    def __init__(self):
        super().__init__()
        # Child links are assigned inside init_scope(), following the Chain API.
        with self.init_scope():
            self.l1 = L.Linear(4, 3)
            self.l2 = L.Linear(3, 2)

    def __call__(self, x):
        """Return ``l2(l1(x))``; no activation is applied between the layers."""
        return self.l2(self.l1(x))

class MyChain2(ChainList):
    """Two stacked linear layers (4 -> 3 -> 2) addressed by position."""

    def __init__(self):
        layers = (
            L.Linear(4, 3),
            L.Linear(3, 2),
        )
        super().__init__(*layers)

    def __call__(self, x):
        """Feed ``x`` through child link 0, then through child link 1."""
        hidden = self[0](x)
        return self[1](hidden)

In [5]:
# Optimizer
# Build the model and attach an SGD optimizer with a WeightDecay(0.0005) hook.
model = MyChain()
optimizer = optimizers.SGD()
optimizer.setup(model)
weight_decay_hook = chainer.optimizer.WeightDecay(0.0005)
optimizer.add_hook(weight_decay_hook)

# Show the first layer's parameters before any update is applied.
print('model.l1.W.data', model.l1.W.data)
print('model.l1.b.data', model.l1.b.data)

# Manually call optimizer.update using lossfun
def lossfun(arg1, arg2):
    """Return the loss: the sum of the model's outputs for ``arg1 - arg2``.

    Uses the module-level ``model`` defined earlier in this cell.
    """
    return F.sum(model(arg1 - arg2))
# Two random batches of shape (2, 4), drawn via np.random.uniform(-1, 1, ...)
# and cast with astype('f').
batch_shape = (2, 4)
arg1 = np.random.uniform(-1, 1, batch_shape).astype('f')
arg2 = np.random.uniform(-1, 1, batch_shape).astype('f')

# One optimizer step driven by lossfun; the parameters printed below differ
# from the ones printed before the update.
optimizer.update(lossfun, Variable(arg1), Variable(arg2))
print('AFTER OPTIMIZER UPDATE')
print('model.l1.W.data', model.l1.W.data)
print('model.l1.b.data', model.l1.b.data)

model.l1.W.data [[ 0.26474601  0.65291721 -1.13191652  0.16819128]
 [ 0.25721487  0.63281906  0.47706214  1.12798083]
 [-0.03506165 -0.00135101 -0.40000299 -0.14114116]]
model.l1.b.data [ 0.  0.  0.]
AFTER OPTIMIZER UPDATE
model.l1.W.data [[ 0.26232165  0.65381378 -1.13263738  0.16930406]
 [ 0.28351146  0.62304968  0.48494422  1.11588883]
 [-0.05419634  0.00575511 -0.40573788 -0.13234614]]
model.l1.b.data [ 0.00210259 -0.02281998  0.01660428]


In [6]:
# Example: Multi-layer Perceptron on MNIST
train, test = datasets.get_mnist()

# Training iterator: batches of 100, shuffled.
train_iter = iterators.SerialIterator(
    train,
    batch_size=100,
    shuffle=True,
)
# Evaluation iterator: batches of 100, fixed order, no repetition.
test_iter = iterators.SerialIterator(
    test,
    batch_size=100,
    repeat=False,
    shuffle=False,
)

class MLP(Chain):
    """Three linear layers with leaky-ReLU (slope 0.1) after the first two.

    Args:
        n_units: Output size of each of the two hidden layers.
        n_out: Output size of the final layer.
    """

    def __init__(self, n_units, n_out):
        super().__init__()
        with self.init_scope():
            # Passing None as the input size lets each layer infer it.
            self.l1 = L.Linear(None, n_units)  # n_in -> n_units
            self.l2 = L.Linear(None, n_units)  # n_units -> n_units
            self.l3 = L.Linear(None, n_out)    # n_units -> n_out

    def __call__(self, x):
        """Return the output of ``l3``; no activation follows the last layer."""
        hidden = x
        for layer in (self.l1, self.l2):
            hidden = F.leaky_relu(layer(hidden), slope=0.1)
        return self.l3(hidden)

class Classifier(Chain):
    """Wrap a predictor so that calling the chain returns a training loss.

    Args:
        predictor: Callable link mapping an input batch to class scores.
    """

    def __init__(self, predictor):
        super().__init__()
        with self.init_scope():
            self.predictor = predictor

    def __call__(self, x, t):
        """Return the softmax cross-entropy of ``predictor(x)`` against ``t``.

        The loss and the accuracy are also reported under the keys
        ``'loss'`` and ``'accuracy'``, with this chain as the observer.
        """
        scores = self.predictor(x)
        loss = F.softmax_cross_entropy(scores, t)
        report({'loss': loss, 'accuracy': F.accuracy(scores, t)}, self)
        return loss

# NOTE: the model is wrapped with Chainer's built-in L.Classifier, not the
# Classifier class defined earlier in this cell.
model = L.Classifier(MLP(100, 10))
optimizer = optimizers.SGD()
optimizer.setup(model)

# Train for 20 epochs; trainer output goes to the 'result' directory.
stop_trigger = (20, 'epoch')
updater = training.StandardUpdater(train_iter, optimizer)
trainer = training.Trainer(updater, stop_trigger, out='result')

# Extensions are registered in this order: evaluation, logging, console report.
report_columns = ['epoch', 'main/accuracy', 'validation/main/accuracy']
for extension in (
    extensions.Evaluator(test_iter, model),
    extensions.LogReport(),
    extensions.PrintReport(report_columns),
):
    trainer.extend(extension)
# Progress bar left disabled:
# trainer.extend(extensions.ProgressBar())
trainer.run()

epoch       main/accuracy  validation/main/accuracy
1           0.719817       0.8654
2           0.877417       0.8952
3           0.8968         0.9054
4           0.905267       0.9143
5           0.910817       0.9184
6           0.9162         0.9219
7           0.920033       0.9241
8           0.923667       0.9283
9           0.927267       0.9318
10          0.930367       0.9339
11          0.933533       0.935
12          0.936183       0.9391
13          0.9385         0.9403
14          0.9405         0.9413
15          0.94285        0.9417
16          0.94495        0.9457
17          0.946717       0.9466

In [10]:
# Run Neural Networks on a Single GPU

# Report whether CUDA and cuDNN are usable before building the GPU model.
print('chainer.cuda.available', chainer.cuda.available)
print('chainer.cuda.cudnn_enabled', chainer.cuda.cudnn_enabled)

train, test = datasets.get_mnist()

# Training iterator: batches of 100, shuffled.
train_iter = iterators.SerialIterator(
    train,
    batch_size=100,
    shuffle=True,
)
# Evaluation iterator: batches of 100, fixed order, no repetition.
test_iter = iterators.SerialIterator(
    test,
    batch_size=100,
    repeat=False,
    shuffle=False,
)

class MLP(Chain):
    """Three linear layers with ELU after the first two.

    The layers are created without choosing a device. Placement is left to
    the caller (for example the ``device`` argument of the updater used in
    this cell, or an explicit ``model.to_gpu()``), so the class can also be
    instantiated on a machine without CUDA.

    Args:
        n_units: Output size of each of the two hidden layers.
        n_out: Output size of the final layer.
    """

    def __init__(self, n_units, n_out):
        super(MLP, self).__init__()
        with self.init_scope():
            # the size of the inputs to each layer will be inferred
            self.l1 = L.Linear(None, n_units)  # n_in -> n_units
            self.l2 = L.Linear(None, n_units)  # n_units -> n_units
            self.l3 = L.Linear(None, n_out)    # n_units -> n_out

    def __call__(self, x):
        """Return the output of ``l3``; no activation follows the last layer."""
        h1 = F.elu(self.l1(x))
        h2 = F.elu(self.l2(h1))
        y = self.l3(h2)
        return y

class Classifier(Chain):
    """Wrap a predictor so that calling the chain returns a training loss.

    Args:
        predictor: Callable link mapping an input batch to class scores.
    """

    def __init__(self, predictor):
        super().__init__()
        with self.init_scope():
            self.predictor = predictor

    def __call__(self, x, t):
        """Return the softmax cross-entropy of ``predictor(x)`` against ``t``.

        The loss and the accuracy are also reported under the keys
        ``'loss'`` and ``'accuracy'``, with this chain as the observer.
        """
        scores = self.predictor(x)
        loss = F.softmax_cross_entropy(scores, t)
        report({'loss': loss, 'accuracy': F.accuracy(scores, t)}, self)
        return loss

# NOTE: the model is wrapped with Chainer's built-in L.Classifier, not the
# Classifier class defined earlier in this cell.
model = L.Classifier(MLP(100, 10))
adam_settings = dict(alpha=1e-4, beta1=0.9, beta2=0.999)
optimizer = optimizers.Adam(**adam_settings)
optimizer.setup(model)

# Device 0 is passed to both the updater and the evaluator.
# Train for 20 epochs; trainer output goes to the 'result' directory.
gpu_id = 0
stop_trigger = (20, 'epoch')
updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
trainer = training.Trainer(updater, stop_trigger, out='result')

# Extensions are registered in this order: evaluation, logging, console report.
report_columns = ['epoch', 'main/accuracy', 'validation/main/accuracy']
for extension in (
    extensions.Evaluator(test_iter, model, device=gpu_id),
    extensions.LogReport(),
    extensions.PrintReport(report_columns),
):
    trainer.extend(extension)
# Progress bar left disabled:
# trainer.extend(extensions.ProgressBar())
trainer.run()

chainer.cuda.available True
chainer.cuda.cudnn_enabled True
epoch       main/accuracy  validation/main/accuracy
1           0.802317       0.901
2           0.903517       0.9146
3           0.916201       0.9222
4           0.924268       0.9274
5           0.929767       0.9334
6           0.935434       0.9368
7           0.939851       0.9394
8           0.943551       0.9432
9           0.94735        0.9475
10          0.950717       0.9472
11          0.953783       0.9527
12          0.95605        0.9539
13          0.958801       0.9565
14          0.9609         0.9569
15          0.962883       0.9585
[J16          0.965068       0.9