In [31]:
from azureml import Workspace

# Open the Azure ML workspace. No credentials are passed here, so they are
# presumably picked up from the hosting environment -- TODO confirm.
ws = Workspace()
# Look up the MNIST training and test datasets by their workspace names.
ds = ws.datasets['MNIST Train 60k 28x28 dense']
ds2 = ws.datasets['MNIST Test 10k 28x28 dense']
# Materialise each dataset as a DataFrame and keep only its `.values` array;
# the later cells slice these arrays by column.
train = ds.to_dataframe().values
test = ds2.to_dataframe().values

In [80]:
try:
    import chainer
except ImportError, e:
    !pip install chainer
    import chainer
    
import os
import six
import numpy as np
import chainer.functions as F
import chainer.links as L
from chainer import serializers
from chainer import optimizers

In [65]:
# Column 0 of every row is used as the class label.
labels_train = train[:, 0].astype(np.int32)
labels_test = test[:, 0].astype(np.int32)

# Number of samples in each split.
N = labels_train.size
N_test = labels_test.size

# The remaining columns are the image values: cast to float32, divide by 255
# (presumably mapping 8-bit pixel intensities into [0, 1] -- TODO confirm) and
# reshape to (samples, 1, 28, 28).
data_train = (train[:, 1:].astype(np.float32) / 255).reshape(N, 1, 28, 28)
data_test = (test[:, 1:].astype(np.float32) / 255).reshape(N_test, 1, 28, 28)

## Start defining Neural Network architectures

One Hidden Layer Network

In [101]:
# Number of units in each hidden fully-connected layer of the OneLayer and
# TwoLayer networks; read when those classes are instantiated.
nb_nodes = 200

class OneLayer(chainer.Chain):
    """Fully-connected network with a single sigmoid hidden layer.

    The hidden layer ``l1`` has ``nb_nodes`` units and is declared with an
    input size of ``None``; the output layer ``l2`` produces 10 values.
    """

    def __init__(self):
        super(OneLayer, self).__init__(
            l1=L.Linear(None, nb_nodes),
            l2=L.Linear(nb_nodes, 10),
        )

    def __call__(self, x):
        """Return ``l2`` applied to the sigmoid of ``l1(x)`` (no final activation)."""
        hidden = F.sigmoid(self.l1(x))
        return self.l2(hidden)

Two Hidden Layers Network

In [102]:
class TwoLayer(chainer.Chain):
    """Fully-connected network with two sigmoid hidden layers.

    Both hidden layers (``l1``, ``l2``) have ``nb_nodes`` units; ``l1`` is
    declared with an input size of ``None``. The output layer ``l3`` produces
    10 values.
    """

    def __init__(self):
        super(TwoLayer, self).__init__(
            l1=L.Linear(None, nb_nodes),
            l2=L.Linear(nb_nodes, nb_nodes),
            l3=L.Linear(nb_nodes, 10),
        )

    def __call__(self, x):
        """Return ``l3`` applied to two successive sigmoid-activated layers."""
        first_hidden = F.sigmoid(self.l1(x))
        second_hidden = F.sigmoid(self.l2(first_hidden))
        return self.l3(second_hidden)

Basic Convolution

In [126]:
class SimpleCNN(chainer.Chain):
    """Two stacked 5x5 convolutions followed by two fully-connected layers.

    ``c1`` maps 1 -> 5 channels and ``c2`` maps 5 -> 10 channels. ``l3`` is
    declared with 4000 inputs, i.e. 10 channels of 20x20 feature maps, which
    is what two unpadded stride-1 5x5 convolutions give for a 28x28 input
    (assumes Convolution2D's default stride/padding -- TODO confirm).
    """

    def __init__(self):
        super(SimpleCNN, self).__init__(
            c1=L.Convolution2D(1, 5, 5),
            c2=L.Convolution2D(5, 10, 5),
            l3=L.Linear(4000, 100),
            l4=L.Linear(100, 10),
        )

    def __call__(self, x):
        """Return the 10 output values of ``l4`` for input batch ``x``."""
        # NOTE(review): there is no activation between c1 and c2; the sigmoid
        # is applied only after the second convolution.
        conv_features = F.sigmoid(self.c2(self.c1(x)))
        hidden = F.sigmoid(self.l3(conv_features))
        return self.l4(hidden)

Convolution and Pooling

In [145]:
class PoolingCNN(chainer.Chain):
    """Two 5x5 convolutions, each followed by 2x2 max pooling, then two linear layers.

    ``l3`` is declared with 160 inputs, i.e. 10 channels of 4x4 feature maps,
    which is what conv(5) -> pool(2) -> conv(5) -> pool(2) gives for a 28x28
    input (assumes Convolution2D's default stride/padding -- TODO confirm).
    """

    def __init__(self):
        super(PoolingCNN, self).__init__(
            c1=L.Convolution2D(1, 5, 5),
            c2=L.Convolution2D(5, 10, 5),
            l3=L.Linear(160, 100),
            l4=L.Linear(100, 10),
        )

    def __call__(self, x):
        """Return the 10 output values of ``l4`` for input batch ``x``."""
        pooled_first = F.max_pooling_2d(self.c1(x), 2, stride=2)
        pooled_second = F.max_pooling_2d(self.c2(pooled_first), 2, stride=2)
        # The sigmoid is applied only after the second pooling stage.
        hidden = F.sigmoid(self.l3(F.sigmoid(pooled_second)))
        return self.l4(hidden)
    

Train model

In [105]:
def train_model(model, batchsize = 100, num_epochs = 20):
    """
    Trains a Chainer model and evaluates it on the test set after every epoch.

    The data is not passed in: the function reads the module-level arrays
    data_train / labels_train (N samples) and data_test / labels_test
    (N_test samples).

    A new Adam optimizer is created on every call, but the model's parameters
    are not re-initialised here, so calling this again on the same model
    object continues training from its current weights.

    Arguments:

    model -- The model to train. It is called as model(x, t) to obtain the
             loss and must expose `loss` and `accuracy` attributes after each
             call (the L.Classifier wrappers built in this notebook are used
             that way).

    Keyword arguments:

    batchsize -- The batchsize to use during training and evaluation
    num_epochs -- The number of epochs to train

    Returns:

    The mean test accuracy measured after the last epoch
    (0 if num_epochs is less than 1).
    """

    optimizer = optimizers.Adam()
    optimizer.setup(model)
    accuracy = 0

    for epoch in six.moves.range(1, num_epochs + 1):
        # Format the message explicitly: without print_function imported,
        # Python 2 would print the tuple ('epoch', n) instead of plain text.
        print('epoch {}'.format(epoch))

        # training: visit the samples in a fresh random order every epoch
        perm = np.random.permutation(N)
        sum_accuracy = 0
        sum_loss = 0

        for i in six.moves.range(0, N, batchsize):
            x = chainer.Variable(np.asarray(data_train[perm[i:i + batchsize]]))
            t = chainer.Variable(np.asarray(labels_train[perm[i:i + batchsize]]))

            # run the forward pass, compute the loss, do backprop and then update the weights
            optimizer.update(model, x, t)

            # Weight by the actual batch length so a short final batch is averaged correctly.
            sum_loss += float(model.loss.data) * len(t.data)
            sum_accuracy += float(model.accuracy.data) * len(t.data)

        print('train mean loss={}, accuracy={}'.format(sum_loss / N, sum_accuracy / N))

        # evaluation
        sum_accuracy = 0
        sum_loss = 0
        for i in six.moves.range(0, N_test, batchsize):
            # volatile='on' presumably tells this Chainer version not to build
            # the backward graph during evaluation -- TODO confirm.
            x = chainer.Variable(np.asarray(data_test[i:i + batchsize]),
                                 volatile='on')
            t = chainer.Variable(np.asarray(labels_test[i:i + batchsize]),
                                 volatile='on')
            loss = model(x, t)
            sum_loss += float(loss.data) * len(t.data)
            sum_accuracy += float(model.accuracy.data) * len(t.data)

        print('test  mean loss={}, accuracy={}'.format(sum_loss / N_test, sum_accuracy / N_test))
        # Overwritten every epoch, so only the last epoch's value is returned.
        accuracy = sum_accuracy / N_test
    return accuracy

In [146]:
# Wrap each network in L.Classifier with softmax cross-entropy as the loss
# function; train_model reads the loss and accuracy from these wrappers.
# NOTE(review): the models are created only here, so re-running a training
# cell continues from the current weights unless this cell is re-run first.
onelayer_model = L.Classifier(OneLayer(),lossfun=F.softmax_cross_entropy)
twolayer_model = L.Classifier(TwoLayer(),lossfun=F.softmax_cross_entropy)
conv_model = L.Classifier(SimpleCNN(), lossfun=F.softmax_cross_entropy)
pool_model = L.Classifier(PoolingCNN(), lossfun=F.softmax_cross_entropy)

Train and evaluate one hidden layer network

In [108]:
# One slot per architecture, filled in by the training cells:
# [one hidden layer, two hidden layers, basic convolution, convolution + pooling].
accuracy = np.zeros(4)
# Slot 0: test accuracy after the final epoch for the one-hidden-layer network.
accuracy[0] = train_model(onelayer_model, num_epochs = 20)

('epoch', 1)
train mean loss=0.0138009185843, accuracy=0.99763333559
test  mean loss=0.0667272699838, accuracy=0.979000006318
('epoch', 2)
train mean loss=0.0109935877408, accuracy=0.998666667938
test  mean loss=0.0665229406033, accuracy=0.979800004959
('epoch', 3)
train mean loss=0.00923426642296, accuracy=0.999116667509
test  mean loss=0.0679031881661, accuracy=0.979500006437
('epoch', 4)
train mean loss=0.00790773673488, accuracy=0.999333333969
test  mean loss=0.0669504509203, accuracy=0.979800004959
('epoch', 5)
train mean loss=0.00686494637281, accuracy=0.999366667271
test  mean loss=0.0675697085301, accuracy=0.979800006747
('epoch', 6)
train mean loss=0.00581161586975, accuracy=0.99956666708
test  mean loss=0.0684038205327, accuracy=0.979800004363
('epoch', 7)
train mean loss=0.00487792049826, accuracy=0.999733333588
test  mean loss=0.0680542972581, accuracy=0.979500004053
('epoch', 8)
train mean loss=0.00420592786594, accuracy=0.999816666842
test  mean loss=0.0687279174062, accu

Train and evaluate Two hidden layers network

In [109]:
# Slot 1: test accuracy after the final epoch for the two-hidden-layer network.
accuracy[1] = train_model(twolayer_model, num_epochs = 20)

('epoch', 1)
train mean loss=0.00479928974365, accuracy=0.998800001144
test  mean loss=0.0853722799084, accuracy=0.98060000658
('epoch', 2)
train mean loss=0.00242555570214, accuracy=0.999650000334
test  mean loss=0.0849670733628, accuracy=0.979700006247
('epoch', 3)
train mean loss=0.00534159680089, accuracy=0.99848333478
test  mean loss=0.0922220753541, accuracy=0.979400007129
('epoch', 4)
train mean loss=0.00262836227231, accuracy=0.999416667223
test  mean loss=0.08630336076, accuracy=0.98050000608
('epoch', 5)
train mean loss=0.000738999309287, accuracy=1.0
test  mean loss=0.0858044913109, accuracy=0.981300007105
('epoch', 6)
train mean loss=0.00113235135634, accuracy=0.999800000191
test  mean loss=0.0895912206624, accuracy=0.980100006461
('epoch', 7)
train mean loss=0.0067389647451, accuracy=0.997733335495
test  mean loss=0.0932179689693, accuracy=0.979800006151
('epoch', 8)
train mean loss=0.00114720687534, accuracy=0.999816666842
test  mean loss=0.0870978020306, accuracy=0.98230

Train and evaluate Basic Convolution Network

In [128]:
# Slot 2: test accuracy after the final epoch for the basic convolution network.
accuracy[2] = train_model(conv_model, num_epochs = 20)

('epoch', 1)
train mean loss=0.458803639207, accuracy=0.888016668359
test  mean loss=0.126975066625, accuracy=0.96650000453
('epoch', 2)
train mean loss=0.101595068521, accuracy=0.972650008202
test  mean loss=0.0735563258268, accuracy=0.979200006723
('epoch', 3)
train mean loss=0.0652188406714, accuracy=0.982350010773
test  mean loss=0.0549358542939, accuracy=0.982400007844
('epoch', 4)
train mean loss=0.0478772817296, accuracy=0.986633343498
test  mean loss=0.0492632593343, accuracy=0.984800006747
('epoch', 5)
train mean loss=0.0376000160693, accuracy=0.989800008237
test  mean loss=0.0475258720492, accuracy=0.984900007248
('epoch', 6)
train mean loss=0.0300541626856, accuracy=0.99215000699
test  mean loss=0.0424096124654, accuracy=0.986300008297
('epoch', 7)
train mean loss=0.0234046762064, accuracy=0.993650005758
test  mean loss=0.0393977614006, accuracy=0.987200004458
('epoch', 8)
train mean loss=0.0186166798372, accuracy=0.995316671133
test  mean loss=0.039381391533, accuracy=0.986

Train and evaluate Convolution and Pooling Network

In [147]:
# Slot 3: test accuracy after the final epoch for the convolution + pooling network.
accuracy[3] = train_model(pool_model, num_epochs = 20)

('epoch', 1)
train mean loss=0.847008854498, accuracy=0.777050000255
test  mean loss=0.258624150343, accuracy=0.932300003767
('epoch', 2)
train mean loss=0.202863755325, accuracy=0.944400002162
test  mean loss=0.142996984748, accuracy=0.95740000248
('epoch', 3)
train mean loss=0.131877895265, accuracy=0.96226667136
test  mean loss=0.107767989729, accuracy=0.969200006723
('epoch', 4)
train mean loss=0.101724174898, accuracy=0.969800008138
test  mean loss=0.0852272914047, accuracy=0.974100006223
('epoch', 5)
train mean loss=0.0857837808629, accuracy=0.974366675417
test  mean loss=0.0770316862804, accuracy=0.976600006223
('epoch', 6)
train mean loss=0.0750181726723, accuracy=0.977400009632
test  mean loss=0.0642613154696, accuracy=0.980500005484
('epoch', 7)
train mean loss=0.0667218869211, accuracy=0.98025001049
test  mean loss=0.0614543408761, accuracy=0.980100007653
('epoch', 8)
train mean loss=0.0610540196641, accuracy=0.981350010633
test  mean loss=0.0582931590476, accuracy=0.9812000

In [148]:
# Display the final-epoch test accuracies collected above, one per architecture.
accuracy

array([ 0.98000001,  0.98260001,  0.98790001,  0.98600001])