# High-level Caffe2 MNIST Example

In [1]:
import numpy as np
import os
import caffe2
from caffe2.python import core, model_helper, net_drawer, workspace, visualize, brew, optimizer, utils
from caffe2.proto import caffe2_pb2
from common.params import *
from common.utils import *

In [2]:
print(np.__version__)
# Pick device 0 of the requested kind: CUDA when the GPU flag is set, CPU otherwise.
device_type = caffe2_pb2.CUDA if GPU else caffe2_pb2.CPU
device_opts = core.DeviceOption(device_type, 0)

1.13.0


In [3]:
def create_model(m, device_opts):
    """Add the convolutional classifier operators to model helper ``m``.

    The network reads the blob named 'data' and stacks:
    conv(5x5, 1->20) -> max-pool(2, stride 2) -> conv(5x5, 20->50) ->
    max-pool(2, stride 2) -> fc(800->500) -> relu -> fc(500->N_CLASSES) -> softmax.

    Args:
        m: model helper the operators are added to.
        device_opts: device option under which all operators are created.

    Returns:
        The blob reference produced by ``brew.softmax`` (blob name 'softmax').
    """
    with core.DeviceScope(device_opts):
        # Feature extractor. Spatial size for a 28 x 28 input:
        # 28 -> 24 (conv1) -> 12 (pool1) -> 8 (conv2) -> 4 (pool2)
        features = brew.conv(m, 'data', 'conv1', dim_in=1, dim_out=20, kernel=5)
        features = brew.max_pool(m, features, 'pool1', kernel=2, stride=2)
        features = brew.conv(m, features, 'conv2', dim_in=20, dim_out=50, kernel=5)
        features = brew.max_pool(m, features, 'pool2', kernel=2, stride=2)

        # Classifier head. The fc input size is channels * height * width
        # of the last pooling output: 50 * 4 * 4.
        flat_dim = 50 * 4 * 4
        hidden = brew.fc(m, features, 'fc3', dim_in=flat_dim, dim_out=500)
        # ReLU writes its result back into the same blob (input == output).
        hidden = brew.relu(m, hidden, hidden)
        logits = brew.fc(m, hidden, 'pred', dim_in=500, dim_out=N_CLASSES)
        return brew.softmax(m, logits, 'softmax')

In [4]:
def add_training_operators(softmax, m, device_opts):
    """Attach loss, accuracy, gradient and SGD update operators to ``m``.

    Args:
        softmax: blob holding the class probabilities produced by the model.
        m: model helper that receives the training operators.
        device_opts: device option under which all operators are created.

    The operators read the blob named "label" and write the blobs
    'xent', "loss" and "accuracy".
    """
    with core.DeviceScope(device_opts):
        # Loss: label cross-entropy averaged into the "loss" blob.
        cross_entropy = m.LabelCrossEntropy([softmax, "label"], 'xent')
        mean_loss = m.AveragedLoss(cross_entropy, "loss")
        # Accuracy is tracked as a blob; it does not feed into the gradients.
        brew.accuracy(m, [softmax, "label"], "accuracy")
        m.AddGradientOperators([mean_loss])
        # Fixed learning-rate SGD with momentum.
        sgd_settings = dict(base_learning_rate=LR, policy='fixed', momentum=MOMENTUM)
        optimizer.build_sgd(m, **sgd_settings)

In [5]:
def init_model():
    """Build the training model and register its net in the Caffe2 workspace.

    Reads the notebook globals ``x_train``, ``y_train``, ``BATCHSIZE`` and
    ``device_opts``, so the data-loading and device cells must run first.

    Returns:
        The ``ModelHelper`` named "train_net" with the network and training
        operators added, its parameters initialised and its net created.
    """
    # Create place-holders for the input blobs by feeding the first batch
    workspace.FeedBlob("data", x_train[:BATCHSIZE], device_option=device_opts)
    workspace.FeedBlob("label", y_train[:BATCHSIZE], device_option=device_opts)

    # Initialise model
    train_model = model_helper.ModelHelper(name="train_net")
    softmax = create_model(train_model, device_opts=device_opts)
    add_training_operators(softmax, train_model, device_opts=device_opts)

    # Initialise workspace
    workspace.RunNetOnce(train_model.param_init_net)
    # overwrite=True keeps this function re-runnable in a live kernel: without
    # it a second call fails because a net named "train_net" already exists.
    workspace.CreateNet(train_model.net, overwrite=True)
    return train_model

In [6]:
def yield_mb(X, y, batchsize=64):
    """Yield aligned ``(X_batch, y_batch)`` slices of ``batchsize`` items each.

    Only complete batches are produced; a trailing remainder shorter than
    ``batchsize`` is dropped.

    Args:
        X: sliceable sequence of samples.
        y: sliceable sequence of labels, same length as ``X``.
        batchsize: number of items per batch.

    Yields:
        Tuples ``(X[start:stop], y[start:stop])`` in order.
    """
    assert len(X) == len(y)
    n_full = len(X) // batchsize
    # Walk over the start offset of every complete batch.
    for start in range(0, n_full * batchsize, batchsize):
        stop = start + batchsize
        yield X[start:stop], y[start:stop]

In [7]:
%%time
# Load MNIST in the layout requested from the helper (channel_first=True)
x_train, x_test, y_train, y_test = mnist_for_library(channel_first=True)
# Caffe2-specific: cast both label arrays to int32
y_train, y_test = (labels.astype(np.int32) for labels in (y_train, y_test))

Wall time: 542 ms


In [8]:
%%time
# Initialise model: build the training net via init_model() and keep the
# returned model helper for the training loop below
model = init_model()

Wall time: 604 ms


In [9]:
%%time
# Train model: EPOCHS passes over the training set, one mini-batch per net run
for epoch in range(EPOCHS):
    # TODO: check whether feeding blobs from Python each step is slower than
    # using a data iterator.
    for batch_data, batch_label in yield_mb(x_train, y_train, BATCHSIZE):
        for blob_name, blob_value in (("data", batch_data), ("label", batch_label)):
            workspace.FeedBlob(blob_name, blob_value, device_option=device_opts)
        workspace.RunNet(model.net, 1)
    print("Finished epoch: ", epoch)
    # The "loss" blob holds the value from the last mini-batch run in this epoch
    last_loss = workspace.FetchBlob("loss")
    print(str(epoch) + ': ' + str(last_loss))

Finished epoch:  0
0: 0.017899587750434875
Finished epoch:  1
1: 0.01151332538574934
Finished epoch:  2
2: 0.0009329636231996119
Finished epoch:  3
3: 0.002238157205283642
Finished epoch:  4
4: 0.0023911683820188046
Finished epoch:  5
5: 0.0005027520237490535
Finished epoch:  6
6: 0.0006009282660670578
Finished epoch:  7
7: 0.000260871194768697
Finished epoch:  8
8: 0.00029484747210517526
Finished epoch:  9
9: 7.719698623986915e-05
Finished epoch:  10
10: 0.0004470804997254163
Finished epoch:  11
11: 0.0005770412972196937
Wall time: 1min 28s


In [10]:
%%time
# Test model
# Built with init_params=False; presumably this makes the test net reuse the
# trained parameter blobs already in the workspace - confirm against the
# ModelHelper documentation.
test_model = model_helper.ModelHelper(name="test_net", init_params=False)
create_model(test_model, device_opts=device_opts)
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)

# Evaluate every complete mini-batch of the test set.
# yield_mb drops a trailing partial batch, so the result buffers are sized to
# the number of samples actually evaluated. Sizing them to len(y_test) would
# leave the tail as 0 == 0 and count those samples as correct.
# Assumes y_test is 1-D (one label per sample).
n_evaluated = (len(y_test) // BATCHSIZE) * BATCHSIZE
# Builtin int replaces the np.int alias, which was removed in NumPy 1.24.
y_guess = np.zeros(n_evaluated, dtype=int)
y_truth = np.zeros(n_evaluated, dtype=int)
c = 0
for data, label in yield_mb(x_test, y_test, BATCHSIZE):
    workspace.FeedBlob("data", data, device_option=device_opts)
    workspace.RunNet(test_model.net, 1)
    # Predicted class = index of the largest softmax probability
    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = np.argmax(workspace.FetchBlob("softmax"), axis=-1)
    y_truth[c*BATCHSIZE:(c+1)*BATCHSIZE] = label
    c += 1

Wall time: 348 ms


In [11]:
# Fraction of positions where the predicted class equals the true label
matches = y_guess == y_truth
accuracy = sum(matches) / len(y_guess)
print("Accuracy: ", accuracy)

Accuracy:  0.9893
