We create a logistic regression model (i.e., a multi-layer perceptron with no hidden layer) and evaluate it on the canonical MNIST dataset.

In [1]:
import os
import gzip
import numpy as np

import autodiff as ad
from autodiff import initializers
from autodiff import optimizers

# Seeded NumPy random generator; used below to sample training mini-batches.
random_state = np.random.RandomState(0)

In [2]:
def read_mnist_labels(fn):
  """Read a gzip-compressed MNIST label file into one-hot encoded rows.

  Args:
    fn: path to the gzip-compressed label file.

  Returns:
    A float32 array of shape (num_images, 10); row `i` has a 1 in the column
    given by the i-th label byte and 0 elsewhere.
  """
  with gzip.open(fn, 'rb') as f:
    content = f.read()

  # Bytes 4..8 hold the big-endian item count; label bytes start at offset 8.
  num_images = int.from_bytes(content[4:8], byteorder='big')
  # `np.frombuffer` replaces the deprecated binary mode of `np.fromstring`.
  # The resulting view is read-only, which is fine: it is only used as an index.
  indices = np.frombuffer(content, dtype=np.uint8, offset=8)

  labels = np.zeros((num_images, 10), dtype=np.float32)
  labels[np.arange(num_images), indices] = 1
  return labels

In [3]:
def read_mnist_images(fn):
  """Read a gzip-compressed MNIST image file into a float array in [0, 1].

  Args:
    fn: path to the gzip-compressed image file.

  Returns:
    A float32 array of shape (num_images, height, width), where the raw
    uint8 pixel values have been divided by 255.
  """
  with gzip.open(fn, 'rb') as f:
    content = f.read()

  # Header fields are big-endian 4-byte integers: count at bytes 4..8,
  # height at 8..12, width at 12..16. Pixel bytes start at offset 16.
  num_images = int.from_bytes(content[4:8], byteorder='big')
  height = int.from_bytes(content[8:12], byteorder='big')
  width = int.from_bytes(content[12:16], byteorder='big')

  # `np.frombuffer` replaces the deprecated binary mode of `np.fromstring`.
  # The view is read-only, but `astype` below makes a fresh writable copy.
  pixels = np.frombuffer(content, dtype=np.uint8, offset=16)
  images = pixels.reshape((num_images, height, width))
  return images.astype(np.float32) / 255.

Make sure you have downloaded the following 4 files and placed them in the current directory.

In [4]:
# Load the training split (images first, then labels).
train_images, train_labels = (
  read_mnist_images('train-images-idx3-ubyte.gz'),
  read_mnist_labels('train-labels-idx1-ubyte.gz'),
)
# Load the held-out test split the same way.
test_images, test_labels = (
  read_mnist_images('t10k-images-idx3-ubyte.gz'),
  read_mnist_labels('t10k-labels-idx1-ubyte.gz'),
)

Build a logistic regression model with L2 regularization.

In [5]:
# Hyperparameter passed to `ad.l2norm` below as the regularization argument.
reg = 1e-3
# Initializers for the weight matrix (truncated normal) and the bias (zeros),
# and the gradient-descent optimizer used in the training loop.
tni = initializers.TruncatedNormalInitializer(mean=0.0, stddev=0.01, seed=0)
zi = initializers.ZerosInitializer()
gd = optimizers.GradientDescentOptimizer(alpha=0.5)

# Placeholders are fed at run time; `None` leaves the batch dimension open.
# 784 matches the flattened image size used by the feeding code in later cells.
inputs = ad.placeholder((None, 784))
labels = ad.placeholder((None, 10))
# Trainable parameters: one weight column and one bias entry per class.
weight = ad.variable((784, 10), tni)
bias = ad.variable((10,), zi)

# Linear scores per class, then the batch-averaged softmax cross-entropy loss.
logits = ad.matmul(inputs, weight) + bias
loss = ad.reduce_mean(ad.softmax_cross_entropy_loss(labels, logits))
# Add the penalty on `weight` only (the bias is not regularized).
loss = loss + ad.l2norm(weight, reg)

Set up the graph and runtime.

In [6]:
# Fetch the default graph and initialize its variables
# (presumably via the initializers given at variable creation — confirm in autodiff).
graph = ad.get_default_graph()
graph.initialize_variables()

# Create a runtime and attach it to the graph; the later cells open
# `runtime.forward_backward_cycle()` contexts around forward/optimize calls.
runtime = ad.RunTime()
graph.set_runtime(runtime)

Training stage: run forward-backward cycles on the computational graph.

In [7]:
# Mini-batch gradient descent: 1000 steps of 100 examples each, reporting the
# loss and batch accuracy every 100th step.
batch_size = 100
for i in range(1000):
  # Sample a mini-batch of distinct training examples and flatten each image.
  which = random_state.choice(train_images.shape[0], size=batch_size, replace=False)
  inputs_val = train_images[which].reshape((batch_size, -1))
  labels_val = train_labels[which]
  feed_dict = {inputs: inputs_val, labels: labels_val}

  with runtime.forward_backward_cycle():
    gd.optimize(loss, feed_dict)

    # Only report on every 100th step; leaving the `with` block via
    # `continue` still closes the cycle.
    if i % 100 != 0:
      continue

    # Forward values are read while the cycle is still active.
    loss_val = loss.forward(feed_dict)
    logits_val = logits.forward(feed_dict)
    predicted = np.argmax(logits_val, axis=1)
    expected = np.argmax(labels_val, axis=1)
    accuracy = np.mean(predicted == expected)
    print('step: %d, loss: %f, accuracy: %f' % (i, loss_val, accuracy))


step: 0, loss: 2.308650, accuracy: 0.110000
step: 100, loss: 0.331418, accuracy: 0.920000
step: 200, loss: 0.447669, accuracy: 0.890000
step: 300, loss: 0.310958, accuracy: 0.920000
step: 400, loss: 0.391424, accuracy: 0.920000
step: 500, loss: 0.270926, accuracy: 0.940000
step: 600, loss: 0.309336, accuracy: 0.940000
step: 700, loss: 0.341731, accuracy: 0.910000
step: 800, loss: 0.349404, accuracy: 0.900000
step: 900, loss: 0.349697, accuracy: 0.900000


At this point we are out of the scope of an active `RunTime`, so its attributes should all be empty.

In [8]:
# Each of these runtime attributes should be empty (falsy) once no cycle is active.
for attr_name in ('_fwval', '_bwval', '_cache_data'):
  assert not getattr(runtime, attr_name)

But `Variables` still hold their updated values. So we can save the logistic regression variable weights to a file.

In [9]:
# Persist the graph's variables; the next cell reads them back from 'lr_weights.npy'.
graph.save_variables('lr_weights')

And then restore from it.

In [10]:
# The `.item()` call implies the file stores a single Python object in a 0-d
# object array (presumably the variable dict written by `save_variables`).
# Object arrays are pickled, and `np.load` refuses them unless
# `allow_pickle=True` is passed (the default is False since NumPy 1.16.3).
# NOTE: unpickling can execute arbitrary code; only load files you trust,
# such as the one written by this notebook in the previous cell.
var_dict = np.load('lr_weights.npy', allow_pickle=True).item()

# Re-initialize the graph's variables from the restored values.
graph.initialize_variables(var_dict=var_dict)

Evaluate on test set using the restored variable weights.

In [11]:
# Feed the whole test set at once, flattening each image to 784 values.
feed_dict = {inputs: test_images.reshape((-1, 784))}

# Forward passes must run inside an active cycle of the runtime.
with runtime.forward_backward_cycle():
  logits_val = logits.forward(feed_dict)

# Accuracy: fraction of examples whose highest-scoring class matches the label.
predicted = np.argmax(logits_val, axis=1)
expected = np.argmax(test_labels, axis=1)
test_accuracy = np.mean(predicted == expected)
print('accuracy', test_accuracy)

accuracy 0.9159
