In [1]:
import numpy as np
from scipy.stats import mode
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from warnings import filterwarnings
filterwarnings('ignore')

In [2]:
import theano
import pymc3 as pm
import lasagne
import theano.tensor as T

# Short alias for the float dtype Theano is configured to use.
# NOTE(review): not referenced in the visible cells, which cast to np.float64 explicitly.
floatX = theano.config.floatX

In [3]:
import sklearn
from sklearn import datasets
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_mldata
from sklearn.metrics import accuracy_score

In [147]:
def load_dataset(random_state=None):
    """Fetch MNIST and split it into training, validation and test arrays.

    Images are reshaped to ``(n_samples, 1, 28, 28)`` and cast to float64
    (pixel values are not rescaled); labels are cast to float32. 50000
    samples are held out as the test set, and the last 10000 of the
    remaining samples become the validation set.

    Parameters
    ----------
    random_state : int, RandomState instance or None, optional
        Forwarded to ``train_test_split``. Pass an int to make the shuffled
        split reproducible; the default (None) keeps the unseeded behaviour.

    Returns
    -------
    tuple of ndarray
        ``(X_train, y_train, X_val, y_val, X_test, y_test)``.

    Notes
    -----
    ``fetch_mldata`` was deprecated in scikit-learn 0.20 and removed in 0.22;
    on newer versions this loader has to be ported to ``fetch_openml``.
    """
    mnist = fetch_mldata('MNIST original')
    # -1 lets NumPy infer the sample count instead of hard-coding 70000.
    data = mnist['data'].reshape((-1, 1, 28, 28)).astype(np.float64)
    target = mnist['target'].astype(np.float32)
    # Shuffle and hold out 50000 samples for testing; the rest is the
    # training pool.
    X_train, X_test, y_train, y_test = train_test_split(
        data, target, test_size=50000, random_state=random_state)
    # Reserve the last 10000 samples of the training pool for validation.
    X_train, X_val = X_train[:-10000], X_train[-10000:]
    y_train, y_val = y_train[:-10000], y_train[-10000:]

    return X_train, y_train, X_val, y_val, X_test, y_test

# Fetch MNIST once and bind the six train/validation/test arrays at
# notebook level; later cells read them as globals.
print("Loading data...")
(X_train, y_train,
 X_val, y_val,
 X_test, y_test) = load_dataset()
print("Finished!")

Loading data...
Finished!


In [148]:
# Network input shape: an unspecified batch axis (None) followed by the
# per-sample dimensions of X_train.
input_shape = (None,) + tuple(X_train.shape[1:])

In [149]:
class PriorWeights(object):
    """Callable parameter initializer that creates PyMC3 prior variables.

    An instance is called with a parameter shape and returns a new PyMC3
    random variable of that shape, so it must be called inside a
    ``pm.Model`` context. Variables are named ``'<mode><k>'`` where ``k`` is
    the number of calls made on this instance so far.

    Parameters
    ----------
    mode : str
        Name prefix for the created variables (e.g. ``'W'`` or ``'b'``).
    prior : {'gauss', 'laplace', 'flat'}
        Family of the prior placed on the parameter.
    **params
        ``std`` : fixed prior scale used when ``hyper`` is None (default 1.).
        ``hyper`` : None, ``'cauchy'``, ``'normal'`` or ``'flat'``; when set,
        the scale is itself a positive random variable named
        ``'hyper_<mode><k>'``.

    Raises
    ------
    ValueError
        On call, if ``prior`` or ``hyper`` is not one of the supported values.
    """

    _PRIORS = ('gauss', 'laplace', 'flat')
    _HYPERS = (None, 'cauchy', 'normal', 'flat')

    def __init__(self, mode='W', prior='gauss', **params):
        self.count = 0
        self.prior = prior
        self.mode = mode
        params.setdefault('std', 1.)
        params.setdefault('hyper', None)
        self.params = params

    def _scale(self):
        """Return the prior scale: the fixed ``std`` or a new hyperprior variable."""
        hyper = self.params['hyper']
        if hyper is None:
            return self.params['std']
        hyper_name = 'hyper_%s%d' % (self.mode, self.count)
        if hyper == 'cauchy':
            return pm.HalfCauchy(hyper_name, beta=1.)
        if hyper == 'normal':
            # HalfNormal has no location parameter; only the scale is given.
            return pm.HalfNormal(hyper_name, sd=1.)
        # 'flat': a scale must stay positive, so use the half-line improper
        # prior, and give it a name like every other model variable.
        return pm.HalfFlat(hyper_name)

    def __call__(self, shape):
        """Create and return the prior random variable for a parameter of ``shape``."""
        # Fail loudly on misconfiguration instead of returning None or
        # tripping over an unbound scale further down.
        if self.prior not in self._PRIORS:
            raise ValueError("unknown prior %r; expected one of %r"
                             % (self.prior, self._PRIORS))
        if self.params['hyper'] not in self._HYPERS:
            raise ValueError("unknown hyper %r; expected one of %r"
                             % (self.params['hyper'], self._HYPERS))

        self.count += 1
        std = self._scale()
        name = '%s%d' % (self.mode, self.count)
        # Random starting point for the variable.
        testval = np.random.normal(size=shape).astype(np.float64)

        if self.prior == 'gauss':
            return pm.Normal(name, mu=0, sd=std, testval=testval, shape=shape)
        if self.prior == 'laplace':
            return pm.Laplace(name, mu=0, b=std, testval=testval, shape=shape)
        return pm.Flat(name, testval=testval, shape=shape)

In [150]:
def build_ann(prior_b, prior_W, input_var, target_var,
              input_shape, params=(176, 64), total_size=None):
    """Build a Bayesian convolutional network as a PyMC3 model.

    Architecture: two (5x5 conv with 32 filters + 2x2 max-pool) stages, two
    tanh dense layers and a 10-way softmax output whose probabilities feed a
    ``Categorical`` likelihood named ``'out'``.

    Parameters
    ----------
    prior_b, prior_W : callable
        Called by Lasagne with a parameter shape; must return the variable to
        use for that layer's biases / weights (e.g. ``PriorWeights``).
    input_var : Theano tensor
        Network input.
    target_var : Theano tensor or array
        Observed class labels for the likelihood.
    input_shape : tuple
        Shape given to the input layer.
    params : sequence of two ints, optional
        Unit counts of the first and second dense layer.
    total_size : int, optional
        Size of the full data set, passed to the likelihood as ``total_size``
        for minibatch training. Defaults to ``y_train.shape[0]`` read from
        the notebook namespace, which was previously the only option.

    Returns
    -------
    pm.Model
        The model holding all layer parameters and the likelihood.
    """
    n_hid1, n_hid2 = params
    if total_size is None:
        # Backward-compatible fallback to the notebook-level training labels.
        total_size = y_train.shape[0]

    tanh = lasagne.nonlinearities.tanh

    with pm.Model() as neural_network:
        l_in = lasagne.layers.InputLayer(shape=input_shape,
                                         input_var=input_var)

        l_conv1 = lasagne.layers.Conv2DLayer(
            l_in, num_filters=32, filter_size=(5, 5),
            b=prior_b, W=prior_W, nonlinearity=tanh)
        l_pool1 = lasagne.layers.MaxPool2DLayer(l_conv1, pool_size=(2, 2))

        l_conv2 = lasagne.layers.Conv2DLayer(
            l_pool1, num_filters=32, filter_size=(5, 5),
            b=prior_b, W=prior_W, nonlinearity=tanh)
        l_pool2 = lasagne.layers.MaxPool2DLayer(l_conv2, pool_size=(2, 2))

        l_dense1 = lasagne.layers.DenseLayer(
            l_pool2, num_units=n_hid1,
            b=prior_b, W=prior_W, nonlinearity=tanh)
        l_dense2 = lasagne.layers.DenseLayer(
            l_dense1, num_units=n_hid2,
            b=prior_b, W=prior_W, nonlinearity=tanh)

        l_out = lasagne.layers.DenseLayer(
            l_dense2, num_units=10,
            b=prior_b, W=prior_W,
            nonlinearity=lasagne.nonlinearities.softmax)

        prediction = lasagne.layers.get_output(l_out)

        # Creating the variable inside the model context registers it; no
        # local binding is needed.
        pm.Categorical('out', prediction,
                       observed=target_var,
                       total_size=total_size)

    return neural_network

In [151]:
def create_minibatch(data, batch_size=100, seed=0):
    """Yield an endless stream of random mini-batches drawn from ``data``.

    Row indices are drawn independently (with replacement) from a private
    ``RandomState``. Two generators created with the same ``seed`` over
    equally long arrays pick identical rows, which is what keeps separate
    image and label streams paired -- do not give them different seeds.

    Parameters
    ----------
    data : array-like supporting ``len`` and integer-array indexing
        Source array; batches are taken along its first axis.
    batch_size : int, optional
        Number of rows per batch (default 100).
    seed : int, optional
        Seed of the index generator (default 0).

    Yields
    ------
    ndarray
        ``data[ixs]`` for a fresh random index vector of length ``batch_size``.
    """
    rng = np.random.RandomState(seed)
    n_rows = len(data)

    while True:
        ixs = rng.randint(n_rows, size=batch_size)
        yield data[ixs]

In [152]:
def get_prediction(samples_proba, X=None, n_samples=500):
    """Return the majority-vote class prediction over posterior draws.

    Parameters
    ----------
    samples_proba : callable
        Called as ``samples_proba(X, n_samples)``. Assumed to return class
        probabilities with the draws on axis 0 and the classes on the last
        axis (implied by the argmax/mode axes used below).
    X : array-like, optional
        Inputs to predict for. Defaults to the notebook-level ``X_test``.
    n_samples : int, optional
        Number of draws requested from ``samples_proba`` (default 500).

    Returns
    -------
    ndarray
        Most frequent predicted class for each input, with the draw axis
        removed.
    """
    if X is None:
        X = X_test
    # Use the callable that was passed in; the previous body referenced a
    # differently spelled name and ignored its argument.
    probs = samples_proba(X, n_samples)
    votes = np.argmax(probs, axis=-1)
    majority = mode(votes, axis=0).mode
    # Depending on the SciPy version the reduced axis is kept (length 1) or
    # dropped; reshaping gives the same result either way.
    return np.asarray(majority).reshape(votes.shape[1:])

In [153]:
# create_minibatch seeds its index generator with 0 on every call, so the
# two streams below draw the same rows and images stay paired with labels.
minibatch_X = pm.generator(create_minibatch(X_train))
minibatch_y = pm.generator(create_minibatch(y_train))

# Flat prior on biases, Gaussian prior on weights.
neural_network_minibatch = build_ann(
    prior_b=PriorWeights(mode='b', prior='flat'),
    prior_W=PriorWeights(mode='W', prior='gauss'),
    input_var=minibatch_X,
    target_var=minibatch_y,
    input_shape=input_shape,
    params=[176, 64],
)

In [154]:
# Number of optimisation steps handed to pm.fit for ADVI.
ADVI_ITERS = 10 ** 4
# NOTE(review): not used in the visible cells; presumably the number of
# posterior draws taken at prediction time -- confirm against later cells.
N_SAMPLES = 500

In [155]:
# Fit the minibatch model with ADVI for ADVI_ITERS steps; `approx` holds the
# fitted approximation and `inference` the ADVI object that produced it.
with neural_network_minibatch:
    inference = pm.ADVI()
    approx = pm.fit(n=ADVI_ITERS, method=inference)

Average Loss = 97,546: 100%|██████████| 10000/10000 [25:54<00:00,  6.40it/s]     
Finished [100%]: Average Loss = 97,523
