In [None]:
import sys 
sys.path.append('nngp')

import numpy as np
import tensorflow as tf

from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer 
from sklearn.metrics import mean_squared_error

from nngp.nngp import NNGPKernel
from nngp.gpr import GaussianProcessRegression

# Print the installed TensorFlow version for the record; later cells in this
# notebook use the tf.Session / tf.ConfigProto API.
print(tf.__version__)

In [None]:
# Fetch the 'MNIST original' dataset through scikit-learn's mldata loader,
# using the local 'mnist' directory as data_home.
mnist = fetch_mldata(
    'MNIST original',
    data_home='mnist',
)
# Last expression of the cell: display the dataset description.
mnist.DESCR

In [None]:
# Fit the binarizer once on the complete target vector so that every later
# call to `encode` uses the same set of classes.
lb = LabelBinarizer()
lb.fit(mnist.target)


def encode(y):
    """Encode class labels as shifted one-hot regression targets.

    The labels are binarized with the fitted ``lb`` and 0.1 is subtracted
    from every entry, so the positive class becomes 0.9 and each negative
    class becomes -0.1.

    NOTE(review): the rows are zero-mean only when there are ten classes
    (0.9 - 9 * 0.1 = 0) -- expected for MNIST, confirm if reused elsewhere.

    Args:
        y: array-like of class labels accepted by ``lb.transform``.

    Returns:
        The binarized labels minus 0.1.
    """
    return lb.transform(y) - .1

In [None]:
# Hold out 15% of the data as a test set, stratified by label. The fixed
# random_state keeps the split the same from run to run.
image_train, image_test, label_train, label_test = train_test_split(
    mnist.data,
    mnist.target,
    test_size=.15,
    stratify=mnist.target,
    random_state=444,
)

# Images are used as features unchanged; labels go through `encode`.
X_train, y_train = image_train, encode(label_train)
X_test, y_test = image_test, encode(label_test)

In [None]:
def get_train_subset(n):
    """Draw a stratified subset of the training split.

    Args:
        n: forwarded to ``train_test_split`` as ``train_size``.

    Returns:
        Tuple ``(images, encoded_labels)`` for the selected subset; the
        labels are passed through ``encode``.
    """
    subset_images, _, subset_labels, _ = train_test_split(
        image_train,
        label_train,
        train_size=n,
        stratify=label_train,
        random_state=333,  # fixed seed for a repeatable subset
    )
    return subset_images, encode(subset_labels)

In [None]:
def train_or_load_model(nn_width, sample_size):
    """Construct a GP regression model with an NNGP kernel on a training subset.

    Args:
        nn_width: forwarded to ``NNGPKernel`` as ``depth`` and embedded in
            the grid path.
            NOTE(review): the name says "width" but the value is used as
            the kernel depth -- confirm which one is intended.
        sample_size: passed to ``get_train_subset`` to pick the training
            subset; also embedded in the grid path.

    Returns:
        A ``GaussianProcessRegression`` built from the subset and the kernel.
    """
    kernel_options = dict(
        depth=nn_width,
        weight_var=1.79,
        bias_var=0.83,
        nonlin_fn=tf.tanh,
        # One grid location per (nn_width, sample_size) combination.
        grid_path='grids/mnist_w%d_s%d' % (nn_width, sample_size),
        use_precomputed_grid=True,
        n_gauss=501,
        n_var=501,
        n_corr=501,
        max_gauss=10,
        max_var=100,
        use_fixed_point_norm=False,
    )
    kernel = NNGPKernel(**kernel_options)

    # Local names differ from the notebook-level X_train / y_train on purpose:
    # this is only the requested subset, not the full training split.
    subset_x, subset_y = get_train_subset(sample_size)
    return GaussianProcessRegression(subset_x, subset_y, kern=kernel)

In [None]:
def accuracy(y, y_hat):
    """Fraction of rows whose arg-max column agrees between ``y`` and ``y_hat``.

    Args:
        y: 2-D array-like of reference scores, one row per example.
        y_hat: 2-D array-like of predicted scores with the same layout.

    Returns:
        Mean of the element-wise equality of the per-row arg-max indices.
    """
    true_classes = np.argmax(y, axis=1)
    predicted_classes = np.argmax(y_hat, axis=1)
    matches = true_classes == predicted_classes
    return np.mean(matches)

In [None]:
# Experiment grid: training-subset sizes crossed with the values handed to
# train_or_load_model as nn_width.
sample_sizes = np.array([1000, 5000, 20000, 50000])
nn_widths = np.array([16, 64, 256, 1024])

# results[i, j] holds (accuracy, mean squared error) for
# sample_sizes[i] x nn_widths[j]. Pre-filled with NaN so that an interrupted
# run cannot leave uninitialised values that look like real scores.
results = np.full((sample_sizes.shape[0], nn_widths.shape[0], 2), np.nan)

# The session options are the same for every configuration: build them once.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

for i, sample_size in enumerate(sample_sizes):
    for j, nn_width in enumerate(nn_widths):
        # Give every configuration its own graph. Building all models in the
        # shared default graph would keep the ops of earlier models alive and
        # make the graph grow with each iteration.
        with tf.Graph().as_default(), tf.Session(config=config) as sess:
            m = train_or_load_model(nn_width=nn_width, sample_size=sample_size)
            # predict returns a pair; only the first element is scored here.
            y_hat, _ = m.predict(X_test, sess)
            results[i, j, 0] = accuracy(y_test, y_hat)
            results[i, j, 1] = mean_squared_error(y_test, y_hat)
            print('done width %d sample size %d' % (nn_width, sample_size))
            print('accuracy %.3f mean squared error %.3f' % (results[i, j, 0], results[i, j, 1]))