In [5]:
import gpflow
import tensorflow as tf
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
plt.style.use('ggplot')
%matplotlib inline

import os
os.environ["CUDA_VISIBLE_DEVICES"] = ''

import keras
from keras.datasets import mnist
from keras import backend as K
 

# Load the pre-computed train/test feature matrices from comma-separated text files.
# NOTE(review): rows are assumed to be in the same order as the images/labels
# returned by mnist.load_data(), since they are paired with those labels later — confirm.
xs_train_full = np.genfromtxt("../data/mnist_train_features.csv", delimiter=",")
xs_test_full = np.genfromtxt("../data/mnist_test_features.csv", delimiter=",")

def get_mnist_classes():
    """Load MNIST via Keras and one-hot encode the labels.

    Returns:
        ``((x_train, x_test), (y_train, y_test))`` where the ``x_*`` entries are
        the image arrays exactly as returned by ``mnist.load_data()`` and the
        ``y_*`` entries are the labels converted to 10-column binary class
        matrices.
    """
    train_split, test_split = mnist.load_data()
    images_train, labels_train = train_split
    images_test, labels_test = test_split

    # turn the integer class vectors into binary class matrices (10 digit classes)
    onehot_train = keras.utils.to_categorical(labels_train, 10)
    onehot_test = keras.utils.to_categorical(labels_test, 10)

    return ((images_train, images_test), (onehot_train, onehot_test))

# Raw MNIST images plus one-hot labels; the integer test labels are the ground truth
# against which every model is scored.
(mnist_train_full, mnist_test_full), (ys_train_full, ys_test_full) = get_mnist_classes()
correct_classes = ys_test_full.argmax(axis=1)


In [2]:
# Fit the whole GP model on the base MNIST data.

num_classes = 10
num_features = xs_train_full.shape[-1]
print("Using", num_features, "features, predicting", num_classes, "classes")

# The GP is trained on integer class labels rather than one-hot rows.
ys = ys_train_full.argmax(axis=1)

# Configurations tried so far (kernel, white-noise variance, inducing-point stride, minibatch size, trainable flags):
#
# Matern12, white var=0.1, ::25, minibatch=8k, kern.white.var.train = False, feature.trainable=True
#
# Matern 32, White var=0.1, ::20, minibatch = 8000, kern.white.variance.trainable = True, feature.trainable = False
#   Note: this has interesting properties for AWGN results: hybrid model outperforms both. But in other cases not...
# Best:
# ! Matern 32, White var=0.1, ::20, minibatch = 8000, kern.white.variance.trainable = False(feature.trainable = True)
#
# Matern52, White var=0.1, ::25, minibatch=8000, white trainable=False
#
# Linear, white var=0.1, ::20, minibatch=8k, white trainable =False   <-- configuration used below
gp_model = gpflow.models.SVGP(
    xs_train_full,
    ys,
    kern=(gpflow.kernels.Linear(input_dim=num_features)
          + gpflow.kernels.White(input_dim=num_features, variance=0.1)),
    likelihood=gpflow.likelihoods.MultiClass(num_classes),
    Z=xs_train_full[::20].copy(),  # every 20th training row initialises the inducing inputs
    num_latent=num_classes,
    whiten=True,
    q_diag=True,
    minibatch_size=8000,
)
# keep the white-noise variance fixed at its initial value of 0.1
gp_model.kern.white.variance.trainable = False
#gp_model.feature.trainable = False

Using 128 features, predicting 10 classes


In [3]:
# Fit the SVGP model with GPflow's SciPy-based optimizer using its default settings.
# NOTE(review): the model was built with minibatch_size=8000, so the objective is
# presumably evaluated on a different random minibatch at each call. A deterministic
# quasi-Newton optimizer may terminate early on such a noisy objective (the saved log
# reports convergence after only 11 iterations). Consider gpflow.train.AdamOptimizer
# for minibatched training — confirm before changing, as it will alter the results.
opt = gpflow.train.ScipyOptimizer()
opt.minimize(gp_model)

INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: 4515.149993
  Number of iterations: 11
  Number of functions evaluations: 20


In [4]:
# Predictive mean and variance of the GP for every MNIST test feature vector;
# the predicted class is the one with the largest predictive mean.
mu, var = gp_model.predict_y(xs_test_full)
gp_predicted_classes = mu.argmax(axis=1)
print("Num GP incorrect: ", np.count_nonzero(np.not_equal(gp_predicted_classes, correct_classes)))

Num GP incorrect:  102


In [5]:
# Display the fitted model's parameters (transform, trainable flag, shape, value) as a table.
gp_model.as_pandas_table()

Unnamed: 0,class,prior,transform,trainable,shape,fixed_shape,value
SVGP/kern/white/variance,Parameter,,+ve,False,(),True,0.1
SVGP/kern/linear/variance,Parameter,,+ve,True,(),True,1.047442193194886
SVGP/q_mu,Parameter,,(none),True,"(3000, 10)",True,"[[-2.3135743513, -2.81427481817, -4.4071718795..."
SVGP/feature/Z,Parameter,,(none),True,"(3000, 128)",True,"[[3.88213660939, 0.18223541999, 2.85461201275,..."
SVGP/q_sqrt,Parameter,,+ve,True,"(3000, 10)",True,"[[0.343735744257, 0.3221284087, 0.268484167645..."


In [2]:
# Load the full, already-trained MNIST CNN from its saved HDF5 file.
# It serves as the CNN baseline and, further below, as the source of the feature extractor.
mnist_cnn = keras.models.load_model('../models/mnist_cnn.h5')

In [3]:
# Bring the raw MNIST test images into the layout the Keras backend expects
# (a single channel axis, first or last) and scale them by 1/255.
img_rows, img_cols = 28, 28
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

# Use the image array's own sample count. The previous code took it from
# xs_test_full, which only worked while both arrays happened to have the
# same number of rows.
mnist_test_full_reshape = mnist_test_full.reshape((mnist_test_full.shape[0],) + input_shape)

# astype() returns a new array, so the in-place division leaves mnist_test_full untouched
mnist_test_full_reshape = mnist_test_full_reshape.astype('float32')
mnist_test_full_reshape /= 255

In [6]:
# Class probabilities of the CNN for all test images, evaluated as one single batch;
# the predicted class is the most probable one.
cnn_test_probs = mnist_cnn.predict_on_batch(mnist_test_full_reshape)
cnn_predicted_classes = cnn_test_probs.argmax(axis=1)
print("Num CNN incorrect:", np.count_nonzero(np.not_equal(cnn_predicted_classes, correct_classes)))

Num CNN incorrect: 89


In [10]:
# CNN error rate on the test set (fraction of misclassified images). Normalise by the
# actual number of test labels instead of a hard-coded 10000.
# NOTE(review): the saved output of this cell (0.9911) looks like the accuracy, i.e.
# 1 - error rate, so it was presumably produced by an earlier version of the cell —
# re-run to confirm.
print(np.count_nonzero(cnn_predicted_classes != correct_classes) / len(correct_classes))

0.9911


In [58]:
# Combined model


def combined_predict_efficient(cnn_probs_all, gp_mu, gp_var, verbose=False, accept_cnn_stddev = 1.0):
    """Combine per-sample CNN and GP class predictions into a single decision each.

    For every sample the arg-max class of the CNN and of the GP are compared:

    * If both agree, the GP prediction is used (it may still be wrong, but
      there is no additional information to act on).
    * If they disagree, the CNN prediction is accepted only when the CNN's
      probability for its class is below the GP mean for that same class plus
      ``accept_cnn_stddev`` GP standard deviations AND above the GP mean of
      the GP's own top class minus ``accept_cnn_stddev`` standard deviations
      of that class. Otherwise the GP prediction is used. The core idea: if
      the CNN is *too* sure, fall back to the GP (the input might be
      adversarial).

    params:
        cnn_probs_all: array (num_samples, num_classes) of CNN class probabilities
        gp_mu: array (num_samples, num_classes) of GP predictive means
        gp_var: array (num_samples, num_classes) of GP predictive variances
        verbose: if True, print which model is chosen whenever the models disagree
        accept_cnn_stddev: width of the acceptance band, in GP standard deviations

    returns:
        Tuple ``(decisions, decision_probs, decision_vars)`` of numpy arrays:
        decisions: (num_samples, 4) rows ``[source, class, prob, var]`` with
            source 0 = GP, 1 = CNN; ``var`` is -1 for CNN decisions
        decision_probs: (num_samples, num_classes) probability row of the chosen model
        decision_vars: (num_samples, num_classes) GP variances, or a row of -1
            for CNN decisions

        For zero samples, correctly shaped empty arrays are returned so that
        callers can still index columns (e.g. ``decisions[:, 1]``).

    raises:
        AssertionError: if the three inputs do not have the same number of rows.
    """
    assert (cnn_probs_all.shape[0] == gp_mu.shape[0] == gp_var.shape[0])

    # np.array([]) would be 1-D and break column indexing in callers, so build
    # the empty result with explicit shapes.
    if gp_mu.shape[0] == 0:
        per_sample_shape = tuple(gp_mu.shape[1:])
        return (np.empty((0, 4)),
                np.empty((0,) + per_sample_shape),
                np.empty((0,) + per_sample_shape))

    decisions = []
    decision_probs = []
    decision_vars = []
    for (mu, var, cnn_probs) in zip(gp_mu, gp_var, cnn_probs_all):
        cnn_class = np.argmax(cnn_probs)
        gp_class = np.argmax(mu)

        gp_pred_prob = mu[gp_class]
        gp_pred_var = var[gp_class]
        cnn_pred_prob = cnn_probs[cnn_class]

        take_cnn = False
        if gp_class != cnn_class:
            # Disagreement! This is additional information.
            # From prior experiments we suspect that the NN is more likely to be
            # correct [non-adversarial examples tested], so take the CNN prediction
            # IF its probability lies inside the band derived from the GP.
            if verbose:
                print("Models disagree on predicted class")

            gp_prob_for_cnn_pred = mu[cnn_class]
            gp_stddev_for_cnn_pred = np.sqrt(var[cnn_class])

            upper_bound = gp_prob_for_cnn_pred + accept_cnn_stddev*gp_stddev_for_cnn_pred
            lower_bound = gp_pred_prob - accept_cnn_stddev*np.sqrt(gp_pred_var)
            take_cnn = lower_bound < cnn_pred_prob < upper_bound

            if verbose:
                if take_cnn:
                    print("  Taking CNN prediction p=", cnn_pred_prob, ", probability is within", accept_cnn_stddev, "stddev of GP probability p=", gp_prob_for_cnn_pred)
                else:
                    print("  Taking GP prediction")

        if take_cnn:
            # the CNN provides no variance, so -1 is stored as a placeholder
            decisions.append([1, cnn_class, cnn_pred_prob, -1])
            decision_probs.append(cnn_probs)
            decision_vars.append([-1] * mu.shape[-1])
        else:
            decisions.append([0, gp_class, gp_pred_prob, gp_pred_var])
            decision_probs.append(mu)
            decision_vars.append(var)
    return (np.array(decisions), np.array(decision_probs), np.array(decision_vars))



In [42]:


# Combine the CNN and GP test-set predictions; column 1 of each decision row is the chosen class.
combined_pred, combined_mus, combined_vars = combined_predict_efficient(
    cnn_test_probs, mu, var, accept_cnn_stddev=1.0)
combined_pred_classes = combined_pred[:, 1]
combined_incorrect = np.not_equal(combined_pred_classes, correct_classes)
print("Num Combined incorrect: ", np.count_nonzero(combined_incorrect))

Num Combined incorrect:  96


In [12]:
# Have all 3 of MNIST CNN, trained GP, and combined model!

In [13]:
# Load the raw N-MNIST variants (AWGN, blur, low contrast) from their .mat files.
# This cell only loads them; they are preprocessed like standard MNIST further
# below via preprocess_mnist.
nmnist_noisy = scipy.io.loadmat('../data/raw/n-mnist/nmnist-awgn.mat')
nmnist_blur = scipy.io.loadmat('../data/raw/n-mnist/nmnist-blur.mat')
nmnist_noisy_lowcontrast = scipy.io.loadmat('../data/raw/n-mnist/nmnist-contrast.mat')

In [14]:
from keras import backend as K
import keras.utils

def preprocess_mnist(xs, ys, one_hot_encode=-1):
    """Reshape and scale MNIST-style images for the CNN, optionally one-hot encoding labels.

    params:
        xs: array whose first axis indexes samples and whose remaining elements
            form one 28x28 image per sample
        ys: labels; returned unchanged unless ``one_hot_encode`` is given
        one_hot_encode: number of classes to one-hot encode ``ys`` into, or -1
            (default) to leave ``ys`` as is

    returns:
        Tuple ``(xs, ys)`` where ``xs`` is a new float32 array divided by 255,
        shaped (n, 1, 28, 28) for a 'channels_first' backend and (n, 28, 28, 1)
        otherwise. The input array is not modified.
    """
    img_rows, img_cols = 28, 28

    # put the single channel axis where the Keras backend expects it
    if K.image_data_format() == 'channels_first':
        image_shape = (1, img_rows, img_cols)
    else:
        image_shape = (img_rows, img_cols, 1)

    # astype() copies, so the in-place division below never touches the caller's data
    xs = xs.reshape((xs.shape[0],) + image_shape).astype('float32')
    xs /= 255

    if one_hot_encode != -1:
        ys = keras.utils.to_categorical(ys, one_hot_encode) # one_hot_encode is the number of classes

    return (xs, ys)

In [15]:
# Backend function that maps (input images, learning-phase flag) to the output of
# layer index 6 of the CNN; it is used below to compute the features fed to the GP.
# NOTE(review): layer 6 is presumably the dense layer whose activations correspond to
# the pre-computed feature CSVs — confirm against mnist_cnn.summary().
feature_extractor = K.function([mnist_cnn.layers[0].input, K.learning_phase()],
                               [mnist_cnn.layers[6].output])

In [63]:
# Evaluate GP, CNN and combined model on the noisy (AWGN) N-MNIST test set.

awgn_x_test, awgn_y_test = preprocess_mnist(nmnist_noisy['test_x'], nmnist_noisy['test_y'])
# ground-truth class index = arg-max over each label row
awgn_correct_classes = awgn_y_test.argmax(axis=1)

# GP: predict on the CNN features (second input 0 is the learning-phase flag)
awgn_features = feature_extractor([awgn_x_test, 0])[0]
awgn_gp_mu, awgn_gp_var = gp_model.predict_y(awgn_features)
awgn_gp_preds = awgn_gp_mu.argmax(axis=1)
awgn_gp_incorrect = np.not_equal(awgn_gp_preds, awgn_correct_classes)
print("AWGN: gp incorrect: ", np.count_nonzero(awgn_gp_incorrect))

# CNN: predict directly on the preprocessed images
awgn_cnn_probs = mnist_cnn.predict_on_batch(awgn_x_test)
awgn_cnn_preds = awgn_cnn_probs.argmax(axis=1)
awgn_cnn_incorrect = np.not_equal(awgn_cnn_preds, awgn_correct_classes)
print("AWGN: cnn incorrect: ", np.count_nonzero(awgn_cnn_incorrect))

# Combined: column 1 of each decision row is the chosen class
awgn_combined_pred, awgn_combined_mus, awgn_combined_vars = combined_predict_efficient(
    awgn_cnn_probs, awgn_gp_mu, awgn_gp_var, accept_cnn_stddev=1.0)
awgn_combined_pred_classes = awgn_combined_pred[:, 1]
awgn_combined_incorrect = np.not_equal(awgn_combined_pred_classes, awgn_correct_classes)
print("AWGN: combined incorrect: ", np.count_nonzero(awgn_combined_incorrect))

AWGN: gp incorrect:  482
AWGN: cnn incorrect:  462
AWGN: combined incorrect:  461


In [64]:
# Evaluate GP, CNN and combined model on the blurred N-MNIST test set.
blur_x_test, blur_y_test = preprocess_mnist(nmnist_blur['test_x'], nmnist_blur['test_y'])
# ground-truth class index = arg-max over each label row
blur_correct_classes = blur_y_test.argmax(axis=1)

# GP: predict on the CNN features (second input 0 is the learning-phase flag)
blur_features = feature_extractor([blur_x_test, 0])[0]
blur_gp_mu, blur_gp_var = gp_model.predict_y(blur_features)
blur_gp_preds = blur_gp_mu.argmax(axis=1)
blur_gp_incorrect = np.not_equal(blur_gp_preds, blur_correct_classes)
print("Blur: gp incorrect: ", np.count_nonzero(blur_gp_incorrect))

# CNN: predict directly on the preprocessed images
blur_cnn_probs = mnist_cnn.predict_on_batch(blur_x_test)
blur_cnn_preds = blur_cnn_probs.argmax(axis=1)
blur_cnn_incorrect = np.not_equal(blur_cnn_preds, blur_correct_classes)
print("Blur: cnn incorrect: ", np.count_nonzero(blur_cnn_incorrect))

# Combined: column 1 of each decision row is the chosen class
blur_combined_pred, blur_combined_mus, blur_combined_vars = combined_predict_efficient(
    blur_cnn_probs, blur_gp_mu, blur_gp_var, accept_cnn_stddev=1.0)
blur_combined_pred_classes = blur_combined_pred[:, 1]
blur_combined_incorrect = np.not_equal(blur_combined_pred_classes, blur_correct_classes)
print("Blur: combined incorrect: ", np.count_nonzero(blur_combined_incorrect))

Blur: gp incorrect:  644
Blur: cnn incorrect:  542
Blur: combined incorrect:  555


In [65]:
# Evaluate GP and CNN on the low-contrast N-MNIST test set.
lcontrast_x_test, lcontrast_y_test = preprocess_mnist(
    nmnist_noisy_lowcontrast['test_x'], nmnist_noisy_lowcontrast['test_y'])
# ground-truth class index = arg-max over each label row
lcontrast_correct_classes = lcontrast_y_test.argmax(axis=1)

# GP: predict on the CNN features (second input 0 is the learning-phase flag)
lcontrast_features = feature_extractor([lcontrast_x_test, 0])[0]
lcontrast_gp_mu, lcontrast_gp_var = gp_model.predict_y(lcontrast_features)
lcontrast_gp_preds = lcontrast_gp_mu.argmax(axis=1)
lcontrast_gp_incorrect = np.not_equal(lcontrast_gp_preds, lcontrast_correct_classes)
print("lcontrast: gp incorrect: ", np.count_nonzero(lcontrast_gp_incorrect))

# CNN: predict directly on the preprocessed images
lcontrast_cnn_probs = mnist_cnn.predict_on_batch(lcontrast_x_test)
lcontrast_cnn_preds = lcontrast_cnn_probs.argmax(axis=1)
lcontrast_cnn_incorrect = np.not_equal(lcontrast_cnn_preds, lcontrast_correct_classes)
print("lcontrast: cnn incorrect: ", np.count_nonzero(lcontrast_cnn_incorrect))


lcontrast: gp incorrect:  2158
lcontrast: cnn incorrect:  2285


In [67]:
# Combined model on the low-contrast set. Note the narrower acceptance band
# (0.5 stddev) compared with the 1.0 used for the other datasets.
lcontrast_combined_pred, lcontrast_combined_mus, lcontrast_combined_vars = combined_predict_efficient(
    lcontrast_cnn_probs,
    lcontrast_gp_mu,
    lcontrast_gp_var,
    verbose=False,
    accept_cnn_stddev=0.5,
)
lcontrast_combined_pred_classes = lcontrast_combined_pred[:, 1]
lcontrast_combined_incorrect = np.not_equal(lcontrast_combined_pred_classes, lcontrast_correct_classes)
print("lcontrast: combined incorrect: ", np.count_nonzero(lcontrast_combined_incorrect))

lcontrast: combined incorrect:  2154


In [None]:
"""
# best Matern12 results:
AWGN: gp incorrect:  360
AWGN: cnn incorrect:  462
AWGN: combined incorrect:  424
Blur: gp incorrect:  1254
Blur: cnn incorrect:  542
Blur: combined incorrect:  677
lcontrast: gp incorrect:  2848
lcontrast: cnn incorrect:  2285
lcontrast: combined incorrect:  2880


# best Matern32 results:
AWGN: gp incorrect:  320
AWGN: cnn incorrect:  462
AWGN: combined incorrect:  382
Blur: gp incorrect:  1280
Blur: cnn incorrect:  542
Blur: combined incorrect:  778
lcontrast: gp incorrect:  4135
lcontrast: cnn incorrect:  2285
lcontrast: combined incorrect:  3058
        
# best Matern52 results:
AWGN: gp incorrect:  388
AWGN: cnn incorrect:  462
AWGN: combined incorrect:  399
Blur: gp incorrect:  1304
Blur: cnn incorrect:  542
Blur: combined incorrect:  856
lcontrast: gp incorrect:  4809
lcontrast: cnn incorrect:  2285
lcontrast: combined incorrect:  3537

# best Linear results:
AWGN: gp incorrect:  438
AWGN: cnn incorrect:  462
AWGN: combined incorrect:  407
Blur: gp incorrect:  2016
Blur: cnn incorrect:  542
Blur: combined incorrect:  1529
lcontrast: gp incorrect:  5735
lcontrast: cnn incorrect:  2285
lcontrast: combined incorrect:  3887

"""