In [1]:
import numpy as np

import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

import tensorflow as tf

if tf.test.is_built_with_cuda():
    print("The installed version of TensorFlow includes GPU support.")
else:
    print("The installed version of TensorFlow does not include GPU support.")
    
tf.logging.set_verbosity(0)
import pandas as pd
import matplotlib 
%matplotlib inline

plt = matplotlib.pyplot
from gpflow.likelihoods import Gaussian
from gpflow.likelihoods import Ordinal
from gpflow.kernels import RBF, White
from gpflow.models.gpr import GPR
from gpflow.training import AdamOptimizer, ScipyOptimizer
from gpflow.kernels import Matern32
from scipy.cluster.vq import kmeans2
from scipy.stats import norm
from scipy.special import logsumexp
from doubly_stochastic_dgp.dgp import DGP
import gpflow
matplotlib.rcParams['figure.figsize'] = (12, 6)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 18177090429243490066
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 7905922253
locality {
  bus_id: 1
  links {
  }
}
incarnation: 16634289191896804325
physical_device_desc: "device: 0, name: GeForce GTX 1070 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1"
]
The installed version of TensorFlow includes GPU support.


**Model**

In [2]:
def make_dgp_models(X, Y, Z,likelihood):
    models, names = [], []
    for L in range(1, 5):
        D = X.shape[1]

        # the layer shapes are defined by the kernel dims, so here all hidden layers are D dimensional 
        kernels = []
        for l in range(L):
            kernels.append(RBF(D,variance = 2.5,lengthscales=1.5))

        # between layer noise (doesn't actually make much difference but we include it anyway)
        for kernel in kernels[:-1]:
            white = White(D, variance=1e-5)
#             tf.is_variable_initialized(white.variance).mark_used()
            kernel += white

        mb = 1000 if X.shape[0] > 1000 else None 
        model = DGP(X, Y, Z, kernels, likelihood, num_samples=10, minibatch_size=mb)

        # start the inner layers almost deterministically 
        for layer in model.layers[:-1]:
            layer.q_sqrt = layer.q_sqrt.value * 1e-5

        models.append(model)
        names.append('DGP{} {}'.format(L, len(Z)))
        model.compile()
    
    return models, names



**Prediction**<br>
We'll calculate test absolute error in batches (so the larger datasets don't cause memory problems)

For the DGP models we need to take an average over the samples for the absolute error. The predict_density function already does this internally

In [3]:
def batch_assess(model, assess_model, X, Y):
    n_batches = max(int(X.shape[0]/1000.), 1)
    AbsErr = []
    for X_batch, Y_batch in zip(np.array_split(X, n_batches), np.array_split(Y, n_batches)):
        abserr = assess_model(model, X_batch, Y_batch)
        AbsErr.append(abserr)
        
    MAErr = np.average(np.concatenate(AbsErr, 0))
    
    return MAErr

S = 100
def assess_sampled(model, X_batch, Y_batch):
    m, v = model.predict_y(X_batch, S)
    
    mean = np.average(m, 0)
    abserr = np.absolute((mean - Y_batch))
    return abserr

In [4]:
iterations_few = 100
iterations_many = 5000
s = '{:<16}  Mean Absolute Error : {:.4f} +- {:.4f}'
iterations = 10000

In [5]:
testString =  "./abalone_dataset/10bins/abalone_test_10."
trainString = "./abalone_dataset/10bins/abalone_train_10."

MAErr=[]

for j in range(1,21):
    flag = True
    dfTest = pd.read_csv(testString + str(j),header = None,sep = ' ')
    dfTrain = pd.read_csv(trainString +  str(j),header = None,sep = ' ')
    train= np.array(dfTrain)
    Y = train[:,-1:]
    X = train[:, :-1]
    test  = np.array(dfTest)
    YT = test[:,-1:]
    XT = test[:, :-1]
    YT = YT -1
    Y = Y -1
#     bin_edges = []
#     uniqueY = np.unique(Y)
#     for i in range(uniqueY.size - 1 ):
#         bin_edges.append((uniqueY[i] + uniqueY[i+1])/2)
#     bin_edges = np.array(bin_edges)    
    
    # construct ordinal likelihood - bin_edges is the same as unique(Y) but centered
    bin_edges = np.array(np.arange(np.unique(Y).size - 1), dtype=float)
    bin_edges = bin_edges - bin_edges.mean()
    
    # bin_edges = bin_edges - bin_edges.mean()
    likelihood= Ordinal(bin_edges)
    Z = kmeans2(X, 100, minit='points')[0]
    models_dgp, names_dgp = make_dgp_models(X, Y, Z,likelihood)
    maerr=[]
    for m, name in zip(models_dgp, names_dgp):
        AdamOptimizer(0.01).minimize(m, maxiter=iterations)
        maerr.append(batch_assess(m, assess_sampled, XT, YT))
    MAErr.append(maerr)

MAErr=np.array(MAErr)    


  
# tempInc = 2
# temp1 = 3
# temp2 = 0
# i =0 
# #     while tempInc > .00001:
# #         if(flag):
# AdamOptimizer(0.01).minimize(m,maxiter=4000)
# #         flag = false
# #         AdamOptimizer(0.01).minimize(m,maxiter=500)
# #     ScipyOptimizer().minimize(m,maxiter=4000)
# Xs = XT
# S = 10
# mean,var= m.predict_y(Xs,S)
# fmean = np.average(mean,0)
# fvar = np.average(var,0)
# #     temp2 = np.average(np.absolute((np.round(fmean) - YT)))
# #     tempInc = temp1 - temp2
# #     temp1 = temp2
# print(testString + str(j))
# print(np.average(np.absolute((np.round(fmean) - YT))))
# #     i = i +1 
# #     print(i )


10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10
10 10


In [6]:
meanMAErr = np.mean(MAErr,0)
stdMAErr = np.std(MAErr,0)

for i in range(len(names_dgp)):
    print(s.format(names_dgp[i],meanMAErr[i],stdMAErr[i]))

DGP1 100          Mean Absolute Error : 0.5805 +- 0.0051
DGP2 100          Mean Absolute Error : 0.5836 +- 0.0047
DGP3 100          Mean Absolute Error : 0.5839 +- 0.0046
DGP4 100          Mean Absolute Error : 0.5847 +- 0.0049
