In [1]:
import scipy.io
# Load the landmine dataset. The keys are indexed directly (rather than with
# .get) so a missing variable fails right here with a KeyError instead of
# producing None and an obscure TypeError when the arrays are indexed later.
mat = scipy.io.loadmat('LandmineData-.mat')
v1 = mat['feature']  # per-task feature arrays, read later as v1[0, i]
v2 = mat['label']    # per-task label arrays, read later as v2[0, i]
import sys

import gpflow
import matplotlib.pyplot as plt
import numpy as np
import scipy.cluster.vq
import tensorflow as tf
from gpflow.ci_utils import ci_niter
from sklearn.metrics import roc_auc_score

sys.path.append('../code')
from gpar.regression import GPARRegressor
from gplar_q_separate import GPLARmodel


In [None]:
# Landmine classification experiment: for every trial, draw a random
# train/test split per task, fit a GPLAR model with one Bernoulli likelihood
# per task, and record the per-task ROC AUC on the held-out points.

SEED = 0             # seed for the NumPy and TensorFlow global generators
NUM_TRIALS = 10      # number of independent random splits
NUM_TASKS = 10       # number of tasks (entries v1[0, i] / v2[0, i]) used
MAX_INDUCING = 300   # upper bound on the number of inducing points M
N = 40               # training points drawn per task

# Seed the global generators so the random splits drawn by np.random.choice
# and TensorFlow's random ops repeat from run to run.
np.random.seed(SEED)
tf.random.set_seed(SEED)

maxiter = ci_niter(20000)

# NOTE(review): the name says 20 while N is 40; the name is kept unchanged
# because other cells may read it. It collects one list of per-task AUCs per
# trial for the current value of N.
roc_N_20 = []
for trial in range(NUM_TRIALS):
    x_train, y_train, x_test, y_test = [], [], [], []

    for i in range(NUM_TASKS):
        num_points = v1[0, i].shape[0]
        # Sample WITHOUT replacement: np.random.choice defaults to
        # replace=True, which would put duplicate rows in the training set
        # and leave fewer than N distinct training points.
        index = np.random.choice(num_points, N, replace=False)
        # Everything that was not drawn for training is used for testing.
        index_test = np.setdiff1d(np.arange(num_points), index)
        x_train.append(v1[0, i][index])
        x_test.append(v1[0, i][index_test])

        y_i = v2[0, i].astype(dtype=np.float64)
        y_train.append(y_i[index])
        y_test.append(y_i[index_test])

    if N * NUM_TASKS < MAX_INDUCING:
        # Few enough training points: use M equal to their total count and
        # let the regressor pick its own inducing inputs (x_ind=None).
        M = N * NUM_TASKS
        x_ind = None
    else:
        # Otherwise cap M and place the inducing inputs at k-means centroids
        # of the pooled training inputs (kmeans2 returns (centroids, labels)).
        M = MAX_INDUCING
        x_gpar = np.concatenate(x_train, axis=0)
        x_ind = scipy.cluster.vq.kmeans2(x_gpar, M, minit='points')[0]

    gpar = GPARRegressor(scale=2.0,
                         linear=True, linear_scale=1.,
                         nonlinear=True, nonlinear_scale=1.,
                         noise=0.01,
                         impute=True, replace=True, normalise_y=True,
                         x_ind=x_ind)

    gplar = GPLARmodel(x_train, y_train, M, gpar, white=False,
                       scale=2.0, scale_tie=True, linear=True, linear_scale=1.,
                       nonlinear=True, nonlinear_scale=1.,
                       noise_inner=1e-5, noise_obs=0.001,
                       minibatch_size=500,
                       num_samples=1)
    # Binary labels: one Bernoulli likelihood per task.
    gplar.likelihoods = [gpflow.likelihoods.Bernoulli() for _ in range(NUM_TASKS)]

    # NOTE(review): the two helpers below are deliberately left inside the
    # trial loop. A tf.function generally only allows variables to be created
    # on its first call, and each trial builds a new Adam optimizer
    # (presumably creating its slot variables on first use), so hoisting the
    # compiled step out of the loop could fail on the second trial -- confirm
    # before moving it.
    @tf.function(autograph=False)
    def optimization_step(optimizer, model, data):
        """Apply one optimizer update to `model` and return the minimised objective.

        The objective is the negative of
        `model.maximum_log_likelihood_objective(*data)`.
        """
        with tf.GradientTape(watch_accessed_variables=False) as tape:
            tape.watch(model.trainable_variables)
            objective = -model.maximum_log_likelihood_objective(*data)
        # Differentiate after leaving the tape context so the tape does not
        # also record the backward pass.
        grads = tape.gradient(objective, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        return objective

    def run_adam(model, data, iterations):
        """Run `iterations` Adam steps on `model`.

        Returns the list of objective values (sign flipped back, i.e. the
        value being maximised) recorded every 100 steps; the value is also
        printed every 1000 steps.
        """
        logf = []
        adam = tf.optimizers.Adam(0.001)
        for step in range(iterations):
            neg_elbo = optimization_step(adam, model, data)
            elbo = -neg_elbo
            if step % 100 == 0:
                logf.append(elbo.numpy())
            if step % 1000 == 0:
                print(elbo.numpy())
        return logf

    logf = run_adam(gplar, (x_train, y_train), maxiter)

    roc = []
    for i, (x, y) in enumerate(zip(x_test, y_test)):
        # The second argument is i + 1, i.e. presumably a 1-based output
        # index -- confirm against GPLARmodel._predict.
        mean, var = gplar._predict(x, i + 1, full_cov=False, S=100)
        # Average over the leading axis (presumably the S=100 samples).
        mean = np.mean(mean, axis=0)
        roc_i = roc_auc_score(y, mean)
        # NOTE(review): taking max(auc, 1 - auc) flips any below-chance score
        # on the test set, which can only raise the reported number. Kept
        # as in the original so results stay comparable -- confirm intended.
        roc.append(max(roc_i, 1 - roc_i))
    print("roc average:", np.mean(roc))
    roc_N_20.append(roc)