In [None]:
import numpy as np   
from scipy.special import erf
import random
from matplotlib import pyplot as plt
from matplotlib.pyplot import cm 
%matplotlib inline
import GPflow.kernels
from GPflow.likelihoods import Bernoulli, Gaussian
from GPflow.svgp import SVGP
from GPflow.svgp_additive import SVGP_additive2 


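Generative model
* $\eta = \alpha x_0 + f(x_1,x_2)$ (additive predictor: a linear term in $x_0$ plus a nonlinear function of $(x_1,x_2)$)
* $y \sim p(y|\eta)$ for some likelihood (Gaussian or Bernoulli in this notebook)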

## Creating a dataset

In [None]:
# Fix the global NumPy seed so the synthetic dataset is reproducible.
np.random.seed(1)

N = 5000  # sample size
D = 3     # covariate dimensionality
# Covariates drawn uniformly on [-0.5, 0.5)^D
X = np.random.rand(N, D) - .5

# Ground-truth ingredients of the additive predictor
alpha = .5  # slope of the linear term in x_0


def f(x1, x2):
    """Nonlinear two-variable term of the predictor: sin(5*x1) * sin(5*x2)."""
    return np.sin(x1*5.)*np.sin(x2*5.)


# Additive predictor eta = alpha*x_0 + f(x_1, x_2), stored as an (N, 1) column
F = (alpha*X[:, 0] + f(X[:, 1], X[:, 2]))[:, np.newaxis]

# Observation model: 'Gaussian' (regression) or 'Bernoulli' (classification)
lik = 'Gaussian'
#lik = 'Bernoulli'

if lik == 'Bernoulli':
    def phi(x):
        """Standard normal CDF, used as the probit link."""
        return 0.5*(1+erf(x/np.sqrt(2)))

    B = phi(F)  # per-point success probabilities
    Y = (np.random.rand(N, 1) < B).astype(int)
elif lik == 'Gaussian':
    s_n = .1  # observation noise standard deviation
    Y = F + np.random.randn(N, 1) * s_n



### Plotting dataset

In [None]:
# Ground-truth two-variable component f(x_1, x_2) evaluated on a regular grid
l = .5  # half-width of the covariate domain [-l, l]
xp = np.linspace(-l, l, 200)
X1g, X2g = np.meshgrid(xp, xp)
# meshgrid puts x_2 = -l in row 0. origin='lower' draws row 0 at the bottom so
# the image agrees with the axis ticks implied by `extent`; with the default
# origin='upper' the surface would be shown upside down (and, f being odd in
# x_2, with the wrong sign).
plt.imshow(f(X1g, X2g), extent=[-l, l, -l, l], origin='lower')
plt.xlabel('$x_1$', fontsize=20)
plt.ylabel('$x_2$', fontsize=20)
plt.title('function of 2 variables')
plt.show()


# Histogram of the additive predictor values over the dataset
plt.title('Predictor values')
plt.hist(F)
# raw string: keeps the LaTeX backslash literal without an invalid-escape warning
plt.xlabel(r'$\sum_d f_d$', fontsize=20)
plt.show()

# Classification only: histogram of the Bernoulli success probabilities
if lik == 'Bernoulli':
    plt.title('Bernoulli parameters values')
    plt.hist(phi(F))
    plt.xlabel(r'$\phi ( \sum_d f_d )$', fontsize=20)
    plt.show()


### Setting up model

In [None]:
# Inducing inputs, one array per additive component:
#   - a single pseudo-input for the linear term, shape (1, 1)
#   - 30 pseudo-inputs drawn uniformly in [-0.5, 0.5)^2 for the 2-D term
Z = [np.array([[1]]), np.random.rand(30, 2) - .5]

# Observation likelihood, selected by the same `lik` switch used to simulate Y
if lik == 'Bernoulli':
    likelihood = Bernoulli()
elif lik == 'Gaussian':
    likelihood = Gaussian()
    likelihood.variance = 0.01

# One kernel per component: linear on one input, RBF on two inputs
ks = [GPflow.kernels.Linear(1), GPflow.kernels.RBF(2)]

# Covariate columns consumed by each function of the additive decomposition
f_indices = [[0], [1, 2]]
n_func = len(f_indices)

# Build the additive SVGP model from data, kernels, likelihood and inducing inputs
m = SVGP_additive2(X, Y, ks, likelihood, Z, f_indices=f_indices)



### Fixing parameters
Here, we decide which parameters we want to optimize. These include
* kernel hyperparameters $\theta$
* inducing point locations $Z$
* variational parameters
* likelihood parameters (if any)

In [None]:
# This cell selects which model parameters are held fixed during optimization.
# Setting `.fixed = True` on a parameter excludes it from optimization; the
# commented-out lines are optional switches left for experimentation.

# --- Kernel parameters
# The loop visits every kernel of the model but currently fixes nothing
# (its only active statement is `pass`). Uncomment the lines below to freeze
# the variance / lengthscales of a kernel selected by name.
for k in m.kerns.parameterized_list:
    #if k.name == 'linear':
    #    k.variance.fixed = True
    #if k.name == 'rbf':
    #    k.variance.fixed = True
    #    k.lengthscales.fixed = True
    pass

# --- Inducing points
# Only the first inducing-input array (the one belonging to the linear term)
# is frozen; the remaining arrays stay free unless the loop below is enabled.
m.Z[0].fixed = True # no need to optimize location for linear parameter
#for z in m.Z:
#    z.fixed=True

# --- Likelihood parameters
# Placeholder branch: the Gaussian noise variance stays free unless the line
# below is uncommented.
if lik == 'Gaussian':
    #m.likelihood.variance.fixed = True
    pass


# --- Variational parameters
# Optional switches to freeze the variational parameters q_mu and q_sqrt of
# every component (presumably the variational mean and square-root covariance
# -- confirm against the model class).
#for qmu in m.q_mu:
    #qmu.fixed = True
#for qs in m.q_sqrt:
    #qs.fixed = True
    

### Running optimization

In [None]:
# Call the model's optimizer several times in a row on the same model object.
n_opt_runs = 2
for opt_run in range(n_opt_runs):
    m.optimize()

### Diagnosis

In [None]:
# Predicted mean and variance of the full additive predictor at the training inputs
Yp, Vp = m.predict_f(X)
Sp = np.sqrt(Vp)  # predictive standard deviation

# Root-mean-squared error between the predicted mean and the observations Y.
# NOTE(review): with lik == 'Bernoulli', Y holds 0/1 labels, so this figure is
# presumably only meaningful in the Gaussian case -- confirm.
rmse = np.sqrt(np.mean((Yp - Y) ** 2))
print("RMSE: %.3f" % rmse)  # parenthesised form runs on both Python 2 and 3

# Plot true against inferred predictor on a random subset of n points.
n = 100  # number of points shown
# low=0 so the first data point can be drawn as well (high is exclusive)
I = np.random.randint(0, len(Y), n)
fig, ax = plt.subplots()
# Sp is already a standard deviation: use it directly as the error bar, and
# flatten the (N, 1) columns to the 1-D arrays errorbar expects.
ax.errorbar(np.ravel(F[I]), np.ravel(Yp[I]), yerr=np.ravel(Sp[I]), fmt='o')
# Identity line spanning the common range of both axes (perfect-fit reference)
lims = [np.min([ax.get_xlim(), ax.get_ylim()]),
        np.max([ax.get_xlim(), ax.get_ylim()])]
ax.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
ax.set_xlabel('underlying predictor', fontsize=20)
ax.set_ylabel('estimated predictor mean', fontsize=20)
plt.show()


### Display results

In [None]:
# Generating predictions for individual functions
Ys=[]
Vs=[]
for c in range(n_func):
    m.set_prediction_subset_ds([c])
    Yd, Vd = m.predict_f(X)
    print c
    Ys.append(Yd)
    Vs.append(Vd)

In [None]:
# One colour per covariate dimension
col = cm.rainbow(np.linspace(0, 1, D))
w = 5  # NOTE(review): not used anywhere in this cell


def _plot_truth_vs_estimate(truth, mean, var):
    """Plot an estimated component mean against its true value.

    truth, mean and var are arrays with one entry per data point (column
    vectors are flattened). Error bars show +/- one predictive standard
    deviation; the black identity line marks a perfect fit.
    """
    fig, ax = plt.subplots()
    # x, y and yerr are built from the same, unpermuted point order so every
    # error bar belongs to the point it is drawn on.
    ax.errorbar(np.ravel(truth), np.ravel(mean),
                yerr=np.sqrt(np.ravel(var)), fmt='o')
    lims = [np.min([ax.get_xlim(), ax.get_ylim()]),
            np.max([ax.get_xlim(), ax.get_ylim()])]
    ax.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
    ax.set_xlabel('underlying predictor', fontsize=20)
    ax.set_ylabel('estimated predictor mean', fontsize=20)
    plt.show()


# Inferred functions as a function of their inputs
for c in range(n_func):
    Yd = np.ravel(Ys[c])
    Vd = np.ravel(Vs[c])
    dims = f_indices[c]

    if len(dims) == 1:
        # 1-D component: true line (dashed), posterior mean and a
        # +/- one standard deviation band, drawn in increasing x order.
        d = dims[0]
        o = np.argsort(X[:, d])
        sd = np.sqrt(Vd[o])
        fig1, ax1 = plt.subplots()
        ax1.plot(X[o, d], alpha*X[o, d], '--', linewidth=4, c=col[d])
        ax1.plot(X[o, d], Yd[o], '-', c=col[d])
        ax1.fill_between(X[o, d],
                         y1=Yd[o]+sd,
                         y2=Yd[o]-sd, facecolor=col[d], alpha=.5)
        ax1.set_xlabel('$x$ ', fontsize=20)
        ax1.set_ylabel('$\\alpha x$ ', fontsize=20)
        plt.show()

    elif len(dims) == 2:
        # 2-D component: data locations coloured by the posterior mean.
        # No ordering is needed here, so no sort index is used (the original
        # relied on one left over from a previous iteration).
        fig1, ax1 = plt.subplots()
        ax1.scatter(X[:, dims[0]],
                    X[:, dims[1]],
                    c=Yd)
        ax1.set_xlabel('$x_1$ ', fontsize=20)
        ax1.set_ylabel('$x_2$ ', fontsize=20)
        ax1.set_title('$f(x_1,x_2)$', fontsize=20)
        plt.show()


# Inferred functions against the true functions, one figure per component
for c in range(n_func):
    dims = f_indices[c]

    if len(dims) == 1:
        truth = alpha*X[:, dims[0]]
    elif len(dims) == 2:
        # Compare with the matching true component f(x_1, x_2),
        # not with the full additive predictor F.
        truth = f(X[:, dims[0]], X[:, dims[1]])
    else:
        continue

    _plot_truth_vs_estimate(truth, Ys[c], Vs[c])

