In [1]:
from scipy.spatial.distance import cdist
import commoncode as commoncode
import numpy as np
import matplotlib.pyplot as plt
import SkewGP as SkewGP
import GPy as GPy
from matplotlib import pyplot as plt
from scipy.stats import bernoulli
from scipy.stats import norm
import pymc3 as pm

def generate_data(x,y,typed,iref,int_halfsize):
    Y=[]
    C=[]
    Wp=[]
    Zp=[]
    Pref=[]
    ni=len(iref)
    for i in range(len(typed)):
        if typed[i]=='numeric':
            Y.append(y[i])
            c=np.zeros(len(x))
            c[i]=1.0
            C.append(c)
        elif typed[i]=='preference':
            jref = np.random.choice(iref)
            w=np.zeros(len(x))
            if y[i]> y[jref]:
                w[i]=1.0
                w[jref]=-1.0
                Pref.append([i,jref])
            else:
                w[i]=-1.0
                w[jref]=1.0
                Pref.append([jref,i])
            Zp.append(np.array([0.0]))
            Wp.append(w)
    return np.vstack(Y),np.vstack(C),Zp,Wp,Pref

def plot_data(x,y,Y,C,Zp,Wp,typed,figsize=(10,4),shifty=0.2):
    plt.figure(figsize=figsize)
    for i in range(Y.shape[0]):
        plt.scatter(X[np.where(C[i,:]==1)[0]],Y[i],color='C0',Zorder=1000)
   
    Wp1=np.vstack(Wp)
    for i in range(Wp1.shape[0]):
        ii=np.where(Wp1[i,:]!=0)[0]
        cc='goldenrod'
        yref=shifty 
        if y[ii[0]]-y[ii[1]]>0:
            yref=-shifty
        plt.scatter(x[ii[1]],yref,color=cc,Zorder=1000)


## We  generate some data

In [2]:
σ=0.1
noise_variance=σ**2

def fun(x,noise_std=0.1):
    v=(4*np.sin(x/2*np.pi)-0.5*np.sin(2*x*np.pi))/6+np.random.randn(len(x))*noise_std
    return v
np.random.seed(42)
x = np.linspace(0,5,60)
n=len(x)

y = fun(x,σ)
typed =['numeric']*30+['preference']*30 
typed=np.array(typed)
iref=[23]
Y,C,Z,W,Pref=generate_data(x,y,typed,iref,0.06)
X=x.reshape(-1,1)
W=np.vstack(W)
Z=np.vstack(Z)
plot_data(x,y,Y,C,Zplist,Wplist,typed,figsize=(16,4),shifty=1)#
xx=np.linspace(0,5.1,100)
plt.plot(xx,fun(xx,noise_std=0.0),color='b',linestyle=':',zorder=0)
plt.scatter(x[iref],y[iref],color='red',zorder=1000)
plt.xlim([0,5.1])
plt.xlabel("x",fontsize=16)
plt.ylabel("y",fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.tight_layout()

NameError: name 'Zplist' is not defined

The dotted line represents the function we used to generate the observations. The  left (blue) points are numeric (non-noisy) observations and the right points represent preferences. We used the colored points (red and gold) to visualise the $30$ preferential observations. The meaning of these points is as follows: (i)  the value of the functions computed at the $x$s corresponding to the bottom gold  points is less than the value of the function computed at the $x$ corresponding to the red point; (ii) the value of the function computed at the $x$s corresponding to the the  top gold  points is greater than the value of the function computed at the $x$ corresponding to the red point. These $30$ qualitative judgments is the only information we have on the function for $x \in [2.5,5]$. 

We define the Kernel and an empty skewness function for SkewGP. We are going to use  a zero dimension SkewGP, which is a GP prior and so we do not need Delta.

In [None]:
def Kernel(X1,X2,params,diag_=False):
        lengthscale=params['lengthscale']['value']
        variance   =params['variance']['value']
        if diag_==False:
            diffs = cdist(np.atleast_2d(X1)/ lengthscale, np.atleast_2d(X2) / lengthscale, metric='sqeuclidean')
        else:
            diffs = np.sum((np.atleast_2d(X1)/ lengthscale-np.atleast_2d(X2)/ lengthscale)*(np.atleast_2d(X1)/ lengthscale-np.atleast_2d(X2)/ lengthscale),axis=1)
        return variance * np.exp(-0.5 * diffs)
    
def Delta(X,params):
    #empty
    return []

logexp=commoncode.logexp()
latent_dim=0
params0={'lengthscale': {'value':np.array([1.0]), 
                    'range':np.vstack([[0.00001, 50.0]]),
                    'transform': logexp},
         'variance': {'value':np.array([1.0]), 
                    'range':np.vstack([[0.001, 100.0]]),
                    'transform': logexp},
         'noise_variance': {'value':np.array([noise_variance]), 
                    'range':np.vstack([[0.000001, 50.001]]),
                    'transform': logexp}
      }

model = SkewGP.SkewGP(X,Kernel, Delta, params0, W=W, C=C, Y=Y, Z=Z,  latent_dim =0, type_y='mixed', jitter=1e-4)
#we optimize the hyperparameters
ml=model.optimize(max_iters=1000) 
print("Marginal Likelihood",ml)

In [None]:
model.params


In [None]:
Xpred=np.linspace(0,5.5,200)[:,None]
print(Xpred.shape)
predictions=model.predict(Xpred,nsamples=10000);

In [None]:
plt.figure(figsize=(16,4))
plot_data(x,y,Y,C,Zplist,Wplist,typed,figsize=(16,4),shifty=1)
plt.scatter(x[iref],y[iref],color='red',zorder=1000)
plt.plot(xx,fun(xx,noise_std=0.0),color='b',linestyle=':',zorder=0)


#we compute the credible intervals
credib_int = pm.stats.hpd(predictions.T)
#we plot the latent function mean and credible interval
plt.plot(Xpred[:,0],credib_int[:,1],color='C2', linestyle=':')
plt.plot(Xpred[:,0],credib_int[:,0],color='C2', linestyle=':')
plt.plot(Xpred[:,0],np.mean(predictions,axis=1), label='mean',color='C2')
plt.xlabel("x",fontsize=16)
plt.ylabel("f(x)",fontsize=16);