In [1]:
%matplotlib inline
import  matplotlib
import matplotlib.pyplot as plt
import pystan

In [2]:
import numpy as np
import scipy.io as sio

In [3]:
df = sio.loadmat('snelson1d.mat')
X = df['X']
Y = df['Y']
X_t = df['tX']
Y_t = df['tY']

In [4]:
def kmm_init(X, m = 20):
    """
    This is the same initialization algorithm that is used
    in Kmeans++. It's quite simple and very useful to initialize
    the locations of the inducing points in sparse GPs.
    
    http://ilpubs.stanford.edu:8090/778/1/2006-13.pdf
    1. Take one center c1, initially chosen at random
    2. Take a new center ci with prob. \frac{D(x)^2}{\sum D(x)^2}
    3. Repeat step 2 until we have K centers.
    4. Proceed with standard K-means clustering..
    
    where D(x) is the distance to the closest cluster center. 
    If we dont want to invest time on that, just use the initialisation which is good enough atleast for the 1D case.
    
    :param X: data
    :param m: number of inducing points
    """

    # compute the distances
    XXT = np.dot(X, X.T)
    D = (-2.*XXT + np.diag(XXT)[:,np.newaxis] + np.diag(XXT)[np.newaxis,:])

    # select the first point
    s = np.random.permutation(X.shape[0])[0]
    inducing = [s]
    prob = D[s]/D[s].sum()

    for z in range(m-1):
        s = np.random.multinomial(1, prob.flatten()).argmax()
        inducing.append(s)
        prob = D[s]/D[s].sum()

    inducing = np.array(inducing)
    return X[inducing]

In [5]:
X_u = kmm_init(X)
M = len(X_u)

X_u_noisy = np.linspace(0,6, M)[:, None]
X_u_noisy = X_u + 0.1*np.random.rand(*X_u.shape) 

In [6]:
stan_code_rff = """
data {
    int<lower=1> N;
    int<lower=1> M;
    vector [N]x;
    vector[N] y;
}

transformed data {
    real delta = 1e-6;
    vector[N] f_mean = rep_vector(0, N);
    
}

parameters{
    real <lower=1e-4> sigvar;
    real<lower=1e-6> sigma;
    
    vector[M] omega;

}

transformed parameters {
    vector[N] f;
    vector[M] b;
    
    matrix[N, N] K;
    
    matrix[N, M] features;
    matrix[N, M] cosfeatures;
    matrix[N, M] sinfeatures;
    matrix[N, 2*M] fullfeatures;
    matrix[N, M] b_mat ;
    
    {
        features = x * omega' *2*pi();

         for(i in 1:N){
             for(j in 1:M){
                 cosfeatures[i,j] = cos(features[i,j] + b[j]);
                 sinfeatures[i,j] = sin(features[i,j] + b[j]);
                 fullfeatures[i,j] = cosfeatures[i,j];
                 fullfeatures[i,M+j] = sinfeatures[i,j];
             }
         }

        K = cosfeatures*cosfeatures'*sqrt(2*square(sigvar)/M) ;
    }
}


model {

    sigvar ~ normal(0.85, 0.25);
    sigma ~ normal(0, 0.35);
    b ~ uniform(0, 2*pi());
    omega ~ normal(0, 1);
    
    f ~ multi_normal(f_mean, K);
    y ~ normal(f, sigma);
    
}

"""

In [7]:
stan_model_rff = pystan.StanModel(model_code=stan_code_rff)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_96c7aa90acf882538c06ae4baaeb9ef1 NOW.


In [8]:
gp_snelson_data = {'N':200, 'x':X.flatten(), 'y':Y.flatten(), 'M':50 }

In [9]:
gp_snelson_data1 = {'N':200, 'x':X.flatten(), 'y':Y.flatten(), 'M':50, 'sigvar':1.0, 'sigma':0.4 }

In [10]:
fit_gpr_rff = stan_model_rff.sampling(data=gp_snelson_data, iter=400, chains=2)

  elif np.issubdtype(np.asarray(v).dtype, float):


RuntimeError: Initialization failed.