In [39]:
import numpy as np
import math
import GPy
import matplotlib.pyplot as plt
import seaborn

In [40]:
def generate_data():
    data = np.loadtxt('./../svm.csv', delimiter=",")
    sample = np.random.choice(data.shape[0], size = 32, replace=False)
    samp_data = data[sample]
    #X = samp_data[:, :-1]
    #y = samp_data[:, -1].reshape(-1, 1)
    X = samp_data[:, :-1]
    y = samp_data[:, -1].reshape(-1, 1)
    return X, y

X, y = generate_data()
y = np.log10(y)

In [41]:
def base_kernel_search(X, y):
    kernel_comp = [GPy.kern.RBF(input_dim=X.shape[1], variance=1.0, lengthscale=1.0, useGPU=True), 
                         GPy.kern.RatQuad(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0),
                         GPy.kern.Matern32(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0),
                         GPy.kern.Matern52(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0),
                         GPy.kern.StdPeriodic(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0, useGPU = True),
                         GPy.kern.Exponential(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0)]
    
    best_model = None
    best_bic = float('inf')
    mean = GPy.mappings.Constant(input_dim = X.shape[1], output_dim = 1, value = np.mean(y))


    for kernel1 in kernel_comp:
        kernel = kernel1
        model = GPy.models.GPRegression(X, y, kernel, noise_var=0.001**2, mean_function=mean)
        model.Gaussian_noise.variance.fix()
        model.optimize_restarts(5, verbose=False)
        n = model.num_data
        k = len(model.parameters)
        log_likelihood = model.log_likelihood()
        
        bic = k * np.log(n) - 2 * log_likelihood
        print(bic)
        print(model)

        if bic < best_bic:
            best_model = (kernel, model)
            best_bic = bic

    print(best_bic, best_model)

In [42]:
import warnings
with warnings.catch_warnings(action="ignore"):
    base_kernel_search(X, y)

49.90085590602205

Name : GP regression
Objective : 19.751824098811436
Number of Parameters : 4
Number of Optimization Parameters : 3
Updates : True
Parameters:
  [1mGP_regression.         [0;0m  |                 value  |  constraints  |  priors
  [1mconstmap.C             [0;0m  |    2.5354464174126083  |               |        
  [1mrbf.variance           [0;0m  |   0.20121269712247727  |      +ve      |        
  [1mrbf.lengthscale        [0;0m  |  0.018559564081515947  |      +ve      |        
  [1mGaussian_noise.variance[0;0m  |                 1e-06  |   +ve fixed   |        
49.90085592765645

Name : GP regression
Objective : 19.751824109628636
Number of Parameters : 5
Number of Optimization Parameters : 4
Updates : True
Parameters:
  [1mGP_regression.         [0;0m  |                 value  |  constraints  |  priors
  [1mconstmap.C             [0;0m  |    2.5354453791521125  |               |        
  [1mRatQuad.variance       [0;0m  |   0.20121190595776378  

In [43]:
def kernel_search(X, y):
    kernel_comp = [GPy.kern.RBF(input_dim=X.shape[1], variance=1.0, lengthscale=1.0), 
                         GPy.kern.RatQuad(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0),
                         GPy.kern.Matern32(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0),
                         GPy.kern.Matern52(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0),
                         GPy.kern.StdPeriodic(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0),
                         GPy.kern.Exponential(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0)]
    
    best_model = None
    best_bic = float('inf')
    mean_comp = [GPy.mappings.Constant(input_dim = X.shape[1], output_dim = 1, value = np.mean(y)), 
            GPy.mappings.Linear(input_dim = X.shape[1], output_dim = 1)]


    for kernel1 in kernel_comp:
        for kernel2 in kernel_comp:
            for mean in mean_comp:
                # Combine kernels
                kernel = kernel1 * kernel2
                model = GPy.models.GPRegression(X, y, kernel, noise_var=0.001**2, mean_function=mean)
                model.Gaussian_noise.variance.fix()
                model.optimize()
                n = model.num_data
                k = len(model.optimizer_array)
                log_likelihood = model.log_likelihood()
                
                bic = k * np.log(n) - 2 * log_likelihood
                print(bic)
                print(model)

                if bic < best_bic:
                    best_model = (kernel, model)
                    best_bic = bic

    print(best_bic, best_model)

In [44]:
import warnings
with warnings.catch_warnings(action="ignore"):
    kernel_search(X, y)

56.832327861771034

Name : GP regression
Objective : 19.751824173886202
Number of Parameters : 6
Number of Optimization Parameters : 5
Updates : True
Parameters:
  [1mGP_regression.         [0;0m  |                 value  |  constraints  |  priors
  [1mconstmap.C             [0;0m  |    2.5354469452734847  |               |        
  [1mmul.rbf.variance       [0;0m  |     0.448567329787389  |      +ve      |        
  [1mmul.rbf.lengthscale    [0;0m  |  0.027027514339263196  |      +ve      |        
  [1mmul.rbf_1.variance     [0;0m  |     0.448567329787389  |      +ve      |        
  [1mmul.rbf_1.lengthscale  [0;0m  |  0.027027514339263196  |      +ve      |        
  [1mGaussian_noise.variance[0;0m  |                 1e-06  |   +ve fixed   |        
681.6767327569249

Name : GP regression
Objective : 326.9754227672636
Number of Parameters : 9
Number of Optimization Parameters : 8
Updates : True
Parameters:
  [1mGP_regression.         [0;0m  |               value  | 

In [45]:
def best_kernel_search(X, y):
    
    kernel = GPy.kern.Matern52(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0)
    mean = GPy.mappings.Constant(input_dim = X.shape[1], output_dim = 1, value = np.mean(y))
    model = GPy.models.GPRegression(X, y, kernel, noise_var=0.001**2, mean_function=mean)
    model.Gaussian_noise.variance.fix()
    model.optimize_restarts(100, verbose=False)

    n = model.num_data
    k = len(model.optimizer_array)
    log_likelihood = model.log_likelihood()
    
    bic = k * np.log(n) - 2 * log_likelihood
    print(bic)
    print(model)

    return model

In [46]:
store = best_kernel_search(X, y)

49.90085589327253

Name : GP regression
Objective : 19.751824092436674
Number of Parameters : 4
Number of Optimization Parameters : 3
Updates : True
Parameters:
  [1mGP_regression.         [0;0m  |                value  |  constraints  |  priors
  [1mconstmap.C             [0;0m  |    2.535446936958904  |               |        
  [1mMat52.variance         [0;0m  |    0.201212187950071  |      +ve      |        
  [1mMat52.lengthscale      [0;0m  |  0.01025308967130644  |      +ve      |        
  [1mGaussian_noise.variance[0;0m  |                1e-06  |   +ve fixed   |        
