In [115]:
import numpy as np
import math
import GPy
import matplotlib.pyplot as plt
import seaborn

In [116]:
def generate_data():
    data = np.loadtxt('./../lda.csv', delimiter=",")
    sample = np.random.choice(data.shape[0], size = 32, replace=False)
    samp_data = data[sample]
    #X = samp_data[:, :-1]
    #y = samp_data[:, -1].reshape(-1, 1)
    X = samp_data[:, :-1]
    y = samp_data[:, -1].reshape(-1, 1)
    return X, y

X, y = generate_data()
y = np.log10(y)

In [117]:
def base_kernel_search(X, y):
    kernel_comp = [GPy.kern.RBF(input_dim=X.shape[1], variance=1.0, lengthscale=1.0, useGPU=True), 
                         GPy.kern.RatQuad(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0),
                         GPy.kern.Matern32(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0),
                         GPy.kern.Matern52(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0),
                         GPy.kern.StdPeriodic(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0, useGPU = True),
                         GPy.kern.Exponential(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0)]
    
    best_model = None
    best_bic = float('inf')
    mean = GPy.mappings.Constant(input_dim = X.shape[1], output_dim = 1, value = np.mean(y))


    for kernel1 in kernel_comp:
        kernel = kernel1
        model = GPy.models.GPRegression(X, y, kernel, noise_var=0.001**2, mean_function=mean)
        model.Gaussian_noise.variance.fix()
        model.optimize_restarts(5, verbose=False)
        n = model.num_data
        k = len(model.parameters)
        log_likelihood = model.log_likelihood()
        
        bic = k * np.log(n) - 2 * log_likelihood
        print(bic)
        print(model)

        if bic < best_bic:
            best_model = (kernel, model)
            best_bic = bic

    print(best_bic, best_model)

In [118]:
import warnings
with warnings.catch_warnings(action="ignore"):
    base_kernel_search(X, y)

-4.416176917381449

Name : GP regression
Objective : -7.406692312890314
Number of Parameters : 4
Number of Optimization Parameters : 3
Updates : True
Parameters:
  [1mGP_regression.         [0;0m  |                 value  |  constraints  |  priors
  [1mconstmap.C             [0;0m  |     4.261026438748114  |               |        
  [1mrbf.variance           [0;0m  |  0.036852839015622384  |      +ve      |        
  [1mrbf.lengthscale        [0;0m  |    0.4497793983532726  |      +ve      |        
  [1mGaussian_noise.variance[0;0m  |                 1e-06  |   +ve fixed   |        
-6.136531548096048

Name : GP regression
Objective : -8.266869628247614
Number of Parameters : 5
Number of Optimization Parameters : 4
Updates : True
Parameters:
  [1mGP_regression.         [0;0m  |                value  |  constraints  |  priors
  [1mconstmap.C             [0;0m  |   4.2845883154181825  |               |        
  [1mRatQuad.variance       [0;0m  |  0.03745393500703986  |

In [119]:
def kernel_search(X, y):
    kernel_comp = [GPy.kern.RBF(input_dim=X.shape[1], variance=1.0, lengthscale=1.0), 
                         GPy.kern.RatQuad(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0),
                         GPy.kern.Matern32(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0),
                         GPy.kern.Matern52(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0),
                         GPy.kern.StdPeriodic(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0),
                         GPy.kern.Exponential(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0)]
    
    best_model = None
    best_bic = float('inf')
    mean_comp = [GPy.mappings.Constant(input_dim = X.shape[1], output_dim = 1, value = np.mean(y)), 
            GPy.mappings.Linear(input_dim = X.shape[1], output_dim = 1)]


    for kernel1 in kernel_comp:
        for kernel2 in kernel_comp:
            for mean in mean_comp:
                # Combine kernels
                kernel = kernel1 * kernel2
                model = GPy.models.GPRegression(X, y, kernel, noise_var=0.001**2, mean_function=mean)
                model.Gaussian_noise.variance.fix()
                model.optimize()
                n = model.num_data
                k = len(model.optimizer_array)
                log_likelihood = model.log_likelihood()
                
                bic = k * np.log(n) - 2 * log_likelihood
                print(bic)
                print(model)

                if bic < best_bic:
                    best_model = (kernel, model)
                    best_bic = bic

    print(best_bic, best_model)

In [120]:
import warnings
with warnings.catch_warnings(action="ignore"):
    kernel_search(X, y)

2.515331252997786

Name : GP regression
Objective : -7.406674130500424
Number of Parameters : 6
Number of Optimization Parameters : 5
Updates : True
Parameters:
  [1mGP_regression.         [0;0m  |               value  |  constraints  |  priors
  [1mconstmap.C             [0;0m  |   4.261028162874061  |               |        
  [1mmul.rbf.variance       [0;0m  |  0.1919710055300377  |      +ve      |        
  [1mmul.rbf.lengthscale    [0;0m  |  0.9999578079655745  |      +ve      |        
  [1mmul.rbf_1.variance     [0;0m  |  0.1919710055300377  |      +ve      |        
  [1mmul.rbf_1.lengthscale  [0;0m  |  0.9999578079655745  |      +ve      |        
  [1mGaussian_noise.variance[0;0m  |               1e-06  |   +ve fixed   |        
82.90711489847097

Name : GP regression
Objective : 27.590613838036578
Number of Parameters : 9
Number of Optimization Parameters : 8
Updates : True
Parameters:
  [1mGP_regression.         [0;0m  |               value  |  constraints  

In [121]:
def best_kernel_search(X, y):
    
    kernel = GPy.kern.StdPeriodic(input_dim=X.shape[1], variance = 1.0, lengthscale= 1.0) * GPy.kern.RatQuad(input_dim=X.shape[1], variance=1.0, lengthscale=1.0)
    mean = GPy.mappings.Constant(input_dim = X.shape[1], output_dim = 1, value = np.mean(y))
    model = GPy.models.GPRegression(X, y, kernel, noise_var=0.001**2, mean_function=mean)
    model.Gaussian_noise.variance.fix()
    model.optimize_restarts(100, verbose=False)

    n = model.num_data
    k = len(model.optimizer_array)
    log_likelihood = model.log_likelihood()
    
    bic = k * np.log(n) - 2 * log_likelihood
    print(bic)
    print(model)

    return model

In [122]:
store = best_kernel_search(X, y)



-19.286127195084624

Name : GP regression
Objective : -21.773139257341356
Number of Parameters : 8
Number of Optimization Parameters : 7
Updates : True
Parameters:
  [1mGP_regression.              [0;0m  |                 value  |  constraints  |  priors
  [1mconstmap.C                  [0;0m  |     4.287000806090743  |               |        
  [1mmul.std_periodic.variance   [0;0m  |    0.3110275017585567  |      +ve      |        
  [1mmul.std_periodic.period     [0;0m  |    0.8799407202343502  |      +ve      |        
  [1mmul.std_periodic.lengthscale[0;0m  |     1.181736676646456  |      +ve      |        
  [1mmul.RatQuad.variance        [0;0m  |     0.182798179837257  |      +ve      |        
  [1mmul.RatQuad.lengthscale     [0;0m  |    2.0587998795711075  |      +ve      |        
  [1mmul.RatQuad.power           [0;0m  |  0.003581960169483112  |      +ve      |        
  [1mGaussian_noise.variance     [0;0m  |                 1e-06  |   +ve fixed   |        