In [80]:
## load in relevant packages
import pandas as pd  
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(1)

In [81]:
##===================================================================================
## read in data

## winter
## collapsed over time
dta = pd.read_csv("../data/kcl_london_model_data_winter_collapsed.csv", sep=',') 
## aggregated over time
#dta = pd.read_csv("../data/kcl_london_model_data_winter_agg_time.csv", sep=',')

## not winter
## collapsed over time
#dta = pd.read_csv("../data/kcl_london_model_data_nowinter_collapsed.csv", sep=',')
## aggregated over time
#dta = pd.read_csv("../data/kcl_london_model_data_nowinter_agg_time.csv", sep=',')

## monthly data (2000-2019)
#dta = pd.read_csv("../data/kcl_london_model_data_monthly.csv", sep=',')
## subset to only January
#dta = dta[dta['month']==1]

## set variables to use in model
#params = ['latitude', 'longitude', 'year']
params = ['latitude', 'longitude']
ncols= len(params)

##===================================================================================

In [82]:
## preview data
print(dta.shape)
print(dta.head())

## standardize X-values (or should I just subtract 52 from latitude and leave longitude? 
#if 'year' in dta.columns:
#    dta[['year']] = dta[['year']] - np.min(dta[['year']])
#if 'latitude' in dta.columns:
#    dta[['latitude']] = dta[['latitude']] - 52

## divide into features and variable
X = dta[params].values
y = dta.loc[:,'nox'].values
y = y.reshape(-1,1)

print(X.shape)
print(y.shape)

## print previews
print(y[0:10])
print(X[1:10,:])

(230, 6)
                                   site code   latitude  longitude  \
0                      Heathrow Airport  LH2  51.479234  -0.440531   
1  Richmond Upon Thames - Craneford Way  RHC  51.449051  -0.342532   
2  Richmond Upon Thames - Craneford Way  RHD  51.449051  -0.342532   
3                    Ealing - Horn Lane  EA8  51.518948  -0.265617   
4                  Brent - Neasden Lane  BT5  51.552656  -0.248774   

    site_type         nox  
0  Industrial  142.307206  
1  Industrial   72.340567  
2  Industrial   71.733144  
3  Industrial  135.887743  
4  Industrial  130.148268  
(230, 2)
(230, 1)
[[142.30720557]
 [ 72.34056713]
 [ 71.7331443 ]
 [135.88774308]
 [130.14826753]
 [103.62288152]
 [ 61.60730568]
 [ 56.24558636]
 [ 89.99836659]
 [ 65.54394518]]
[[51.44905133 -0.3425321 ]
 [51.44905133 -0.3425321 ]
 [51.518948   -0.265617  ]
 [51.552656   -0.248774  ]
 [51.5526706  -0.24853069]
 [51.39431543 -0.16516146]
 [51.39434156 -0.16516104]
 [51.3892869  -0.14166152]
 [51.38

In [83]:
## create validation dataset (no test set since using MLL)
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.25, random_state=0) 

print(X_train.shape)
print(y_train.shape)
print(X_val.shape)
print(y_val.shape)
#print(max(X_train[:,1]))
#print(min(X_train[:,1]))

(172, 2)
(172, 1)
(58, 2)
(58, 1)


In [84]:
## Normalize Y
from sklearn.preprocessing import StandardScaler  
feature_scaler = StandardScaler() 

## standardize y-values
y_train = feature_scaler.fit_transform(y_train)
y_val = feature_scaler.fit_transform(y_val)

#if X_train.shape[1] >= 3:
#    X_train[:,0:2] = feature_scaler.fit_transform(X_train[:,0:2])
#    X_val[:,0:2] = feature_scaler.fit_transform(X_val[:,0:2])
#else:
X_train = feature_scaler.fit_transform(X_train)
X_val = feature_scaler.fit_transform(X_val)


## TODO: add check for whether columns are correctly standardized, formatted (no strings)

print(y_train.shape)
print(X_val.shape)
#print(y_train[0:5,:])
#print(X_train.shape)
#print(X_train[0:5,:])
#print(X_val[0:5,:])
print(min(y_train[:,0]))
print(max(y_train[:,0]))

(172, 1)
(58, 2)
-1.392912629793989
4.804710386375748


In [85]:
print(np.mean(y_train[:,0]))
print(np.var(y_train[:,0]))
print(np.mean(y_val[:,0]))
print(np.var(y_val[:,0]))

print(np.mean(X_train[:,0]))
print(np.var(X_train[:,0]))
print(np.mean(X_train[:,1]))
print(np.var(X_train[:,1]))

-5.783487384093839e-16
1.0000000000000004
1.0145141431919534e-16
0.9999999999999999
4.5066276292144787e-13
1.0
7.745742032268534e-17
1.0


In [91]:
## build GP model
import gpflow

## set kernel
## 0: space (1 RBF)
## 1: space + time (2 RBF)
## 2: space + time (1 RBF, 1 Linear)
## 3: space + time + site_type (1 RBF, 1 RBF/Linear, )
which_kernel = 0
which_iter = 1

if (which_kernel == 1) & ('year' not in params):
        print("ERROR: did not specify YEAR as parameter!!")

hmc_model_0_iter_1.sav


In [88]:
## do I need ARD?
## "In our case, we will use the Squared Exponential covariance function for f,
## and an Automatic Relevence Determination version of the SE covariance function for g.
## The ARD will allow us to find which predictor variables affect predictions from the model,
## which should relate to their importance."

if which_kernel == 0:
    kernel = gpflow.kernels.RBF(2, active_dims=[0,1])
if which_kernel == 1:
    kernel = gpflow.kernels.RBF(2, active_dims=[0,1]) * gpflow.kernels.RBF(1 , active_dims=[2], ARD=False)
if which_kernel == 2:
    kernel = gpflow.kernels.RBF(2, active_dims=[0,1]) * gpflow.kernels.Linear(1 , active_dims=[2], ARD=False)
    
## build model
with gpflow.defer_build():
    ## log-liklihood???
    l = gpflow.likelihoods.StudentT()
    m = gpflow.models.GPMC(X_train, y_train, kernel, likelihood=l)

## view 
m.as_pandas_table()

Unnamed: 0,class,prior,transform,trainable,shape,fixed_shape,value
GPMC/V,Parameter,"N(0.0,1.0)",(none),True,"(172, 1)",True,"[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...."
GPMC/kern/lengthscales,Parameter,,+ve,True,(),True,1.0
GPMC/kern/variance,Parameter,,+ve,True,(),True,1.0
GPMC/likelihood/scale,Parameter,,+ve,True,(),True,1.0


In [None]:
## set priors for hyperparameters
#m.kern.kernels[0].lengthscales.prior = gpflow.priors.Gamma(1., 1.)
#m.kern.kernels[0].variance.prior = gpflow.priors.Gamma(1.,1.)
#m.kern.kernels[1].lengthscales.prior = gpflow.priors.Gamma(1., 1.)
#m.kern.kernels[1].variance.prior = gpflow.priors.Gamma(1.,1.)

In [92]:
import datetime
currentDT = datetime.datetime.now()
print (str(currentDT))

2019-04-23 19:27:23.215003


In [94]:
## Marginal Liklihood Maximization
## picks the most simple model that picks the data the best


## MODELING
## what choice in optimizer??????
## settings for optimization
## how to evaluate actual model? R^2 
## what term do I compare? how to determine "fit"?
## small value = more complex
## should I use 10-fold CV?
## should I add a constant kernel?

m.compile()
opt = gpflow.train.AdamOptimizer(0.01)
opt.minimize(m, maxiter=15) # start near MAP

## two things to tune
## HMC uses Oiler approximation (discrete derivative calculator)
## does updates in steps for trajectory of one sample
## samples: 100 (how many accepted of the posterior)
## epsilon: amount of time in time-steps (dt) -- heuristic??????
## lmax, lmin: number of discrete steps along the path -- heuristic???????

s = gpflow.train.HMC()
sample_df = s.sample(m, 100, epsilon=0.12, lmax=20, lmin=5, thin=5, logprobs=False)
print(sample_df.head())

## save 
import pickle
## save sample dataframe
filename = 'hmc_samples_' + str(which_kernel) + '_iter_' + str(which_iter) + '.sav'
pickle.dump(sample_df, open(filename, 'wb'))
## save model object
filename = 'hmc_model_' + str(which_kernel) + '_iter_' + str(which_iter) + '.sav'
#saver= gpflow.saver.Saver()
#saver.save(filename, m)
pickle.dump(m.read_trainables(), open(filename, 'wb'))

import datetime
currentDT = datetime.datetime.now()
print (str(currentDT))

                                              GPMC/V  GPMC/kern/lengthscales  \
0  [[0.004937867770740749], [0.2952406052838989],...                0.828747   
1  [[0.004937867770740749], [0.2952406052838989],...                0.828747   
2  [[0.004937867770740749], [0.2952406052838989],...                0.828747   
3  [[0.004937867770740749], [0.2952406052838989],...                0.828747   
4  [[0.004937867770740749], [0.2952406052838989],...                0.828747   

   GPMC/kern/variance  GPMC/likelihood/scale  
0             1.18959               0.821135  
1             1.18959               0.821135  
2             1.18959               0.821135  
3             1.18959               0.821135  
4             1.18959               0.821135  
2019-04-23 19:31:22.491203


In [99]:
print(np.mean(sample_df.iloc[:,1]))
#print(np.var(sample_df.iloc[:,1]))
print(np.mean(sample_df.iloc[:,2]))
#print(np.var(sample_df.iloc[:,2]))
print(np.mean(sample_df.iloc[:,3]))
#print(np.var(sample_df.iloc[:,3]))
#print(np.mean(sample_df.iloc[:,4]))
#print(np.var(sample_df.iloc[:,4]))

print(sample_df.shape)
#print(sample_df[0:5,0])

## test points -- x()

## make average of line [x1, x2, x3, emissions] -- need to translate the draws compare with 




# make a more informative plot
#plt.figure(figsize=(16, 4))
#for lab, s in sample_df.iteritems():
#    plt.plot(s, label=lab)
#_ = plt.legend(loc=0)
#f, axs = plt.subplots(1,3, figsize=(12,4))

#axs[0].plot(sample_df['model.likelihood.variance'],
#            sample_df['model.kern.variance'], 'k.', alpha = 0.15)
#axs[0].set_xlabel('noise_variance')
#axs[0].set_ylabel('signal_variance')

#axs[1].plot(sample_df['model.likelihood.variance'],
#            sample_df['model.kern.lengthscales'], 'k.', alpha = 0.15)
#axs[1].set_xlabel('noise_variance')
#axs[1].set_ylabel('lengthscale')

#axs[2].plot(sample_df['model.kern.lengthscales'],
#            sample_df['model.kern.variance'], 'k.', alpha = 0.1)
#axs[2].set_xlabel('lengthscale')
#axs[2].set_ylabel('signal_variance')

0.828746994667614
1.1895902366367828
0.8211353271974207
(100, 4)


In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.model_selection import cross_val_score

In [None]:
import gpflow
import numpy as np
import matplotlib
%matplotlib inline
matplotlib.rcParams['figure.figsize'] = (12, 6)
plt = matplotlib.pyplot

X = np.linspace(-3,3,20)
Y = np.random.exponential(np.sin(X)**2)

with gpflow.defer_build():
    k = gpflow.kernels.Matern32(1, ARD=False) + gpflow.kernels.Bias(1)
    l = gpflow.likelihoods.Exponential()
    m = gpflow.models.GPMC(X[:,None], Y[:,None], k, l)

m.kern.kernels[0].lengthscales.prior = gpflow.priors.Gamma(1., 1.)
#m.kern.kernels[1].lengthscales.prior = gpflow.priors.Gamma(1., 1.)
#m.kernels.Bias.variance.prior = gpflow.priors.Gamma(1.,1.)
#m.kernels.Matern32.lengthscales.prior = gpflow.priors.Gamma(1., 1.)
#m.kernels.Matern32.variance.prior = gpflow.priors.Gamma(1.,1.)
#m.kernels.Bias.variance.prior = gpflow.priors.Gamma(1.,1.)

m.compile()
o = gpflow.train.AdamOptimizer(0.01)
o.minimize(m, maxiter=15) # start near MAP

s = gpflow.train.HMC()
samples = s.sample(m, 100, epsilon=0.12, lmax=20, lmin=5, thin=5, logprobs=False)#, verbose=True)
samples.head()


In [None]:
print(np.mean(samples.iloc[:,1]))
print(np.var(samples.iloc[:,1]))
print(np.mean(samples.iloc[:,2]))
print(np.var(samples.iloc[:,2]))

In [None]:
# make a more informative plot
#plt.figure(figsize=(16, 4))
#for lab, s in samples.iteritems():
#    plt.plot(s, label=lab)
#_ = plt.legend(loc=0)




#for col in samples.columns.sort_values()[1:]:
#    samples[col].hist(label=col.split('.')[-1], alpha=0.4, bins=15)

In [None]:
xtest = np.linspace(-4,4,100)[:,None]
f_samples = []
for i, s in samples.iterrows():
    m.assign(s)
    f_samples.append(m.predict_f_samples(xtest, 5, initialize=False))
f_samples = np.vstack(f_samples)

In [None]:
rate_samples = np.exp(f_samples[:, :, 0])

line, = plt.plot(xtest, np.mean(rate_samples, 0), lw=2)
plt.fill_between(xtest[:,0],
                 np.percentile(rate_samples, 5, axis=0),
                 np.percentile(rate_samples, 95, axis=0),
                 color=line.get_color(), alpha = 0.2)

plt.plot(X, Y, 'kx', mew=2)
plt.ylim(-0.1, np.max(np.percentile(rate_samples, 95, axis=0)))

In [None]:
opt = gpflow.train.ScipyOptimizer()
opt.minimize(m)

In [None]:
N = 12
X = np.random.rand(N,1)
Y = np.sin(12*X) + 0.66*np.cos(25*X) + np.random.randn(N,1)*0.1 + 3
print(Y.shape)
print(X.shape)
plt.plot(X, Y, 'kx', mew=2)

k = gpflow.kernels.Matern52(1, lengthscales=0.3)
m = gpflow.models.GPR(X, Y, kern=k)
m.likelihood.variance = 0.01
m.compile()

In [None]:
def plot(m):
    xx = np.linspace(-0.1, 1.1, 100)[:,None]
    mean, var = m.predict_y(xx)
    plt.figure(figsize=(12, 6))
    plt.plot(X, Y, 'kx', mew=2)
    plt.plot(xx, mean, 'b', lw=2)
    plt.fill_between(xx[:,0], mean[:,0] - 2*np.sqrt(var[:,0]), mean[:,0] + 2*np.sqrt(var[:,0]), color='blue', alpha=0.2)
    plt.xlim(-0.1, 1.1)
plot(m)

In [None]:
from scipy.spatial import distance as d

print(X_train[0:10,:])

space_length_scale = 1.0
time_length_scale = 1.0

#X_space = X_train[:,0:2]

X_space = np.atleast_2d(X_train[0,0:2])
Y_space = np.atleast_2d(X_train[1,0:2])
X_time = np.atleast_2d(X_train[0,2])
Y_time = np.atleast_2d(X_train[1,2])
print(X_space[0:5,:])
print(X_space.shape)
print(X_time[0:5,:])
print(X_time.shape)


dists_space = d.cdist(X_space / space_length_scale, Y_space / space_length_scale, metric='sqeuclidean')
dists_time = d.cdist(X_time / time_length_scale, Y_time / time_length_scale, metric='sqeuclidean')
K = np.exp(-.5 * dists_space) * np.exp(-.5 * dists_time)

print(dists_space)
print(dists_time)
print(K)


dists_space = d.pdist(X_space / space_length_scale, metric='sqeuclidean')
dists_time = d.pdist(X_time / time_length_scale, metric='sqeuclidean')
K = np.exp(-.5 * dists_space) * np.exp(-.5 * dists_time)
# convert from upper-triangular matrix to square matrix
K = d.squareform(K)
np.fill_diagonal(K, 1)

print(dists_space)
print(dists_time)
print(K)

In [None]:
## define custom kernel

class RBF_space_RBF_time(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
    def __init__(self, space_length_scale=1.0, time_length_scale=1.0, space_length_scale_bounds=(1e-5, 1e5), time_length_scale_bounds=(1e-5, 1e5),):
        self.space_length_scale = space_length_scale
        self.time_length_scale = time_length_scale
        self.space_length_scale_bounds = space_length_scale_bounds
        self.time_length_scale_bounds = time_length_scale_bounds

    #@property
    #def anisotropic(self):
    #    return np.iterable(self.length_scale) and len(self.length_scale) > 1

    @property
    def hyperparameter_space_length_scale(self):
        #if self.anisotropic:
        #    return Hyperparameter("length_scale", "numeric",
        #                          self.length_scale_bounds,
        #                          len(self.length_scale))
        return Hyperparameter(
            "space_length_scale", "numeric", self.space_length_scale_bounds)
    
    @property
    def hyperparameter_time_length_scale(self):
        #if self.anisotropic:
        #    return Hyperparameter("length_scale", "numeric",
        #                          self.length_scale_bounds,
        #                          len(self.length_scale))
        return Hyperparameter(
            "time_length_scale", "numeric", self.time_length_scale_bounds)
    
    def __call__(self, X, Y=None, eval_gradient=False):
        ## collect space coordinates (lat/long)
        X_space = np.atleast_2d(X[:,0:2])
        ## collect time coordinates (year)
        X_time = np.atleast_2d(X[:,2])
        #X_space = np.atleast_2d(X)
        space_length_scale = self.space_length_scale
        #length_scale = _check_length_scale(X, self.length_scale)
        if Y is None:
            dists_space = pdist(X_space / space_length_scale, metric='sqeuclidean')
            dists_time = pdist(X_time / time_length_scale, metric='sqeuclidean')
            K = np.exp(-.5 * dists_space) * np.exp(-.5 * dists_time)
            # convert from upper-triangular matrix to square matrix
            K = squareform(K)
            np.fill_diagonal(K, 1)
        else:
        #    if eval_gradient:
        #        raise ValueError(
        #            "Gradient can only be evaluated when Y is None.")
            Y_space = np.atleast_2d(Y[:,0:2])
            Y_time = np.atleast_2d(Y[:,2])
            dists_space = cdist(X_space / space_length_scale, Y_space / space_length_scale,
                          metric='sqeuclidean')
            dists_time = cdist(X_time / time_length_scale, Y_time / time_length_scale,
                          metric='sqeuclidean')
            K = np.exp(-.5 * dists_space) * np.exp(-.5 * dists_time)
        
        return K

        #if eval_gradient:
        #    if self.hyperparameter_length_scale.fixed:
        #        # Hyperparameter l kept fixed
        #        return K, np.empty((X.shape[0], X.shape[0], 0))
        #    elif not self.anisotropic or length_scale.shape[0] == 1:
        #        K_gradient = \
        #            (K * squareform(dists))[:, :, np.newaxis]
        #        return K, K_gradient
        #    elif self.anisotropic:
        #        # We need to recompute the pairwise dimension-wise distances
        #        K_gradient = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 \
        #            / (length_scale ** 2)
        #        K_gradient *= K[..., np.newaxis]
        #        return K, K_gradient
        #else:
        #    return K

    def __repr__(self):
        return "{0}(length_scale={1:.3g})".format(self.__class__.__name__, np.ravel(self.space_length_scale)[0])
        #if self.anisotropic:
        #    return "{0}(length_scale=[{1}])".format(
        #        self.__class__.__name__, ", ".join(map("{0:.3g}".format,
        #                                           self.length_scale)))
        #else:  # isotropic
        #    return "{0}(length_scale={1:.3g})".format(
        #        self.__class__.__name__, np.ravel(self.length_scale)[0])


In [None]:
## ?? Should I specify a constant kernel?
kernel = RBF(length_scale=1)
gp = GaussianProcessRegressor(kernel=kernel, alpha=1, normalize_y=True, n_restarts_optimizer=5)
all_accuracies = cross_val_score(estimator=gp, X=X_train, y=y_train, cv=5, scoring='r2').mean()
print(all_accuracies)
## winter data
## sigma = 1000, alpha=1
## 0.8037412498178955
## sigma = 1, alpha=1
## 0.8055139240920436
## sigma=1, alpha = 100
## 0.14285243700525588
## sigma=10, alpha=100
## 0.14285243655664298

In [None]:
## search for best hyperparameters: sigma
all_accuracies_mean = []
all_accuracies_std = []
sigmas = np.arange(start=0.1, stop=4.5, step=0.5)
for sigma in sigmas:
    kernel = RBF(length_scale=sigma)
    gp = GaussianProcessRegressor(kernel=kernel, alpha=1, normalize_y=True, n_restarts_optimizer=5)
    all_accuracies = cross_val_score(estimator=gp, X=X_train, y=y_train, cv=5, scoring='r2')
    all_accuracies_mean.append(all_accuracies.mean())
    all_accuracies_std.append(all_accuracies.std())
print(sigmas)
print(all_accuracies_mean)
print(all_accuracies_std)

In [None]:
## function to select the best parameter
def best_parameter (mean_acc, param_list):
    ## take sd of all accuracies, if small, then take the median sigma
    acc_sd = np.std(mean_acc)
    if acc_sd < 0.005:
        best_param = np.median(param_list)
    else:
        ## pick the sigma with the highest R^2
        best_param = param_list[np.argmax(mean_acc)]
    
        ## if large sd and tie, default to larger sigma (more variance)
        ## or could select the default value (1)
        #best_param = 1

    return(best_param)

In [None]:
## find best parameter for sigma
best_sigma = best_parameter(all_accuracies_mean, sigmas)
print(best_sigma)

In [None]:
## search for best hyperparameters: alpha
all_accuracies_al_mean = []
all_accuracies_al_std = []
alphas = np.arange(start=0.1, stop=1.1, step=0.1)
for alpha in alphas:
    kernel = RBF(length_scale=best_sigma)
    gp = GaussianProcessRegressor(kernel=kernel, alpha=alpha, normalize_y=True, n_restarts_optimizer=5)
    all_accuracies_al = cross_val_score(estimator=gp, X=X_train, y=y_train, cv=5, scoring='r2')
    all_accuracies_al_mean.append(all_accuracies_al.mean())
    all_accuracies_al_std.append(all_accuracies_al.std())
print(alphas)
print(all_accuracies_al_mean)
print(all_accuracies_al_std)

In [None]:
## select the best alpha
best_alpha = best_parameter(all_accuracies_al_mean, alphas)
print(best_alpha)

In [None]:
## now train model with best parameters
kernel = RBF(length_scale=best_sigma)
## ?? Should I re-run CV again when getting the final thetas?
gp = GaussianProcessRegressor(kernel=kernel, alpha=best_alpha, normalize_y=True, n_restarts_optimizer=5)
gp.fit(X_train, y_train)

In [None]:
y_pred, sigma = gp.predict(X_val, return_std=True)
print(gp.score(X_val, y_val))
print(y_pred[0:10])

## save model object
import pickle
filename = 'finalized_model_base_jan.sav'
pickle.dump(gp, open(filename, 'wb'))

## how to load back in later to compare
#loaded_model = pickle.load(open(filename, 'rb'))
#result = loaded_model.score(X_test, Y_test)
#print(result)

In [None]:
# Plot the function, the prediction and the 95% confidence interval based on
# the MSE
plt.figure()
#plt.errorbar(X.ravel(), y, dy, fmt='r.', markersize=10, label=u'Observations')
## ?? How to visualize X values?
plt.plot(X_val, y_pred, 'b-', label=u'Prediction')
#plt.fill(np.concatenate([x, x[::-1]]),
#         np.concatenate([y_pred - 1.9600 * sigma,
#                        (y_pred + 1.9600 * sigma)[::-1]]),
#         alpha=.5, fc='b', ec='None', label='95% confidence interval')
#plt.xlabel('$x$')
#plt.ylabel('$f(x)$')
#plt.ylim(-10, 20)
#plt.legend(loc='upper left')

plt.show()

In [None]:
import numpy as np
from matplotlib import pyplot as plt

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

np.random.seed(1)


def f(x):
    """The function to predict."""
    return x * np.sin(x)

# ----------------------------------------------------------------------
#  First the noiseless case
X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T

# Observations
y = f(X).ravel()

# Mesh the input space for evaluations of the real function, the prediction and
# its MSE
x = np.atleast_2d(np.linspace(0, 10, 1000)).T

# Instantiate a Gaussian Process model
kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(X, y)

# Make the prediction on the meshed x-axis (ask for MSE as well)
y_pred, sigma = gp.predict(x, return_std=True)

In [None]:
print(y_pred)