In [3]:
import numpy as np
import matplotlib.pyplot as plt

class GaussianRegressionWithDerivatives():
    def __init__(self, kernel=None, dd_kernel=None, sigma=1e-6):
        self.kernel = kernel
        self.dd_kernel = dd_kernel
        self.sigma = sigma
        self.X = None
        self.y = None
        
    def fit(self, X, y, dy):
        self.X = X
        self.y = y
        self.dy = dy
        # print('the shape of self.kernel(X, X) is: ' + str(self.kernel(X, X).shape))
        self.K = self.kernel(X, X) + self.sigma**2 * np.eye(len(X))
        # print('the shape of self.K is: ' + str(self.K.shape))
        self.dK = self.dd_kernel(X, X) + self.sigma**2 * np.eye(len(X))
        self.L = np.linalg.cholesky(self.K)
        self.dL = np.linalg.cholesky(self.dK)
        # let alpha = inv(K) * y => K * alpha = y => L*L.T * alpha = y => alpha = solve(L.T, solve(L, y))
        self.alpha = np.linalg.solve(self.L.T, np.linalg.solve(self.L, y))
        self.dalpha = np.linalg.solve(self.dL.T, np.linalg.solve(self.dL, dy))

        return self
        
    def predict(self, Xtest):

        K_train_test = self.kernel(self.X, Xtest)
        y_pred = K_train_test.T @ self.alpha

        dK_train_test = self.dd_kernel(self.X, Xtest)
        dy_pred = dK_train_test.T @ self.dalpha

        # L*v = K => v = inv(L)*K => v.T * v = K.T * inv(L.T) * inv(L) * K = K.T * inv(L*L.T) * K
        # v = cp.linalg.solve(self.L, K_train_test)
        # cov = self.kernel(Xtest, Xtest) - v.T @ v
        # std = cp.sqrt(cp.diag(cov))

        # return mean, cov, std
        
        return y_pred, dy_pred

def d_mu(x):
    return 0 

def RBF(x1, x2, length_scale=1.0):
    diff_mat = x1[:, cp.newaxis] - x2
    return np.exp(-0.5 * diff_mat ** 2 / length_scale ** 2)

def d_RBF(x1, x2, length_scale=1.0):
    diff_mat = x1[:, cp.newaxis] - x2
    return diff_mat * cp.exp(-0.5 * diff_mat ** 2 / length_scale ** 2)

def dd_RBF(x1, x2, length_scale=1.0):
    diff_mat = x1[:, cp.newaxis] - x2
    return (1 - diff_mat**2/length_scale**2)/length_scale**2 * cp.exp(-0.5 * diff_mat ** 2 / length_scale ** 2)


X = cp.array(cp.random.uniform(-5, 5, size=(50,)))
y = cp.array(cp.sin(X) + cp.random.normal(0, 0.1, size=(50,)))
dy = cp.array(cp.cos(X))

gr = GaussianRegressionWithDerivatives(RBF, dd_RBF)
gr.fit(X,y,dy)

# use the same data set as training, y_pred and dy_pred should be similar to y and dy
y_pred, dy_pred = gr.predict(X)

print('y is: \n' + str(y[:10]))
print('y_pred is: \n' + str(y_pred[:10]))
print('dy is: \n' + str(dy[:10]))
print('dy_pred: \n' + str(dy_pred[:10]))


ModuleNotFoundError: No module named 'cupy'

In [6]:
import panda as pd

url = "test_data.csv"
dataset = pd.read_csv(url, spe=';')
print(dataset.head())

ModuleNotFoundError: No module named 'panda'

In [9]:
import numpy as np

class GaussianRegressorCholesky:
    def __init__(self, kernel):
        self.kernel = kernel
        self.sigma = None
        self.X = None
        self.y = None

    def fit(self, X, y, sigma=1e-6):
        self.sigma = sigma
        self.X = X
        self.y = y
        self.K = self.kernel(X, X) + self.sigma**2 * np.eye(len(X))
        self.L = np.linalg.cholesky(self.K)
        # let alpha = inv(K) * y => K * alpha = y => L*L.T * alpha = y => alpha = solve(L.T, solve(L, y))
        self.alpha = np.linalg.solve(self.L.T, np.linalg.solve(self.L, y))

    def predict(self, Xtest):
        K_train_test = self.kernel(self.X, Xtest)
        mean = K_train_test.T @ self.alpha

        # L*v = K => v = inv(L)*K => v.T * v = K.T * inv(L.T) * inv(L) * K = K.T * inv(L*L.T) * K
        v = np.linalg.solve(self.L, K_train_test)
        cov = self.kernel(Xtest, Xtest) - v.T @ v
        std = np.sqrt(np.diag(cov))

        return mean, cov, std

def rbf_kernel(X1, X2, length_scale=1.0):
    np.atleast_2d(X1)
    np.atleast_2d(X2)
    s1 = X1.shape
    s2 = X2.shape
    
    if (s1[1] != s2[1]):
        print("ERROR! RBF input matices must have the same number of columns!")
        return

    dists = np.sum((X1[:, np.newaxis] - X2) ** 2, axis=2)
    return np.exp(-dists / (2 * length_scale ** 2))

def mu(x):
    n = x.shape[0]
    return np.sum(x,axis=0) / n

X1 = np.array([[1.,2.,3.],[3.,4.,5.],[5.,6.,7.]])
y = np.array([2.,4.,6.])
gr = GaussianRegressorCholesky(rbf_kernel)
gr.fit(X1,y)

# use the same data set as training, y_pred should be similar to y
y_pred, cov, std = gr.predict(X1)

print('y is: ' + str(y))
print('y_pred is: ' + str(y_pred))
print('cov: \n' + str(cov))
print('std: ' + str(std))

y is: [2. 4. 6.]
y_pred is: [2. 4. 6.]
cov: 
[[1.00008890e-12 0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 1.00019992e-12 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 1.00008890e-12]]
std: [1.00004445e-06 1.00009996e-06 1.00004445e-06]


In [14]:
x = np.array([[0,2],[1,3],[2,4],[3,5],[4,6],[5,7],[6,8],[7,9],[8,10],[9,11],[10,12]])
y = np.array([4,10,20,34,52,74,100,130,164,202,244])
dy = np.array([[0,4],[2,6],[4,8],[6,10],[8,12],[10,14],[12,16],[14,18],[16,20],[18,22],[20,24]])

x_test = np.array([[0,2],[1,3],[2,4]])
y_test = np.array([4,10,20])
dy_test = np.array([[22,26],[24,28],[26,20],[28,32]])
    
print(mu(a))

[4. 5. 6.]


In [15]:
gr = GaussianRegressorCholesky(rbf_kernel)
gr.fit(x,y)

# use the same data set as training, y_pred should be similar to y
y_pred, cov, std = gr.predict(x_test)

print('y is: ' + str(y_test))
print('y_pred is: ' + str(y_pred))
print('cov: \n' + str(cov))
print('std: ' + str(std))

y is: [ 4 10 20]
y_pred is: [ 4. 10. 20.]
cov: 
[[1.00008890e-12 0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 1.00008890e-12 1.11022302e-16]
 [0.00000000e+00 1.11022302e-16 1.00008890e-12]]
std: [1.00004445e-06 1.00004445e-06 1.00004445e-06]


In [21]:
x = np.arange(-100,100,1).reshape(200,1)
y = np.array(2*x + 3).reshape(200,1)

x_test = np.arange(101,120,1).reshape(19,1)
y_test = np.array(2*x_test + 3).reshape(19,1)

gr = GaussianRegressorCholesky(rbf_kernel)
gr.fit(x,y)

# use the same data set as training, y_pred should be similar to y
y_pred, cov, std = gr.predict(x_test)

print('y is: ' + str(y_test))
print('y_pred is: ' + str(y_pred))
print('cov: \n' + str(cov))
print('std: ' + str(std))

y is: [[205]
 [207]
 [209]
 [211]
 [213]
 [215]
 [217]
 [219]
 [221]
 [223]
 [225]
 [227]
 [229]
 [231]
 [233]
 [235]
 [237]
 [239]
 [241]]
y_pred is: [[2.44595852e+01]
 [2.00156322e+00]
 [6.03943728e-02]
 [6.70758239e-04]
 [2.74100871e-06]
 [4.12081800e-09]
 [2.27913549e-12]
 [4.63730152e-16]
 [3.47110002e-20]
 [9.55817188e-25]
 [9.68251568e-30]
 [3.60833752e-35]
 [4.94688298e-41]
 [2.49494884e-47]
 [4.62910652e-54]
 [3.15964450e-61]
 [7.93386467e-69]
 [7.32886755e-77]
 [2.49054548e-85]]
cov: 
[[9.68842797e-01 6.03851971e-01 1.35253024e-01 1.11080771e-02
  3.35458862e-04 3.72664751e-06 1.52299766e-08 2.28973478e-11
  1.26641655e-14 2.57675711e-18 1.92874985e-22 5.31109225e-27
  5.38018616e-32 2.00500878e-37 2.74878501e-43 1.38634329e-49
  2.57220937e-56 1.75568810e-63 4.40853133e-71]
 [6.03851971e-01 9.99768994e-01 6.06523558e-01 1.35335204e-01
  1.11089962e-02 3.35462627e-04 3.72665317e-06 1.52299797e-08
  2.28973485e-11 1.26641655e-14 2.57675711e-18 1.92874985e-22
  5.31109225e-27 5

In [57]:
import numpy as np

class GaussianRegressor:
    def __init__(self, kernel):
        self.kernel = kernel
        self.sigma = None
        self.X = None
        self.y = None

    def fit(self, X, y, sigma=1e-6):
        self.sigma = sigma
        self.X = X
        self.y = y

    def predict(self, Xtest):
        K = self.kernel(self.X, self.X) + self.sigma * np.eye(len(self.X))
        K_train_test = self.kernel(self.X, Xtest)
        #K_star = self.kernel(Xtest, self.X)
        K_test_test = self.kernel(Xtest, Xtest) + self.sigma * np.eye(len(Xtest))
        
        #mean = K_train_test.T @ cp.linalg.inv(K) @ self.y

        # mu(x_train) and mu(x_test) are not strictly required
        mean = mu(Xtest) + K_train_test.T @ np.linalg.inv(K) @ (self.y - mu(self.X))
        # mean = K_train_test.T @ np.linalg.inv(K) @ (self.y)

        # returned cov represents the uncertainty associated with the predicted mean at each test point
        #cov = K_test_test - K_train_test.T @ np.linalg.inv(K) @ K_train_test
        #std = np.sqrt(np.diag(cov))

        return mean

# Define the RBF kernel
# now X1 and X2 are m1-n and m2-n dimensional matrix
# X1 and X2 have the same column number n which denotes the number of featrues
# X1 and X2 have different first dimension m1 and m2 which denote number of data point
def rbf_kernel(X1, X2, length_scale=1.0):
    np.atleast_2d(X1)
    np.atleast_2d(X2)
    s1 = X1.shape
    s2 = X2.shape
    
    if (s1[1] != s2[1]):
        print("ERROR! RBF input matices must have the same number of columns!")
        return

    dists = np.sum((X1[:, np.newaxis] - X2) ** 2, axis=2)
    return np.exp(-dists / (2 * length_scale ** 2))

def mu(x):
    '''
    m = x.shape[0]
    n = x.shape[1]
    return (np.sum(x,axis=1) / n).reshape(m,1)
    '''
    return 2*x

x = np.arange(-100,100,1).reshape(200,1)
y = np.array(2*x + 3).reshape(200,1)

x_test = np.arange(101,120,1).reshape(19,1)
y_test = np.array(2*x_test + 3).reshape(19,1)

gr = GaussianRegressor(rbf_kernel)
gr.fit(x,y)

# use the same data set as training, y_pred should be similar to y
#y_pred = gr.predict(x)

#print('y is: ' + str(y_test))
#print('y_pred is: ' + str(y_pred))


k_x_x = rbf_kernel(x, x)
k_t_x = rbf_kernel(x_test, x)

#print(k_x_x)
#print(k_t_x)

mean = mu(x_test) + k_t_x @ np.linalg.inv(k_x_x) @ (y - mu(x))
    
print('mean is: '+ str(mean))    


mean is: [[112.36265161]
 [101.01217995]
 [100.03054683]
 [100.00033929]
 [100.00000139]
 [100.        ]
 [100.        ]
 [100.        ]
 [100.        ]
 [100.        ]
 [100.        ]
 [100.        ]
 [100.        ]
 [100.        ]
 [100.        ]
 [100.        ]
 [100.        ]
 [100.        ]
 [100.        ]]


In [39]:
from sklearn.gaussian_process.kernels import RBF

kernel = RBF(length_scale=1.0)

x = np.arange(-100,100,1).reshape(200,1)
y = np.array(2*x + 3).reshape(200,1)

x_test = np.arange(101,120,1).reshape(19,1)
y_test = np.array(2*x_test + 3).reshape(19,1)

k_x_x = kernel(x, x)
k_t_x = kernel(x_test, x)

k_x_x2 = rbf_kernel(x, x)
k_t_x2 = rbf_kernel(x_test, x)

print(k_x_x)
print(k_x_x2)
print(k_t_x)
print(k_t_x2)


[[1.         0.60653066 0.13533528 ... 0.         0.         0.        ]
 [0.60653066 1.         0.60653066 ... 0.         0.         0.        ]
 [0.13533528 0.60653066 1.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 1.         0.60653066 0.13533528]
 [0.         0.         0.         ... 0.60653066 1.         0.60653066]
 [0.         0.         0.         ... 0.13533528 0.60653066 1.        ]]
[[1.         0.60653066 0.13533528 ... 0.         0.         0.        ]
 [0.60653066 1.         0.60653066 ... 0.         0.         0.        ]
 [0.13533528 0.60653066 1.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 1.         0.60653066 0.13533528]
 [0.         0.         0.         ... 0.60653066 1.         0.60653066]
 [0.         0.         0.         ... 0.13533528 0.60653066 1.        ]]
[[0.00000000e+000 0.00000000e+000 0.00000000e+000 ... 3.35462628e-004
  1.11089965e-002 1.35335283e-001]
 [0.000

In [28]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel
import matplotlib.pyplot as plt

x = np.arange(-100,100,1).reshape(200,1)
y = np.array(2*x + 3).reshape(200,1)

x_test = np.arange(101,120,1).reshape(19,1)
y_test = np.array(2*x_test + 3).reshape(19,1)

# use a combined kernel
kernel = ConstantKernel(1.0) + ConstantKernel(1.0) * RBF(10) + WhiteKernel(5)
model = GaussianProcessRegressor(kernel=kernel)
model.fit(x, y)
y_pred_tr, y_pred_tr_std = model.predict(x, return_std=True)
y_pred_te, y_pred_te_std = model.predict(x_test, return_std=True)

print(y_pred_tr)
print(y_pred_te)


[[-196.99974572]
 [-194.99979944]
 [-192.99984719]
 [-190.99988941]
 [-188.99992651]
 [-186.9999589 ]
 [-184.99998694]
 [-183.00001098]
 [-181.00003135]
 [-179.00004839]
 [-177.00006236]
 [-175.00007358]
 [-173.00008229]
 [-171.00008875]
 [-169.00009317]
 [-167.0000958 ]
 [-165.00009683]
 [-163.00009644]
 [-161.00009483]
 [-159.00009214]
 [-157.00008854]
 [-155.00008416]
 [-153.00007915]
 [-151.00007361]
 [-149.00006767]
 [-147.00006141]
 [-145.00005494]
 [-143.00004833]
 [-141.00004166]
 [-139.00003501]
 [-137.00002842]
 [-135.00002196]
 [-133.00001567]
 [-131.0000096 ]
 [-129.00000377]
 [-126.99999822]
 [-124.99999298]
 [-122.99998806]
 [-120.99998348]
 [-118.99997927]
 [-116.99997541]
 [-114.99997192]
 [-112.9999688 ]
 [-110.99996604]
 [-108.99996365]
 [-106.99996162]
 [-104.99995994]
 [-102.9999586 ]
 [-100.99995759]
 [ -98.99995689]
 [ -96.99995649]
 [ -94.99995638]
 [ -92.99995653]
 [ -90.99995693]
 [ -88.99995757]
 [ -86.99995841]
 [ -84.99995946]
 [ -82.99996068]
 [ -80.9999620

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
