In [19]:
################ Library imports ################
import numpy as np
import math as math
import matplotlib.pyplot as plt
import pandas as pd
import glob

from matplotlib import pyplot as plt

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel

from sklearn.metrics import mean_squared_error, mean_absolute_error

from scipy.optimize import curve_fit

In [20]:
"""
File reading
"""
# Directory holding the measurement CSV files.
path = '/Users/carlogattuso/Desktop/TFG/Google - Orientacion vertical' # use your path

# glob.glob returns its matches in arbitrary order, so sort them: the row order
# of `frame` is then the same on every run and on every machine.
all_files = sorted(glob.glob(path + "/*.csv"))

# Fail early with an actionable message; otherwise pd.concat would be handed an
# empty list and raise a generic "No objects to concatenate" error.
if not all_files:
    raise FileNotFoundError("No CSV files found in: " + path)

li = []

for filename in all_files:
    # header=None: the first line is read as data and pandas assigns integer
    # column labels (0, 1, 2, ...).
    df = pd.read_csv(filename, index_col = None, sep = ",", header = None)
    li.append(df)

# Stack every file into one frame with a fresh 0..n-1 row index.
frame = pd.concat(li, axis=0, ignore_index=True)

In [21]:
# ----------------------------------------------------------------------
#  AP locations: one 2-D coordinate pair per row
L_ap = np.array([
    [0, 0],
    [0, 2],
    [2, 0],
    [2, 2],
])

# ----------------------------------------------------------------------
#  VRP locations: one 2-D coordinate pair per row
L_vrp = np.array([
    [1, 0],
    [0, 1],
    [1, 1],
    [1, 2],
    [2, 1],
])

# ----------------------------------------------------------------------
#  RTT distance observations: one value per row of L_ap
d_RTT = np.array([
    0,
    2,
    2,
    math.sqrt(8),
])

In [22]:
"""
Two states model: Least Squares
m1 --> first slope
n1 --> first ordinate
m2 --> second slope
n2 --> second ordinate
x --> data
"""
def two_lin(x, m1, n1, m2, n2):
    """Two-state linear model: the lower of two straight lines at each point.

    Parameters
    ----------
    x : data at which the model is evaluated.
    m1, n1 : slope and ordinate of the first line.
    m2, n2 : slope and ordinate of the second line.

    Returns
    -------
    Element-wise minimum of ``m1 * x + n1`` and ``m2 * x + n2``.
    """
    first_line = m1 * x + n1
    second_line = m2 * x + n2
    return np.minimum(first_line, second_line)

In [23]:
"""
Array definition and curve fitting
"""
# Columns are addressed by their integer label:
# column 1 -> real_distance, column 2 -> estimated_distance.
real_distance = np.array(frame[1])
estimated_distance = np.array(frame[2])

# Initial guess handed to the optimizer, in the order (m1, n1, m2, n2).
start_values = [3, 0, 0, 10]

# Least-squares fit of the two-line model; pcov is the parameter covariance
# returned alongside the fitted values.
fit_param, pcov = curve_fit(
    f=two_lin,
    xdata=real_distance,
    ydata=estimated_distance,
    p0=start_values,
)

In [24]:
"""
Output values for slopes/ordinates
"""
# Unpack the fitted parameters: slope/ordinate of the first line, then of the second.
(m1, n1, m2, n2) = fit_param
print("Values: ", *fit_param)

Values:  1.6260619505325347 -3.0670180013313364 0.9789604129755792 -0.33674919534231956


In [25]:
"""
Gaussian Process Kernel definition

Kernel parameters:
    - l --> RBF length scale
    - sigma_f --> ConstantKernel constant_value (passed as-is, not squared)
"""

# Initial hyperparameter values; both are declared with bounds (1e-3, 1e3) below.
l = 2.47       # RBF length scale
sigma_f = 4    # ConstantKernel constant_value (passed as-is, not squared)

# Product kernel: constant factor times an RBF kernel.
kernel = (
    ConstantKernel(constant_value=sigma_f, constant_value_bounds=(1e-3, 1e3))
    * RBF(length_scale=l, length_scale_bounds=(1e-3, 1e3))
)

In [26]:
"""
Gaussian Process Regressor definition

- sigma_n --> Training noise standard deviation (alpha = sigma_n**2 is the noise variance)
"""

# Standard deviation of the observation error; its square is passed as `alpha`.
sigma_n = 0.2

gp = GaussianProcessRegressor(
    kernel=kernel,
    alpha=sigma_n**2,
    n_restarts_optimizer=0,  # no extra optimizer restarts
)

In [27]:
"""
Model Fitting with AP location and RTT distance observations
using Maximum Likelihood Estimation of the parameters.
"""

# Inputs are the AP locations, targets are the RTT distance observations.
# Kept as the cell's last expression so the fitted regressor is displayed.
gp.fit(X=L_ap, y=d_RTT)

GaussianProcessRegressor(alpha=0.04000000000000001,
                         kernel=2**2 * RBF(length_scale=2.47))

In [28]:
"""
Prediction of the VRPs around the scenario
"""
# Predict at the VRP locations; return_std=True also returns the standard
# deviation of each prediction.
d_RTT_pred, d_RTT_err = gp.predict(X=L_vrp, return_std=True)

print(d_RTT_pred)  # predicted values, one per VRP
print(d_RTT_err)   # standard deviation of each prediction

[1.08011434 1.08011434 1.950846   2.56356667 2.56356667]
[0.27477844 0.27477844 0.34364477 0.27477844 0.27477844]


In [29]:
"""
Optimized parameters of the Kernel
"""
# `kernel_` is the kernel held by the fitted regressor; deep=True (the default)
# also lists the parameters of its two factors (k1, k2).
gp.kernel_.get_params(deep=True)

{'k1': 2**2,
 'k2': RBF(length_scale=2.47),
 'k1__constant_value': 4.001438373410803,
 'k1__constant_value_bounds': (0.001, 1000.0),
 'k2__length_scale': 2.4737043968015033,
 'k2__length_scale_bounds': (0.001, 1000.0)}

In [30]:
"""
Log Marginal Likelihood of the obtained parameters
"""
# theta=None (the default) returns the value for the fitted kernel's own
# hyperparameters.
gp.log_marginal_likelihood(theta=None)

-7.062481224144337

In [31]:
"""
Mean squared error, mean absolute error and standard deviation of the prediction error
"""
# Reference distances, compared element-wise with d_RTT_pred (one entry per VRP).
# NOTE(review): the values match the Euclidean distance from (0, 0) to each row
# of L_vrp -- confirm that this is the intended reference point.
d_RTT_true = np.array([1, 1, math.sqrt(2), math.sqrt(5), math.sqrt(5)])

print("MSE: ", mean_squared_error(d_RTT_true, d_RTT_pred))
# Label corrected from "ME" to "MAE": the value is the mean ABSOLUTE error,
# not the signed mean error.
print("MAE: " , mean_absolute_error(d_RTT_true, d_RTT_pred))
# Standard deviation of the signed error (true - predicted).
print("STD: " , np.std(d_RTT_true-d_RTT_pred))

MSE:  0.10306435508421416
ME:  0.27037170221778745
STD:  0.17309967569025148
