# Different visualizations and GPR trials for orange cuts

In [None]:
from glob import glob
from os.path import join
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
from sklearn.utils import resample
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel
from sklearn.metrics import mean_squared_error, make_scorer
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

## Data choice

In [None]:
# Fruit whose recordings are analysed; used below to build the pickle file
# names and the figure titles.
fruit = "orange"

In [None]:
# Kind of motion to analyse; the pickle file names below are built from this
# value with a plural "s" appended.
cut_type = "cut"

## Read data sets

In [None]:
# Load the train/test split and the segmented runs for the selected fruit and
# cut type. The files are opened in `with` blocks so the handles are closed as
# soon as unpickling finishes (the previous version leaked both handles).
# NOTE(review): pickle.load executes arbitrary code embedded in the file --
# only load pickles that were produced by this project.
with open('splits/splits_' + fruit + '_' + cut_type + 's.pickle', 'rb') as pickle_in:
    splits_dict = pickle.load(pickle_in)

with open('segmented_' + cut_type + 's_' + fruit + '.pickle', 'rb') as pickle_in:
    segmented_runs = pickle.load(pickle_in)

In [None]:
# Build two figures of cutting force against displacement:
#   * x_force_displacement_fig -- every selected run as its own coloured trace
#   * x_force_dis_fig_grey     -- the same runs in translucent grey, meant as a
#                                 backdrop (only the first trace gets a legend
#                                 entry, labelled "Training data")
x_force_displacement_fig = go.Figure()
x_force_dis_fig_grey = go.Figure()

total_samples = 0
# Count only the runs that are actually plotted. The enumerate index used
# before also counted runs skipped by the filter, which made the "runs" number
# in the title wrong and could drop the legend entry when the first dictionary
# item was filtered out.
n_runs = 0
for name, r in segmented_runs.items():
    # Keep only runs whose sixth underscore-separated name field is
    # '0.005000' (presumably a recording parameter -- TODO confirm meaning).
    if name.split('_')[5] != '0.005000':
        continue

    displacement_mm = 1e3*r['displacement']  # scaled by 1e3 to match the [mm] axis label
    cutting_force = -r['ee_force_x']         # sign flipped before plotting

    total_samples += len(r.index)
    x_force_displacement_fig.add_trace(go.Scatter(
            x=displacement_mm,
            y=cutting_force,
            # mode='markers',
            showlegend=False))

    if n_runs == 0:
        # First plotted run carries the single legend entry.
        x_force_dis_fig_grey.add_trace(go.Scatter(
            x=displacement_mm,
            y=cutting_force,
            line=dict(color='grey'),
            opacity=.5,
            name='Training data'))
    else:
        x_force_dis_fig_grey.add_trace(go.Scatter(
            x=displacement_mm,
            y=cutting_force,
            line=dict(color='grey'),
            opacity=.5,
            name='training',
            showlegend=False))
    n_runs += 1


x_force_displacement_fig.update_layout(
    width=1000,
    height=350,
    title={
        'text': "Robot " + fruit + " " + cut_type + "s, " + str(n_runs) + " runs, "
                + str(total_samples) + " datapoints",
        'x':0.5,
        'xanchor': 'center'},
    xaxis_title="Arc length from cut start [mm]",
    yaxis_title="Cutting force [N]",
    font=dict(
        # family="Courier New, monospace",
        size=18,
    )
)
x_force_displacement_fig.update_yaxes(range=[0, 6])
x_force_displacement_fig.show()

x_force_dis_fig_grey.update_layout(
    title={
        'text': '-'.join([fruit, cut_type]) + ", GPR trained on 1000 data points with 5-Fold CV",
        'x':0.5,
        'xanchor': 'center'},
    xaxis_title="Arc length from cut start [mm]",
    yaxis_title="Cutting force [N]",
    font=dict(
        size=18,
    )
)
x_force_dis_fig_grey.update_yaxes(range=[0, 6])
#x_force_dis_fig_grey.show()

## Normalize and sort data

In [None]:
# Prepare the regression inputs: inputs are scaled by 1e3 and ordered by
# ascending value, training targets are standardised with `scaler`, test
# targets are left unscaled.
scaler = StandardScaler()

# --- training split ---
train_inputs = 1e3 * splits_dict['X_train']
sorting = np.argsort(train_inputs)
X_train = np.transpose(np.atleast_2d(train_inputs[sorting]))  # column vector for sklearn
train_targets_scaled = scaler.fit_transform(splits_dict['y_train'].reshape(-1, 1))
y_train_norm = np.ravel(np.transpose(train_targets_scaled[sorting]))

# --- test split (targets are NOT passed through the scaler here) ---
test_inputs = 1e3 * splits_dict['X_test']
sorting = np.argsort(test_inputs)
X_test = np.transpose(np.atleast_2d(test_inputs[sorting]))
y_test = np.ravel(np.transpose(np.atleast_2d(splits_dict['y_test'][sorting])))

## GPR fit on downsampled displacement-cutting force

In [None]:
# Fit a GPR whose RBF length scale and white-noise level are both held fixed,
# print a few error metrics, then plot the predictive mean with a +/- 2 std
# band over the (standardised) training data.
kernel = (RBF(0.05, length_scale_bounds='fixed')
          + WhiteKernel(0.05, noise_level_bounds='fixed'))
#kernel = RBF(3) + WhiteKernel(1)
gpr = GaussianProcessRegressor(
    kernel=kernel,
    normalize_y=False,
    n_restarts_optimizer=10,
    random_state=42,
)
gpr.fit(X_train, y_train_norm)

# Predictive mean and standard deviation on the test inputs (scaled space).
res, std = gpr.predict(X_test, return_std=True)

# Training error is measured in the standardised target space ...
train_prediction = gpr.predict(X_train)
print(mean_squared_error(y_train_norm, train_prediction))

# ... while the test error is measured after mapping predictions back through
# the scaler, i.e. in the units of y_test.
test_prediction_unscaled = scaler.inverse_transform(res.reshape(-1, 1)).ravel()
print(mean_squared_error(y_test, test_prediction_unscaled))

# gpr.score needs targets in the same (standardised) space the model was fit in.
y_test_scaled = scaler.transform(y_test.reshape(-1, 1)).ravel()
print(gpr.score(X_test, y_test_scaled))
print(gpr.kernel_)

plt.plot(X_train, y_train_norm, '.')
for curve in (res, res + 2*std, res - 2*std):
    plt.plot(X_test, curve)
plt.legend(['train data', 'mean', 'mean + 2*std', 'mean - 2*std'])
plt.show()

## GPR with Grid Search CV on downsampled displacement-cutting force

In [None]:
# Manual grid search over the WhiteKernel noise level ("sigma") and the RBF
# length scale. For every combination a GPR with fixed hyperparameters is fit
# and its train / test MSE recorded in two tables (rows: length scale,
# columns: sigma), which are then visualised.
sigmas = [0.01, 0.1, 0.5, 1, 5, 10, 25, 50, 100]
lengthscales = [0.1, 0.5, 1, 5, 10, 20]

# float dtype so the tables hold plain numbers instead of Python objects.
train_mse = pd.DataFrame(columns=sigmas, index=lengthscales, dtype=float)
test_mse = pd.DataFrame(columns=sigmas, index=lengthscales, dtype=float)

for ls in lengthscales:
    for sigma in sigmas:
        gpr = GaussianProcessRegressor(kernel=RBF(ls, length_scale_bounds='fixed') + WhiteKernel(sigma, noise_level_bounds='fixed'),
                                       normalize_y=True,
                                       n_restarts_optimizer=0,
                                       random_state=42)
        gpr.fit(X_train, y_train_norm)
        # Store each error in the table that matches its name (they used to be
        # swapped) and write through .loc: chained `df[col][row] = ...`
        # assignment is not guaranteed to modify the frame.
        # NOTE(review): the train error is computed on the standardised
        # targets, the test error on inverse-transformed predictions, so the
        # two tables are not in the same units -- confirm this is intended.
        train_mse.loc[ls, sigma] = mean_squared_error(y_train_norm, gpr.predict(X_train))
        test_mse.loc[ls, sigma] = mean_squared_error(
            y_test, scaler.inverse_transform(gpr.predict(X_test).reshape(-1, 1)).ravel())


def _plot_mse_panels(panel_values, x_values, title_prefix, x_title, lookup):
    """Draw one subplot per entry of `panel_values` with train (red) and test (blue) MSE.

    `lookup(frame, panel, x)` must return the MSE stored in `frame` for the
    given panel value and x value. The number of rows is derived from the
    number of panels, so no panel is dropped when the grid grows.
    Returns the figure after showing it.
    """
    n_cols = 2
    n_rows = -(-len(panel_values) // n_cols)  # ceiling division
    panel_fig = make_subplots(rows=n_rows, cols=n_cols, x_title=x_title, shared_xaxes=True,
                              subplot_titles=[title_prefix + str(p) for p in panel_values])
    for idx, panel in enumerate(panel_values):
        row, col = divmod(idx, n_cols)
        for frame, color in ((train_mse, 'red'), (test_mse, 'blue')):
            panel_fig.add_trace(go.Scatter(
                x=x_values,
                y=[lookup(frame, panel, x) for x in x_values],
                showlegend=False,
                line=dict(color=color)
            ), row=row+1, col=col+1)

    panel_fig.update_xaxes(type="log")
    panel_fig.update_yaxes(range=[0.6, 1])
    panel_fig.update_layout(height=600, width=800)
    panel_fig.show()
    return panel_fig


# One panel per length scale, MSE as a function of sigma.
fig = _plot_mse_panels(lengthscales, sigmas, "lengthscale ", 'Sigma',
                       lambda frame, ls, sigma: frame.loc[ls, sigma])

# One panel per sigma, MSE as a function of length scale. All nine sigmas are
# shown; the former fixed 4x2 grid silently left out the last one.
fig = _plot_mse_panels(sigmas, lengthscales, "sigma ", 'Lengthscale',
                       lambda frame, sigma, ls: frame.loc[ls, sigma])

# Bubble chart of the whole grid: marker size encodes the training MSE.
x, y, sizes = [], [], []
for sigma in sigmas:
    x.extend([sigma] * len(lengthscales))
    y.extend(lengthscales)
    sizes.extend(train_mse[sigma].values.tolist())

fig = go.Figure(data=go.Scatter(
    x=x,
    y=y,
    mode='markers',
    marker=dict(size=sizes)
))
#fig.show()


In [None]:
# Cross-validated grid search over the RBF length scale (kernel component k1)
# and the WhiteKernel noise level (component k2) of a fixed-hyperparameter GPR.

# Lower MSE is better, so the scorer is built with greater_is_better=False.
scorer = make_scorer(mean_squared_error, greater_is_better=False)

kernel = RBF(length_scale_bounds='fixed') + WhiteKernel(noise_level_bounds='fixed')
#kernel = RBF() + WhiteKernel()
gpr = GaussianProcessRegressor(
    kernel=kernel,
    normalize_y=False,
    n_restarts_optimizer=10,
    random_state=42,
)

# Candidate values, addressed through sklearn's nested-parameter syntax.
config = dict(
    kernel__k1__length_scale=[0.1, 0.5, 1, 5, 10, 20],
    kernel__k2__noise_level=[20, 15, 10, 6, 5, 2, 1, 0.5, 0.1],
)

search = GridSearchCV(
    estimator=gpr,
    param_grid=[config],
    scoring=scorer,
    n_jobs=-1,
    verbose=0,
)
search.fit(X_train, y_train_norm)
print(search.best_params_)

In [None]:
# Refit a GPR with the hyperparameters selected by the grid search, report its
# test score / MSE and plot the predictive mean with a +/- 2 std band.
rbf_kernel = RBF(search.best_params_['kernel__k1__length_scale'], length_scale_bounds='fixed')
white_kernel = WhiteKernel(search.best_params_['kernel__k2__noise_level'], noise_level_bounds='fixed')
gpr = GaussianProcessRegressor(kernel=rbf_kernel + white_kernel,
                               normalize_y=False,
                               n_restarts_optimizer=0,
                               random_state=42)
gpr.fit(X_train, y_train_norm)
print(gpr.get_params())

res, std = gpr.predict(X_test, return_std=True)

# The model was fit on standardised targets, so score against scaled y_test.
print(gpr.score(X_test, scaler.transform(y_test.reshape(-1, 1)).ravel()))
# The scaler works on 2-D (n_samples, n_features) arrays: reshape the 1-D
# prediction to a column before inverse-transforming and flatten it afterwards.
# Passing the 1-D array directly is rejected by current scikit-learn releases.
print(mean_squared_error(y_test, scaler.inverse_transform(res.reshape(-1, 1)).ravel()))

# Set the figure size BEFORE the first plot call; assigning it after plotting
# only affects figures created later, not the one being drawn here.
plt.rcParams["figure.figsize"] = (10,10)
plt.plot(X_train, y_train_norm, '.')
plt.plot(X_test, res)
plt.plot(X_test, res + 2*(std))
plt.plot(X_test, res - 2*(std))
plt.legend(['train data', 'mean', 'mean + 2*std', 'mean - 2*std'])
plt.show()