# Visualization of the dataset and a simple GPR for the Zeiss February slides
This notebook simply visualizes the dataset, fits a GPR on the downsampled dataset with
previously found hyperparameters (GridSearchCV), and creates a plot of the model on the data.

In [None]:
from glob import glob
from os.path import join
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.utils import resample
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.metrics import mean_squared_error, make_scorer
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

## Data choice

In [None]:
# Experiment name; used as a sub-folder of the segmented-data directory when the data path is built.
experiment = 'february'

In [None]:
# Fruit name; used as the innermost sub-folder of the segmented-data path.
fruit = 'orange'

In [None]:
# Substring a data file name must contain for the file to be loaded.
cut_type = 'cut'

In [None]:
# NOTE(review): train_percentage is not referenced anywhere else in the visible notebook.
train_percentage = 0.7
# Number of rows drawn (without replacement) when the pooled data is downsampled.
max_size_trainset = 1000

## Segmented data reading

In [None]:
# Folder holding the segmented CSV runs for the selected experiment and fruit.
data_folder = join('../..', 'data', 'segmented_data', experiment, fruit)
# os.listdir returns entries in arbitrary order. The runs are concatenated in this
# order and then downsampled with a fixed seed, so sorting is what makes the
# selected training rows reproducible from one machine / run to the next.
data_files = sorted(os.listdir(data_folder))
print(data_files)

In [None]:
# Runs that are skipped by hand (the reason is not recorded in this notebook).
excluded_files = {'segmented_orange_cut_32_0.070000_0.006000_0.040000_0.005000.csv',
                  'segmented_orange_cut_33_0.070000_0.005000_0.050000_0.005000.csv'}


def _is_selected_run(file_name):
    """Return True if `file_name` is one of the runs that should be loaded.

    A run is kept when its name contains `cut_type`, it is not listed in
    `excluded_files`, and the sixth '_'-separated token of its name is
    '0.005000'. Names with fewer than six tokens are rejected instead of
    raising IndexError.
    """
    if cut_type not in file_name or file_name in excluded_files:
        return False
    tokens = file_name.split('_')
    return len(tokens) > 5 and tokens[5] == '0.005000'


def _load_run(file_name):
    """Read one segmented run and return its filtered, rescaled samples.

    Keeps the rows with phase == 1 and displacement < 0.04, then shifts the
    displacement so it starts at 0 and scales it by 0.9 / (displacement of the
    first kept row whose ee_force_x exceeds -0.2).

    Raises:
        ValueError: if no kept row has ee_force_x > -0.2, so the scaling
            reference cannot be determined.
    """
    run = pd.read_csv(join(data_folder, file_name))
    # .copy() so the column assignment below writes to an independent frame
    # rather than to a slice of the frame returned by read_csv.
    run = run[(run['phase'] == 1) & (run['displacement'] < 0.04)].copy()
    #run = run[['displacement', 'ee_force_x', 'label']]
    #run['ee_force_x'] *= -1
    reference = run.loc[run['ee_force_x'] > -0.2, 'displacement']
    if reference.empty:
        raise ValueError(f'{file_name}: no phase-1 sample with ee_force_x > -0.2')
    total_displacement = reference.iloc[0]
    run['displacement'] = (run['displacement'] - run['displacement'].iloc[0]) / total_displacement * 0.9
    return run


def _plot_against_displacement(columns):
    """Scatter each of `columns` of the pooled data against its displacement."""
    fig, ax = plt.subplots()
    for column in columns:
        ax.plot(all_data_df['displacement'], all_data_df[column], '.', label=column)
    ax.set_xlabel('displacement')
    ax.legend()
    plt.show()


# list of dataframes with segmented data, one entry per loaded run
all_data = [_load_run(file_name) for file_name in data_files if _is_selected_run(file_name)]
if not all_data:
    raise ValueError(f'no matching "{cut_type}" runs found in {data_folder}')

nb_runs = len(all_data)
total_samples = sum(len(run.index) for run in all_data)
# Concatenate once instead of growing the frame inside the loop.
all_data_df = pd.concat(all_data)
#print(all_data_df)

_plot_against_displacement(['ee_twist_lin_x', 'ee_twist_lin_y', 'ee_twist_lin_z'])
_plot_against_displacement(['ee_force_x', 'ee_force_y', 'ee_force_z'])

## Downsample runs

In [None]:
# desired_headers = ['displacement', 'force_x', 'force_y', 'force_z']
# downsampled_data = pd.DataFrame(columns=desired_headers)
#
# nb_points_run = int(np.ceil(max_size_trainset / nb_runs))
# for run in all_data:
#     tmp = pd.DataFrame(columns=desired_headers)
#     for header in desired_headers:
#         tmp[header] = resample(run[header], replace=False, n_samples=nb_points_run, random_state=42)
#     downsampled_data = pd.concat([downsampled_data, tmp])
#     # plt.plot(tmp['displacement'], tmp['force_x'], '.')
#     # plt.show()
#
# plt.plot(downsampled_data['displacement'], downsampled_data['force_x'], '.')
# plt.show()
#
# del run, tmp

## Downsample dataset


In [None]:
# Columns carried over into the downsampled training set.
desired_headers = ['displacement', 'ee_force_x', 'ee_force_z', 'ee_twist_lin_x', 'ee_twist_lin_z']

# Sampling without replacement cannot return more rows than exist; cap the request
# so a small dataset yields all of its rows (shuffled) instead of raising ValueError.
n_downsampled = min(max_size_trainset, len(all_data_df))
downsampled = resample(all_data_df[desired_headers].values, replace=False,
                       n_samples=n_downsampled, random_state=42)
downsampled_data = pd.DataFrame(data=downsampled, columns=desired_headers)

# Cutting force against each of the two twist components used later as model inputs.
for twist_column in ('ee_twist_lin_x', 'ee_twist_lin_z'):
    plt.plot(downsampled_data[twist_column], downsampled_data['ee_force_x'], '.')
    plt.xlabel(twist_column)
    plt.ylabel('ee_force_x')
    plt.show()

del downsampled, twist_column

In [None]:
# Interactive view of the two twist components, coloured by the cutting force.
fig = px.scatter(
    data_frame=downsampled_data,
    x='ee_twist_lin_x',
    y='ee_twist_lin_z',
    color='ee_force_x',
)
fig.show()

## GPR with Grid Search CV on downsampled displacement-cutting force

In [None]:
# Standardise the cutting force; `scaler` is kept so predictions can be mapped back later.
scaler = StandardScaler()
y_normalized = scaler.fit_transform(downsampled_data[['ee_force_x']].values).ravel()

# Save the exact training set (raw twist inputs, normalized force target) next to the source data.
export_data = pd.DataFrame({
    'ee_twist_lin_x': downsampled_data['ee_twist_lin_x'],
    'ee_twist_lin_z': downsampled_data['ee_twist_lin_z'],
    'ee_force_x': y_normalized,
})
export_data.to_csv(join(data_folder, 'train_data_2d.csv'), index=False)

# Model inputs: one row per sample, columns (ee_twist_lin_x, ee_twist_lin_z); target: 1-D vector.
X_ = downsampled_data[['ee_twist_lin_x', 'ee_twist_lin_z']].values
y_ = y_normalized.ravel()

# Negated MSE, so that "greater is better" holds for the grid search.
scorer = make_scorer(mean_squared_error, greater_is_better=False)
# The kernel is built with length_scale_bounds='fixed'; the candidate length scales
# come from the grid below.
rbf_kernel = RBF(length_scale_bounds='fixed')
gpr = GaussianProcessRegressor(
    kernel=rbf_kernel,
    normalize_y=False,
    n_restarts_optimizer=10,
    random_state=42,
)
# Earlier, narrower alpha grid: [0.01, 0.05, 0.1, 0.4, 1/np.sqrt(2), 0.9]
config = dict(
    alpha=[100, 50, 30, 20, 15, 10, 5, 2, 1, 1/np.sqrt(2), 0.5, 0.1, 0.01, 0.001],
    kernel__length_scale=[0.001, 0.01, 0.1, 0.125, 0.15, 0.2],
)
search = GridSearchCV(gpr, [config], scoring=scorer, n_jobs=-1, verbose=0)
search.fit(X_, y_)
print(search.best_params_)
#print(search.cv_results_)

In [None]:
# Refit a single GPR on the whole downsampled set with the best grid-search parameters.
rbf_kernel = RBF(search.best_params_['kernel__length_scale'], length_scale_bounds='fixed')
#rbf_kernel = RBF(0.1, length_scale_bounds='fixed')
gpr = GaussianProcessRegressor(kernel=rbf_kernel,
                               normalize_y=False,
                               alpha=search.best_params_['alpha'],
                               #alpha=10,
                               n_restarts_optimizer=0,
                               random_state=42)
gpr.fit(X_, y_)
print(gpr.get_params())

# Predict once and reuse the mean for the error metric. `std` is the predictive
# standard deviation, so the 2-sigma band below is mean +/- 2*std (no square root).
res, std = gpr.predict(X_, return_std=True)
print('score on training data:', gpr.score(X_, y_))
print(mean_squared_error(y_, res))

df = pd.DataFrame({'x1': X_[:, 0], 'x2': X_[:, 1], 'y': res, 'std': std})

# Set the figure size before plotting so it applies to the figures drawn here.
plt.rcParams["figure.figsize"] = (10, 10)
# One figure per model input: training targets as dots, then the predictive mean
# and the +/- 2*std band, ordered along that input.
for column, input_name in (('x1', 'ee_twist_lin_x'), ('x2', 'ee_twist_lin_z')):
    df = df.sort_values(by=column)
    plt.plot(downsampled_data[input_name], y_, '.')
    plt.plot(df[column], df['y'])
    plt.plot(df[column], df['y'] + 2*df['std'])
    plt.plot(df[column], df['y'] - 2*df['std'])
    plt.xlabel(input_name)
    plt.ylabel('normalized ee_force_x')
    plt.show()

del column, input_name

In [None]:
# Row i of downsampled_data, X_, res and std all belong to the same sample, so the
# prediction can be drawn against that sample's displacement. The samples are
# ordered by displacement so the line traces and the 'tonexty' fill run left to right.
# NOTE(review): X_ holds two twist columns, so it cannot serve as the x-axis here;
# the displacement column is used because the axis title asks for it -- confirm.
order = np.argsort(downsampled_data['displacement'].values)
displacement = downsampled_data['displacement'].values[order]


def _to_force(normalized):
    """Map normalized force values back to the original scale of ee_force_x."""
    # inverse_transform works on (n_samples, n_features) arrays; the scaler was
    # fitted on a single column, so reshape to (n, 1) and flatten the result.
    return scaler.inverse_transform(np.asarray(normalized).reshape(-1, 1)).ravel()


# The figure is created here so the cell also runs on a fresh kernel.
x_force_dis_fig_grey = go.Figure()
x_force_dis_fig_grey.add_trace(go.Scatter(
        x=displacement,
        y=downsampled_data['ee_force_x'].values[order],
        mode='markers',
        marker=dict(color='grey'),
        name='Training data'))
# Upper edge of the band; `std` is already a standard deviation, so 2*std is 2 sigma.
x_force_dis_fig_grey.add_trace(go.Scatter(
        x=displacement,
        y=_to_force(res + 2*std)[order],
        fill=None,
        line=dict(color='black', dash='dash'),
        # opacity=.5,
        showlegend=False))
# Lower edge; 'tonexty' fills the area up to the previously added (upper) trace.
x_force_dis_fig_grey.add_trace(go.Scatter(
        x=displacement,
        y=_to_force(res - 2*std)[order],
        fill='tonexty',
        line=dict(color='black', dash='dash'),
        name='&plusmn; 2*\u03C3 range' ))
x_force_dis_fig_grey.add_trace(go.Scatter(
        x=displacement,
        y=_to_force(res)[order],
        line=dict(color='red', width=4),
        name='Predictive mean'))

# NOTE(review): the displacement column was rescaled when the runs were loaded, so
# the "[m]" unit in the x-axis title may no longer hold -- confirm before publishing.
x_force_dis_fig_grey.update_layout(
    height=350,
    width=1000,
    title={
        'text': f"Robot orange cut, GPR trained on {len(X_)} data points with 5-Fold CV",
        'x':0.5,
        'xanchor': 'center'},
    xaxis_title="Relative displacement from cut start [m]",
    yaxis_title="Cutting force [N]",
    font=dict(
        size=18,
    )
)

x_force_dis_fig_grey.show()