In [None]:
import ast
import os
# Switch the working directory to the parent directory before the project
# imports below run. NOTE: the path is relative, so re-running this cell moves
# up one more level each time; restart the kernel before re-executing it.
# Presumably the notebook lives one level below the project root, where `src`,
# `estimator` and `data` are found -- TODO confirm.
os.chdir(os.pardir)

import numpy as np
import pandas as pd
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
from tqdm.auto import tqdm

from src import load_point_cloud
from src import show_point_cloud
from src import rms_angle_error

from estimator import NormalEstimator

# Baseline

In [None]:
# Load the point cloud used for the baseline run. The two returned values are
# unpacked as `xyz` (the input given to the estimator) and `n` (the reference
# passed to the error metric) -- presumably point coordinates and ground-truth
# normals; verify against `load_point_cloud`.
xyz, n = load_point_cloud('Cup33100k_ddist_minmax')

In [None]:
# Visualise the loaded cloud. Only the second returned value is kept (as `ax`);
# the first one is discarded.
_, ax = show_point_cloud(xyz, n)

In [None]:
# Baseline settings: `k` is fixed and `s` is derived from it as k - sqrt(2k).
# NOTE(review): `s` is presumably a smoothing-type parameter of the
# estimator -- confirm against NormalEstimator.
k = 30
root_of_2k = np.sqrt(2 * k)
s = k - root_of_2k
s

In [None]:
# Baseline configuration: `k` and `s` come from the cell above, `deg` is fixed
# to 3, and neither a kernel nor a gamma value is supplied.
estimator = NormalEstimator(k=k, deg=3, s=s, kernel=None, gamma=None)

In [None]:
# Fit on the baseline cloud. The return value of `fit` is bound back to
# `estimator` (scikit-learn-style estimators return `self` from `fit` --
# presumably NormalEstimator does too; verify).
estimator = estimator.fit(xyz)

In [None]:
# Predict on the same points the estimator was fitted on and keep the result
# as `n_estim`.
n_estim = estimator.predict(xyz)

In [None]:
# Score the baseline prediction against the reference `n`. The prediction
# already stored in `n_estim` is reused here rather than calling
# `estimator.predict(xyz)` a second time.
angle_err = rms_angle_error(n_estim, n, orient=False)
print(f'Unoriented normal angle error = {angle_err:.4f}°')

# Hyperparameter tuning

Tuning is performed on the training set only (validation is not possible in this case),
so the validation set is omitted.

In [None]:
# Directory, relative to the current working directory, that holds the
# training-set list files read below.
data_path = 'data'

In [None]:
def _read_name_list(list_file):
    """Return the whitespace-stripped lines of ``list_file`` under ``data_path``."""
    with open(os.path.join(data_path, list_file)) as f:
        return [line.strip() for line in f]


# One list of names per training-set list file.
trainingset_no_noise = _read_name_list('trainingset_no_noise.txt')
trainingset_vardensity_whitenoise = _read_name_list(
    'trainingset_vardensity_whitenoise.txt'
)
trainingset_vardensity = _read_name_list('trainingset_vardensity.txt')
trainingset_whitenoise = _read_name_list('trainingset_whitenoise.txt')

In [None]:
# Concatenate the four name lists in a fixed order; duplicates are kept here.
trainingset_all = [
    *trainingset_no_noise,
    *trainingset_vardensity_whitenoise,
    *trainingset_vardensity,
    *trainingset_whitenoise,
]

In [None]:
# Load every distinct training cloud once, keyed by its name.
#
# The de-duplicated names are sorted because the iteration order of a set of
# strings can change between interpreter runs (string hash randomisation);
# sorting keeps the dictionary order -- and therefore the order of any results
# derived from it -- reproducible.
#
# Dedicated loop variables are used so that the baseline `xyz` / `n` loaded
# earlier are not overwritten by the last training cloud.
X_train_d = {}
y_train_d = {}
for dataset_name in sorted(set(trainingset_all)):
    cloud_xyz, cloud_n = load_point_cloud(dataset_name)
    X_train_d[dataset_name] = cloud_xyz
    y_train_d[dataset_name] = cloud_n

In [None]:
# Sub-grid without a kernel: varies `k` and `deg`, no `gamma` entry.
_grid_no_kernel = {
    'k': [10, 30],
    'deg': [2, 3],
    's': [None],
    'kernel': [None],
}

# Sub-grid for the 'gaussian' and 'inverse_multiquadric' kernels.
_grid_gaussian_imq = {
    'k': [30, 50, 100],
    's': [None],
    'kernel': ['gaussian', 'inverse_multiquadric'],
    'gamma': [0.1, 0.3, 1],
}

# Sub-grid for the 'rbf' kernel; the only one that also varies `s`.
_grid_rbf = {
    'k': [30, 50, 100],
    's': [None, 10, 0],
    'kernel': ['rbf'],
    'gamma': [1, 3, 10],
}

# List of sub-grids handed to the grid search.
param_grid = [_grid_no_kernel, _grid_gaussian_imq, _grid_rbf]

In [None]:
def scorer(y, y_pred):
    """Forward to ``rms_angle_error`` with the prediction first and ``orient=False``."""
    return rms_angle_error(y_pred, y, orient=False)


# Registered with greater_is_better=False, i.e. as a quantity to be minimised.
custom_scorer = make_scorer(scorer, greater_is_better=False)

In [None]:
def tuning():
    """Run the grid search on every training cloud and collect the results.

    For each key of ``X_train_d`` a fresh ``GridSearchCV`` over ``param_grid``
    is fitted on that cloud (``X_train_d[key]`` / ``y_train_d[key]``) and
    scored with ``custom_scorer``.

    Returns
    -------
    pandas.DataFrame
        The ``cv_results_`` tables of all datasets stacked row-wise with a
        fresh integer index, plus a ``dataset`` column holding the dataset
        key. An empty frame is returned when there are no datasets.
    """
    per_dataset_results = []
    for key in tqdm(X_train_d.keys()):
        grid = GridSearchCV(estimator=NormalEstimator(),
                            param_grid=param_grid,
                            scoring=custom_scorer,
                            n_jobs=-1,
                            # A single "split" whose train and test indexers
                            # both select every sample.
                            cv=[(slice(None), slice(None))],
                            verbose=2)
        grid.fit(X_train_d[key], y_train_d[key])
        df_temp = pd.DataFrame(grid.cv_results_)
        df_temp['dataset'] = key
        per_dataset_results.append(df_temp)
    # Return only after *all* datasets have been processed (a return placed
    # inside the loop would stop after the first one), and concatenate once
    # instead of re-copying the accumulated frame on every iteration.
    if not per_dataset_results:
        return pd.DataFrame()
    return pd.concat(per_dataset_results, ignore_index=True)

In [None]:
# Switches: `force_train` skips the cached results and always re-runs the
# search; `save` writes the resulting table to `fname`.
force_train = False
save = False
fname = os.path.join('estimator', 'grid_res.csv')

if not force_train:
    try:
        print('Trying to restore the grid...')
        df_grid_res = pd.read_csv(fname, index_col=0)
        # The `params` dictionaries are read back from the CSV as text, so
        # they are parsed back into Python objects.
        df_grid_res['params'] = df_grid_res['params'].apply(ast.literal_eval)
        print('Restoring successful.')
    except Exception as e:
        # Any failure while restoring falls back to a fresh run.
        print(e)
        print('Restoring failed. Fitting the surrogate model...')
        df_grid_res = tuning()
else:
    print('Optimization started...')
    df_grid_res = tuning()

if save:
    df_grid_res.to_csv(fname)

In [None]:
# Columns shown in the summary table. Defined in this cell so that it does not
# depend on a later cell having been executed first (otherwise a fresh
# Restart & Run All fails here with a NameError).
sel = ['params', 'mean_test_score', 'rank_test_score', 'dataset']

# One label per row: the parameter values of that row joined with underscores.
param_labels = df_grid_res['params'].apply(
    lambda x: '_'.join(str(val) for val in x.values())
)

# Show the selected columns indexed by that label; the index is named 'kernel'.
df_grid_res[sel].set_index(param_labels).rename_axis('kernel')

In [None]:
# Show, ordered by dataset, the rows whose rank equals `rank_test_score`.
sel = ['params', 'mean_test_score', 'rank_test_score', 'dataset']
rank_test_score = 1
has_selected_rank = df_grid_res['rank_test_score'] == rank_test_score
df_grid_res.loc[has_selected_rank, sel].sort_values(by='dataset')

In [None]:
# TBA
#
# extract best performing parameter combination for each training-set group
#
# important groups: no noise, low noise (1e-2), med noise (5e-2), high noise
# (1e-1), vardensity striped (minmax_layers), vardensity gradients (minmax)
#
# values should be summarized per dataset and stored so that they can be read
# in test.ipynb to evaluate the performance on the corresponding test-set groups