# Bias analysis

In this document we ask whether the bias of ICP is the main source of uncertainty, or whether the covariance of the ICP results for a registration pair is enough to model the uncertainty of those results.

In [None]:
# Enable IPython's autoreload extension and set it to mode 2.
%load_ext autoreload
%autoreload 2

In [None]:
import json
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pathlib
import paperplot
import scipy
import scipy.stats

In [None]:
def plot_gaussian(ax, mu, variance):
    """Draw a normal probability density on ``ax`` as a black line.

    The curve is sampled at 100 evenly spaced points spanning three
    standard deviations on each side of ``mu``.

    Args:
        ax: Object exposing a matplotlib-style ``plot`` method.
        mu: Mean of the distribution.
        variance: Variance of the distribution; its square root is used
            as the scale of the density.
    """
    std_dev = np.sqrt(variance)
    half_span = 3*std_dev
    sample_points = np.linspace(mu - half_span, mu + half_span, 100)
    density = scipy.stats.norm.pdf(sample_points, loc=mu, scale=std_dev)

    ax.plot(sample_points, density, color='black')

In [None]:
def dataset_of_files(files):
    """Load every existing ``.csv`` file in ``files`` into one DataFrame.

    Entries whose suffix is not ``.csv`` or that do not point to an
    existing regular file are skipped.

    Args:
        files: Iterable of paths (strings or path-like objects).

    Returns:
        The concatenation, in input order, of the frames read from the
        accepted files. Each file keeps its own row index, so index
        values may repeat in the result.

    Raises:
        ValueError: If no entry of ``files`` is an existing ``.csv`` file.
    """
    # Materialise first so the inputs can still be listed in the error
    # message even when ``files`` is a one-shot iterator.
    candidates = [pathlib.Path(file) for file in files]

    frames = [
        pd.read_csv(entry)
        for entry in candidates
        if entry.suffix == '.csv' and entry.is_file()
    ]

    if not frames:
        # pd.concat([]) would fail with "No objects to concatenate", which
        # says nothing about which paths were tried.
        raise ValueError(
            'No existing .csv file found among: {}'.format(
                [str(entry) for entry in candidates]))

    return pd.concat(frames)

In [None]:
# CSV files to load (absolute paths).
files = [
    '/home/dlandry/dataset/recov/2017-05-17-apartment.csv',
    '/home/dlandry/dataset/recov/2017-05-17-gazebo-summer.csv',
    '/home/dlandry/dataset/recov/2017-05-17-gazebo-winter.csv',
    '/home/dlandry/dataset/recov/2017-05-17-hauptgebaude.csv',
    '/home/dlandry/dataset/recov/2017-05-17-plain.csv',
    '/home/dlandry/dataset/recov/2017-05-17-wood-autumn.csv',
    '/home/dlandry/dataset/recov/2017-05-17-wood-summer.csv']

# Paths that do not exist on this machine are skipped by dataset_of_files.
dataset = dataset_of_files(files)
# Number of rows loaded across all files.
len(dataset)

In [None]:
# Peek at the 'dataset' column of the first three rows.
dataset.iloc[0:3]['dataset']

# Report figures

In [None]:
# 4x2 grid of bias histograms: ICP in the left column, NDT in the right
# column, one row per bias variable. Every panel shares its y axis with the
# top-left one.
paperplot.setup()
fig = paperplot.paper_figure(396, 540)

variables = ['bias_x', 'bias_y', 'bias_z', 'bias_theta']

axes_icp = {}
first_ax = fig.add_subplot(4, 2, 1)
axes_icp['bias_x'] = first_ax
for row, variable in enumerate(variables[1:], start=1):
    # Left column holds the odd subplot indices 3, 5, 7.
    axes_icp[variable] = fig.add_subplot(4, 2, 2*row + 1, sharey=first_ax)

axes_ndt = {}
for row, variable in enumerate(variables):
    # Right column holds the even subplot indices 2, 4, 6, 8.
    axes_ndt[variable] = fig.add_subplot(4, 2, 2*row + 2, sharey=first_ax)

name_of_axes = {
    'bias_x': 'x axis',
    'bias_y': 'y axis',
    'bias_z': 'z axis',
    'bias_theta': 'around z axis',
}

panels = (('icp', axes_icp), ('ndt', axes_ndt))

for variable in variables:
    # NOTE(review): the histogram range is [-0.1, 1.0] while the visible
    # window below is [-0.1, 0.1], so most of the 300 bins fall outside the
    # plot. Confirm the upper bound is intended.
    for algorithm, axes in panels:
        dataset[dataset['algorithm'] == algorithm].hist(column=variable, bins=300, ax=axes[variable], color='0.4', range=[-0.1, 1.0])

    xlabel = 'Bias (rad)' if variable == 'bias_theta' else 'Bias (m)'
    for _, axes in panels:
        axes[variable].set_title('')
    for _, axes in panels:
        axes[variable].set_xlim([-0.1, 0.1])
    for _, axes in panels:
        axes[variable].set_xlabel(xlabel)

    # Only the left column carries the row label.
    axes_icp[variable].set_ylabel(name_of_axes[variable])

axes_icp['bias_x'].set_title('ICP Algorithm')
axes_ndt['bias_x'].set_title('NDT Algorithm')

# NOTE(review): another figure in this notebook is also saved as 'fig.pdf',
# so whichever cell runs last overwrites the other.
plt.savefig('fig.pdf')
plt.show()

In [None]:
# Histogram of bias_x for the ICP rows of the 'apartment' dataset.
paperplot.setup()
fig = paperplot.paper_figure(190, 200)
ax = fig.add_subplot(1, 1, 1)

is_apartment = dataset['dataset'] == 'apartment'
is_icp = dataset['algorithm'] == 'icp'
apartment_dataset = dataset[is_apartment & is_icp]

apartment_dataset.hist(column='bias_x', color='0.6', ax=ax)

# DataFrame.hist titles the panel with the column name; clear it.
ax.set_title('')
ax.set_xlabel('x axis (m)')
ax.set_ylabel('Number of samples')

# NOTE(review): another figure in this notebook is also saved as 'fig.pdf',
# so whichever cell runs last overwrites the other.
plt.savefig('fig.pdf')
plt.show()

In [None]:
# Keep only the ICP rows, then plot bias_x for the 'apartment' dataset.
is_icp = dataset['algorithm'] == 'icp'
icp_dataset = dataset[is_icp]

icp_apartment = icp_dataset[icp_dataset['dataset'] == 'apartment']
icp_apartment.hist(column='bias_x', bins=20, by='algorithm')
plt.show()

In [None]:
# Mean bias_x of the ICP rows belonging to the 'apartment' dataset.
icp_dataset.loc[icp_dataset['dataset'] == 'apartment', 'bias_x'].mean()

# Exploration

In [None]:
# Rows where reading - reference is below 3.
# NOTE(review): negative differences also pass this test; wrap the
# difference in abs() if an unsigned gap is meant - confirm.
index_gap = dataset['reading'] - dataset['reference']
small_distance_mask = index_gap < 3

small_distance_rows = dataset[small_distance_mask]
small_distance_rows.hist(column='bias_x', bins=50, figsize=(20,10), by='dataset')
plt.show()

In [None]:
# bias_x histograms for everything except 'apartment', split first by
# algorithm and then by dataset.
non_apartment = dataset[dataset['dataset'] != 'apartment']
for grouping in ('algorithm', 'dataset'):
    non_apartment.hist(column='bias_x', by=grouping, bins=50)
plt.show()

In [None]:
# bias_y histograms for everything except 'apartment', split first by
# algorithm and then by dataset.
non_apartment = dataset[dataset['dataset'] != 'apartment']
for grouping in ('algorithm', 'dataset'):
    non_apartment.hist(column='bias_y', by=grouping, bins=50)
plt.show()

In [None]:
# bias_theta histograms for everything except 'apartment', split first by
# algorithm and then by dataset.
non_apartment = dataset[dataset['dataset'] != 'apartment']
for grouping in ('algorithm', 'dataset'):
    non_apartment.hist(column='bias_theta', by=grouping, bins=50)
plt.show()

In [None]:
# Standard deviation of each bias column over the non-'apartment' rows.
dataset.loc[dataset['dataset'] != 'apartment', ['bias_x', 'bias_y', 'bias_z', 'bias_theta']].std()

In [None]:
# Square root of the mean of each var_* column over the non-'apartment' rows.
np.sqrt(dataset.loc[dataset['dataset'] != 'apartment', ['var_x', 'var_y', 'var_z', 'var_theta']].mean())

In [None]:
# Per-dataset standard deviation of the bias (excluding 'apartment'),
# printed and then drawn as paired bars: bias_x next to bias_theta.
non_apartment = dataset[dataset['dataset'] != 'apartment']
by_dataset = non_apartment.groupby('dataset')

tick_labels, bars_x, bars_y, bars_theta = [], [], [], []

for name, group in by_dataset:
    print(name)
    tick_labels.append(name)
    bars_x.append(group['bias_x'].std())
    # NOTE(review): bars_y is collected but never drawn below.
    bars_y.append(group['bias_y'].std())
    bars_theta.append(group['bias_theta'].std())
    print(group[['bias_x', 'bias_y', 'bias_z', 'bias_theta']].std())

width = 0.35
ind = np.arange(len(by_dataset))
positions = np.arange(len(bars_x))

fig, ax = plt.subplots(figsize=(20,10))
ax.bar(positions, bars_x, width)
ax.bar(positions + width, bars_theta, width)

# Centre each tick between the two bars of its pair.
ax.set_xticks(ind + width / 2)
ax.set_xticklabels(tick_labels)

plt.show()

In [None]:
# Box plot of var_x over the non-'apartment' rows, with a horizontal line at
# the sample variance of bias_x over the small-distance rows.
#
# The variance is computed here so that this cell does not depend on a later
# cell having been run first; it only needs `dataset` and
# `small_distance_mask`.
variance = dataset[small_distance_mask]['bias_x'].var()

fig, ax = plt.subplots()
ax.set_ylim([0., 0.02])
dataset[dataset['dataset'] != 'apartment'].boxplot(column='var_x', ax=ax)
ax.axhline(y=variance)
plt.show()

In [None]:
# Scatter of sqrt(var_x) against bias_x for the non-'apartment' rows, with
# the curve y = |x| overlaid for comparison.
non_apartment = dataset[dataset['dataset'] != 'apartment']
xs = non_apartment['bias_x']
ys = np.sqrt(non_apartment['var_x'])

fig, ax = plt.subplots()
ax.scatter(xs, ys, s=1)
ax.axis('equal')
ax.set_ylim([0., 0.01])

# Sort so the overlaid line is drawn left to right.
sorted_xs = np.sort(xs)
ax.plot(sorted_xs, np.abs(sorted_xs))

plt.show()

In [None]:
# Sample mean and variance of bias_x over the small-distance rows.
small_distance_bias_x = dataset[small_distance_mask]['bias_x']
mean = small_distance_bias_x.mean()
variance = small_distance_bias_x.var()

In [None]:
# Display the current value of `mean`.
mean

In [None]:
# Display the current value of `variance`.
variance

In [None]:
# Box plot of bias_x over the whole dataset.
bias_x = dataset['bias_x']
bias_x.plot.box()
plt.show()

In [None]:
# Draw the normal density defined by `mean` and `variance` on its own axes.
fig, ax = plt.subplots()
plot_gaussian(ax=ax, mu=mean, variance=variance)
plt.show()

In [None]:
# Histogram of bias_x for the non-'apartment' rows with the normal density
# defined by `mean` and `variance` drawn on the same axes.
non_apartment_bias_x = dataset[dataset['dataset'] != 'apartment']['bias_x']
ax = non_apartment_bias_x.hist(bins=30)
plot_gaussian(ax=ax, mu=mean, variance=variance)
plt.show()

In [None]:
# Rows outside the 'apartment' dataset whose reading - reference is exactly 1.
not_apartment = dataset['dataset'] != 'apartment'
gap_is_one = (dataset['reading'] - dataset['reference']) == 1
dataset_noap = dataset[not_apartment & gap_is_one]

In [None]:
# Share of the rows of dataset_noap whose |bias_x| exceeds 2*sqrt(var_x).
# The denominator counts dataset_noap as well, so numerator and denominator
# refer to the same set of rows.
x_outliers = np.abs(dataset_noap['bias_x']) > 2*np.sqrt(dataset_noap['var_x'])
np.count_nonzero(x_outliers) / dataset_noap['bias_x'].count()

In [None]:
# Share of the rows of dataset_noap whose |bias_theta| exceeds
# 2*sqrt(var_theta). The denominator counts dataset_noap as well, so
# numerator and denominator refer to the same set of rows.
theta_outliers = np.abs(dataset_noap['bias_theta']) > 2*np.sqrt(dataset_noap['var_theta'])
np.count_nonzero(theta_outliers) / dataset_noap['bias_theta'].count()

In [None]:
# Identify the rows of dataset_noap whose bias_x is above 0.01.
dataset_noap.loc[dataset_noap['bias_x'] > 0.01, ['dataset', 'reading', 'reference', 'bias_x', 'algorithm']]

In [None]:
# Largest bias_x value in dataset_noap.
dataset_noap['bias_x'].max()

In [None]:
# Per-algorithm mean of every numeric column. The numeric columns are picked
# explicitly because the frame also holds string columns (e.g. 'dataset'),
# which recent pandas versions refuse to average instead of dropping them.
numeric_columns = dataset.select_dtypes(include=[np.number]).columns.tolist()
dataset.groupby('algorithm')[numeric_columns].mean()

In [None]:
# Per-algorithm median of every numeric column. The numeric columns are
# picked explicitly because the frame also holds string columns (e.g.
# 'dataset'), which recent pandas versions refuse to reduce instead of
# dropping them.
numeric_columns = dataset.select_dtypes(include=[np.number]).columns.tolist()
dataset.groupby('algorithm')[numeric_columns].median()

In [None]:
# Per-algorithm sample variance of every numeric column. The numeric columns
# are picked explicitly because the frame also holds string columns (e.g.
# 'dataset'), which recent pandas versions refuse to reduce instead of
# dropping them.
numeric_columns = dataset.select_dtypes(include=[np.number]).columns.tolist()
dataset.groupby('algorithm')[numeric_columns].var()

In [None]:
# Print the standard deviation of bias_theta for each algorithm.
by_algorithm = dataset.groupby('algorithm')
for name, group in by_algorithm:
    theta_std = group['bias_theta'].std()
    print(name)
    print(theta_std)

In [None]:
# Median, mean and standard deviation of each bias variable, printed per
# algorithm for a selection of datasets.
bias_variables = ['bias_x', 'bias_y', 'bias_z', 'bias_theta']

for subset in ['apartment', 'gazebo_summer', 'plain']:
    print('===== {} ===='.format(subset))
    subset_rows = dataset[dataset['dataset'] == subset]

    for name, group in subset_rows.groupby('algorithm'):
        print('=== {} ==='.format(name))

        for variable in bias_variables:
            values = group[variable]
            median = round(values.median(), 4)
            average = round(values.mean(), 4)
            std_dev = round(np.sqrt(values.var()), 4)

            print(variable)
            print('Median: {:.4f}'.format(median))
            print('Mean: {:.4f}'.format(average))
            print('Std dev: {:.4f}'.format(std_dev))

        # Blank line between algorithms.
        print()

In [None]:
# Median, mean and standard deviation of each bias variable, printed per
# algorithm over the whole dataset.
bias_variables = ['bias_x', 'bias_y', 'bias_z', 'bias_theta']

for name, group in dataset.groupby('algorithm'):
    print('=== {} ==='.format(name))

    for variable in bias_variables:
        values = group[variable]
        median = round(values.median(), 4)
        average = round(values.mean(), 4)
        std_dev = round(np.sqrt(values.var()), 4)

        print(variable)
        print('Median: {:.4f}'.format(median))
        print('Mean: {:.4f}'.format(average))
        print('Std dev: {:.4f}'.format(std_dev))

    # Blank line between algorithms.
    print()