## Relation of Variance and Predictive Error CPL
Author: Clara Hoffmann

In [None]:
# load packages
import numpy as np
import pandas as pd
from scipy.stats import norm
from tqdm import tqdm
import matplotlib.pyplot as plt
from helpers import (Fy, find_closest_element, error_vs_variance)
from density_predictor_class_val import density_predictor

In [None]:
# --- validation inputs -------------------------------------------------------
# Density table read from CSV; its 'cdf' and 'axes' columns are used further
# down when point predictions are mapped back through the table.
density_path = '../../../../data/commaai/density/gaussian_density_filtered.csv'
density = pd.read_csv(density_path)

# B_zeta for the validation set. The reshape collapses the array to 2-D with
# shape (shape[0], shape[2]); it only succeeds when all other axes have length 1.
B_zeta = np.load('../../../../data/commaai/extracted_coefficients/20201027_filtered_gaussian_resampled/Bzeta/B_zeta_val.npy')
B_zeta = B_zeta.reshape((len(B_zeta), B_zeta.shape[2]))

# Coefficient vector (comma-separated text file) and the validation labels.
beta = np.genfromtxt(
    '../../../../data/commaai/extracted_coefficients/20201027_filtered_gaussian_resampled/beta/beta.csv',
    delimiter=',',
)
true_y = np.load('../../../../data/commaai/extracted_coefficients/20201027_filtered_gaussian_resampled/Bzeta/labels_val.npy')

# Number of points handed to the density predictor.
no_points = 1000
# Grid of variance values: 1000 evenly spaced points on [0, 180].
grid = np.linspace(0, 180, num=1000)

In [None]:
# Predictive densities on the validation data, one per method/prior setting.
# The calls run in the order the method names are listed.
dens = density_predictor(B_zeta, true_y, density, no_points)
(result_va_ridge,
 result_hmc_ridge,
 result_va_horse,
 result_hmc_horse) = [
    dens.get_density(method)
    for method in ('va_ridge', 'hmc_ridge', 'va_horseshoe', 'hmc_horseshoe')
]

In [None]:
# Store the 'variance prediction' entry of every result side by side (one
# column per method) and write the table to disk for later use.
df = pd.DataFrame({
    column: result['variance prediction']
    for column, result in (
        ('va_ridge', result_va_ridge),
        ('hmc_ridge', result_hmc_ridge),
        ('va_horse', result_va_horse),
        ('hmc_horse', result_hmc_horse),
    )
})
df.to_csv('../../../../data/commaai/predictions/cpl/densities.csv')

In [None]:
# Point predictions.
# z_pred is the linear predictor B_zeta . beta.
z_pred = B_zeta.dot(beta)
# Evaluate the standard-normal CDF once for the whole array rather than once
# per element inside the loop; the values are the same, but the per-call scipy
# overhead is paid a single time.
# Each CDF value is then looked up in the density table: find_closest_element
# (project helper) presumably returns the index label of the closest entry in
# density['cdf'] -- verify against helpers.py -- and the matching 'axes' value
# is taken as the prediction.
y_pred = np.array([
    density.loc[find_closest_element(cdf_value, density['cdf']), 'axes']
    for cdf_value in tqdm(norm.cdf(z_pred))
])

In [None]:
# Error-vs-variance curve for every method, evaluated on the shared variance
# grid against the true validation labels. error_vs_variance is a project
# helper; per the original note it yields the MSE of the predictions for each
# grid (= variance) value.
(er_conf_va_ridge,
 er_conf_hmc_ridge,
 er_conf_va_horse,
 er_conf_hmc_horse) = [
    error_vs_variance(result, grid, true_y)
    for result in (
        result_va_ridge,
        result_hmc_ridge,
        result_va_horse,
        result_hmc_horse,
    )
]

In [None]:
# Plot the error curves against the variance threshold nu and save the figure.
# Start from a fresh figure so that plotting state left over from an earlier
# cell (possible with non-inline backends or when run as a script) cannot leak
# into this plot.
plt.figure()
plt.plot(grid, er_conf_hmc_horse, label='HMC/Horseshoe', color='darkred', linestyle='dashed')
# NOTE(review): this curve is the VA ridge result but is labelled 'HMC/VA Ridge';
# presumably the HMC and VA curves coincide -- confirm the label is intended.
plt.plot(grid, er_conf_va_ridge, label='HMC/VA Ridge', color='orange')
# Remaining curves, currently disabled:
#plt.plot(grid, er_conf_hmc_ridge, label = 'HMC/Ridge', color = 'darkorange')
#plt.plot(grid, er_conf_va_horse, label = 'HMC/VA Horseshoe', color = 'red', linestyle = 'dotted')
# Show variances 0..70 only, with the x-axis reversed (largest nu on the left).
plt.xlim(0, 70)
plt.gca().invert_xaxis()
# A single legend call is enough; a second call would just replace the first.
plt.legend(framealpha=0.0)
plt.xlabel(r'$\nu$')
plt.ylabel(r'MSE of predictions with  $Var_{pred} \leq \nu$')
plt.savefig('error_vs_confidence_cpl.png')
# Author's observation on the result: good! mse increases with higher variance

In [None]:
# Scatter of predicted mean vs. predicted variance for the HMC results of both
# priors, saved to disk.
# Start from a fresh figure: without it the points could be drawn onto a figure
# left open by an earlier cell (e.g. one with an inverted, truncated x-axis)
# when a non-inline backend is used or the code is run as a script.
plt.figure()
plt.scatter(result_hmc_horse['mean prediction'], result_hmc_horse['variance prediction'],
            alpha=0.2, color='darkred', label='HMC/VA Horseshoe')
plt.scatter(result_hmc_ridge['mean prediction'], result_hmc_ridge['variance prediction'],
            alpha=0.2, color='orange', label='HMC/VA Ridge')
# A single legend call is enough; a second call would just replace the first.
plt.legend(framealpha=0.0)
# Lower limit slightly below zero so points at zero variance are not clipped.
plt.ylim(-5, 200)
plt.xlabel('steering angle in degrees')
plt.ylabel('predicted variance')
plt.savefig('mean_vs_var_cpl.png')