In [1]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from tqdm import tqdm
from scipy.integrate import simps
import matplotlib.pyplot as plt
import imageio
from helpers import Fy, find_closest_element, predict_single_density, compute_coverage, confidence_interval

In [2]:
# Validation inputs: the estimated marginal density table plus the
# validation labels and the B_zeta array stored alongside them.
density_path = '../../../data/commaai/density/gaussian_density_filtered.csv'
density = pd.read_csv(density_path)

true_y = np.load('../../../data/commaai/extracted_coefficients/20201027_filtered_gaussian_resampled/Bzeta/labels_val.npy')
B_zeta = np.load('../../../data/commaai/extracted_coefficients/20201027_filtered_gaussian_resampled/Bzeta/B_zeta_val.npy')

In [3]:
# Evaluation grid spanning the support of the estimated density, together
# with grid-dependent terms that are reused for every validation observation.
no_points = 65536
density_y = density['axes']
density_pdf = density['pdf']
grid = np.linspace(density_y.min(), density_y.max(), no_points)

# compute these beforehand to save computation time
# density value at the density-table entry closest to each grid point
p_y_y0 = [density_pdf[find_closest_element(y_i, density_y)] for y_i in grid]
# standard-normal quantile of Fy at each grid point
part_1 = np.array([norm.ppf(Fy(y_i, density)) for y_i in grid])
# Standard-normal pdf at those quantiles. A single vectorised call replaces
# building a new frozen norm(0, 1) object for each of the no_points entries.
phi_1_z = norm.pdf(part_1)

1. VA + Ridge

In [4]:
# Variational-approximation output for the ridge prior.
va_ridge_dir = '../../../data/commaai/va/filtered_gaussian_resampled/Ridge/'
mu_t_va = np.genfromtxt(va_ridge_dir + 'mu_t_va.csv', delimiter = ',')

# Average over rows 45000..49999 (the last 10% of the first 50000 rows).
va_tail = mu_t_va[int(0.9*50000):50000]
# Columns 0-9 give the coefficient vector.
beta = va_tail[:, 0:10].mean(axis = 0)
# Column 10 is averaged and then exponentiated
# (presumably it stores log(tau^2) -- TODO confirm).
tau_sq = np.exp(va_tail[:, 10].mean(axis = 0))
p = len(beta)

In [5]:
# Predictive density on `grid` for every validation observation.
# Results are appended one by one so an interrupted run keeps what it has.
densities_va = []
n_val = B_zeta.shape[0]
for obs in tqdm(range(n_val)):
    features = B_zeta[obs].reshape(p,)
    densities_va.append(
        predict_single_density(features, grid, p_y_y0, part_1, phi_1_z,
                               beta, tau_sq, None, 'va_ridge')
    )

100%|██████████| 10472/10472 [06:37<00:00, 26.32it/s]


In [None]:
def f_integral(i):
    """Trapezoidal integral over the two neighbouring points i and i + 1.

    NOTE(review): this cell cannot run as written in the visible notebook:
    `integrate` is never imported (only `simps` is imported from
    scipy.integrate) and `support` is not defined in any visible cell.
    In the visible cells `density` is the DataFrame read from the density
    CSV, so `density[i:(i+2)]` is a two-row DataFrame slice rather than a
    pair of density values. Presumably this was copied from code where
    `density` and `support` were 1-D arrays -- TODO confirm, fix or delete.
    """
    # the slice i:(i+2) selects exactly the entries at positions i and i+1
    return(integrate.trapz(density[i:(i+2)], support[i:(i+2)]))

In [7]:
# Linear predictor on the latent z scale.
design = B_zeta.reshape(B_zeta.shape[0], p)
z_pred = np.dot(design, beta)

# Map each latent prediction back to the response scale: look up the density
# table row whose cdf is closest to Phi(z) and take its 'axes' value.
cdf_column = density['cdf']
pred_y = []
for z in z_pred:
    closest_row = find_closest_element(norm.cdf(z), cdf_column)
    pred_y.append(density.loc[closest_row, 'axes'])

In [None]:
# 95% interval for every validation observation.
# The recorded run took roughly 22 s per observation, so results are appended
# one at a time: an interrupted run still leaves the finished intervals behind.
confidence_intervals = []
n_pred = len(z_pred)
for obs in tqdm(range(n_pred)):
    interval = confidence_interval(densities_va[obs], z_pred[obs], density, 0.95, grid)
    confidence_intervals.append(interval)

  6%|▌         | 628/10472 [3:38:39<59:48:59, 21.88s/it]

In [None]:
# Interval width: second entry minus first entry of each interval.
conf_width = [bounds[1] - bounds[0] for bounds in confidence_intervals]

In [None]:
# Report the average interval width.
mean_conf_width = np.mean(conf_width)
print('Mean 95% confidence interval width ' + str(mean_conf_width) + ' degree.')

In [None]:
# Squared prediction error against interval width, restricted to the
# observations for which an interval has been computed.
n_done = len(conf_width)
squared_error = np.abs(pred_y[:n_done] - true_y[:n_done])**2
plt.scatter(squared_error, conf_width, alpha = 0.5)
plt.xlabel('squared error')
plt.ylabel('95% confidence interval width')

In [None]:
def compute_coverage(confidence_intervals, labels=None):
    """Return the share of intervals that contain their true value.

    Note: this definition shadows the `compute_coverage` imported from
    `helpers` at the top of the notebook.

    Parameters
    ----------
    confidence_intervals : sequence
        Interval ``i`` is read as ``[0]`` (lower bound) and ``[1]`` (upper
        bound). Both bounds count as covered. The sequence may be shorter
        than ``labels``; interval ``i`` is compared with ``labels[i]``.
    labels : sequence, optional
        True values aligned with ``confidence_intervals``. When omitted, the
        notebook-level ``true_y`` is used, which keeps existing
        single-argument calls working.

    Returns
    -------
    float
        Mean of the 0/1 coverage indicators, or nan when there are no
        intervals.
    """
    if labels is None:
        # backward-compatible default: fall back to the notebook global
        labels = true_y
    if len(confidence_intervals) == 0:
        # nothing to average; return nan explicitly
        return np.nan
    covered = [
        1 if (labels[i] >= interval[0]) & (labels[i] <= interval[1]) else 0
        for i, interval in enumerate(confidence_intervals)
    ]
    return np.mean(covered)

In [None]:
# Empirical coverage of the 95% intervals computed so far.
coverage = compute_coverage(confidence_intervals)
coverage_message = 'Mean coverage rate of 95% CI: ' + str(coverage)
print(coverage_message)

2. VA + Horseshoe

In [None]:
# Table of validation file indices.
val_indices_path = '../../../data/commaai/training_files_filtered/indices/val_indices.csv'
val_files = pd.read_csv(val_indices_path)

In [None]:
# Display the validation index table as the cell output.
val_files

In [None]:
# Positions of the observations whose interval is wider than 20.
indices = [idx for idx, width in enumerate(conf_width) if width > 20]

In [None]:
# Rows of the validation table for the wide-interval observations.
# Label-based lookup: assumes val_files keeps its default 0..n-1 index in the
# same order as conf_width -- TODO confirm.
val_files.loc[indices]

In [None]:
# Load one example frame for visual inspection.
example_image_path = '../../../data/commaai/train_bags_2/0/60_785run1.png'
img = imageio.imread(example_image_path)

In [None]:
# Render the example frame loaded into `img`.
plt.imshow(img)