In [21]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from tqdm import tqdm
from scipy.integrate import simps
import matplotlib.pyplot as plt
import imageio
import multiprocessing
from scipy import integrate
from helpers import Fy, find_closest_element, get_densities, get_cdf

In [22]:
# Tabulated density, read from CSV; later cells use its 'axes' and 'pdf' columns.
density_path = '../../../data/commaai/density/gaussian_density_filtered.csv'
density = pd.read_csv(density_path)

# Validation arrays stored next to each other in the Bzeta folder:
# B_zeta_val.npy -> B_zeta, labels_val.npy -> true_y.
true_y = np.load('../../../data/commaai/extracted_coefficients/20201027_filtered_gaussian_resampled/Bzeta/labels_val.npy')
B_zeta = np.load('../../../data/commaai/extracted_coefficients/20201027_filtered_gaussian_resampled/Bzeta/B_zeta_val.npy')

In [23]:
# Number of equally spaced evaluation points on the response axis.
no_points = 10000
density_y = density['axes']
density_pdf = density['pdf']
# The grid spans the tabulated axis from its smallest to its largest value.
# Series.min()/max() are vectorized (and skip NaN entries) instead of
# iterating the Series element by element with the Python builtins.
grid = np.linspace(density_y.min(), density_y.max(), no_points)

# compute these beforehand to save computation time
# Tabulated pdf value at the index find_closest_element returns for each grid point.
p_y_y0 = [density_pdf[find_closest_element(y_i, density_y)] for y_i in grid]
# Standard-normal quantile of Fy(y_i, density) for each grid point.
part_1 = np.array([norm.ppf(Fy(y_i, density)) for y_i in grid])
# Standard-normal pdf at those quantiles. One vectorized call replaces building
# a frozen norm(0, 1) object on every one of the no_points iterations.
phi_1_z = norm.pdf(part_1)

In [24]:
va_ridge_dir = '../../../data/commaai/va/filtered_gaussian_resampled/Ridge/'
mu_t_va = np.genfromtxt(va_ridge_dir + 'mu_t_va.csv', delimiter = ',')

# Only rows int(0.9*50000) .. 49999 (the final tenth of a 50000-row trace) are averaged.
mu_t_va_tail = mu_t_va[int(0.9*50000):50000]
# Columns 0-9 -> coefficient vector; column 10 is averaged and then exponentiated.
beta_va = mu_t_va_tail[:, 0:10].mean(axis = 0)
tau_sq_va = np.exp(mu_t_va_tail[:, 10].mean(axis = 0))
p = len(beta_va)

In [25]:
# Densities on the grid from the VA ridge (filtered) parameters, then get_cdf at
# the true labels. Both helpers come from helpers.py; the progress bars below
# show 10472 iterations each, so this is the slow step of the notebook.
densities_va = get_densities(
    B_zeta, grid, p_y_y0, part_1, phi_1_z,
    beta_va, tau_sq_va, p,  'va_ridge', None,
)
Fys = get_cdf(true_y, grid, densities_va)

# For 1000 levels in [0, 1]: fraction of Fys entries strictly below the level.
p_js = np.linspace(0,1,1000)
n = Fys.shape[0]
p_tilde_js = [np.sum(Fys < level)/n for level in p_js]

100%|██████████| 10472/10472 [01:13<00:00, 142.00it/s]
100%|██████████| 10472/10472 [1:12:00<00:00,  2.42it/s]


In [26]:
# Results table: the nominal level p_j and, per model, the deviation
# p_tilde_j - p_j. It is not written to disk here; the notebook saves the
# frame once in its last cell.
df = pd.DataFrame({
    'p_j': p_js,
    'p_tilde_j_va_ridge_precise': np.subtract(p_tilde_js, p_js),
})

In [27]:
# VA + Ridge + Imprecise

In [28]:
va_ridge_dir = '../../../data/commaai/va/unfiltered_gaussian_resampled/Ridge/'
mu_t_va_impr = np.genfromtxt(va_ridge_dir + 'mu_t_va.csv', delimiter = ',')

# The filtered Ridge cell treats its mu_t_va.csv as a 2-D trace (rows x
# parameters) and averages the last 10% of rows. Indexing such a trace with
# [0:10] / [10] would pick *rows* (the first ten entries of the trace), not
# parameters. So when this file is 2-D too, reduce it to a parameter vector
# the same way; a file that already holds a 1-D vector is used unchanged.
# NOTE(review): assumes the unfiltered file shares the filtered file's layout — confirm.
mu_va_impr = mu_t_va_impr
if mu_t_va_impr.ndim == 2:
    tail_start = int(0.9 * mu_t_va_impr.shape[0])
    mu_va_impr = np.mean(mu_t_va_impr[tail_start:], axis = 0)

# Entries 0-9 -> coefficient vector; entry 10 is exponentiated.
beta_va_impr = mu_va_impr[0:10]
tau_sq_va_impr = np.exp(mu_va_impr[10])
p = 10

In [None]:
# Same pipeline as for the filtered Ridge fit, now with the parameters read
# from the unfiltered Ridge directory.
densities_va_impr = get_densities(
    B_zeta, grid, p_y_y0, part_1, phi_1_z,
    beta_va_impr, tau_sq_va_impr, p, 'va_ridge', None,
)
Fys_impr = get_cdf(true_y, grid, densities_va_impr)

100%|██████████| 10472/10472 [01:19<00:00, 131.79it/s]
 30%|███       | 3175/10472 [21:59<49:50,  2.44it/s]  

In [None]:
# For 1000 levels in [0, 1]: fraction of Fys_impr entries strictly below the level.
p_js = np.linspace(0,1,1000)
n = Fys_impr.shape[0]
p_tilde_js = [np.sum(Fys_impr < level)/n for level in p_js]

In [None]:
# Deviation p_tilde_j - p_j of the most recently computed p_tilde_js, stored as a new column.
df['p_tilde_j_va_ridge_imprecise'] = np.subtract(p_tilde_js, p_js)

# VA + Horseshoe + Precise

In [None]:
# NOTE: the variable keeps its "ridge" name although it now points at the Horseshoe folder.
va_ridge_dir = '../../../data/commaai/va/filtered_gaussian_resampled/Horseshoe/'
# The stored array is indexed with three axes; entries 0-19999 along the first
# axis are dropped and the remainder is averaged over that axis.
mu_t_va_horse_pr = np.load(va_ridge_dir + 'mu_ts_delete.npy')[20000:,:,:].mean(axis = 0)
# Entries 0-9 -> coefficient vector; entry 10 is exponentiated.
tau_sq_va_horse_pr = np.exp(mu_t_va_horse_pr[10])
beta_va_horse_pr = mu_t_va_horse_pr[0:10]
p = 10

In [None]:
# Densities and CDF values for the Horseshoe (filtered) parameters, using the
# same helper calls as the two Ridge variants.
# NOTE(review): the method label passed to get_densities is still 'va_ridge'.
# Whether the helper treats a horseshoe fit differently is not visible here — confirm.
densities_va_horse_pr = get_densities(B_zeta, grid, p_y_y0, part_1, phi_1_z, beta_va_horse_pr, tau_sq_va_horse_pr, p, 'va_ridge', None)
# NOTE(review): the result is stored under the name Fys_impr, which overwrites
# the CDF values of the imprecise Ridge run; the following cell reads this name.
Fys_impr = get_cdf(true_y, grid, densities_va_horse_pr)

In [None]:
# For 1000 levels in [0, 1]: fraction of Fys_impr entries strictly below the level.
p_js = np.linspace(0,1,1000)
n = Fys_impr.shape[0]
p_tilde_js = [np.sum(Fys_impr < level)/n for level in p_js]

In [None]:
# Deviation p_tilde_j - p_j of the most recently computed p_tilde_js, stored as a new column.
df['p_tilde_j_va_horseshoe_precise'] = np.subtract(p_tilde_js, p_js)

In [None]:
# Plot the deviation p_tilde_j - p_j against the level p_j for the most
# recently computed p_tilde_js. Labels and a zero reference line are added so
# the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(p_js, np.subtract(p_tilde_js, p_js))
ax.axhline(0, color = 'grey', linewidth = 0.8, linestyle = '--')
ax.set_xlabel('p_j')
ax.set_ylabel('p_tilde_j - p_j')
plt.show()

In [None]:
# Write the results table (p_j plus the deviation columns added in the cells
# above) to a CSV file; the relative name resolves against the working directory.
df.to_csv('p_j_va_precise_ridge_new.csv')