In [1]:
import numpy as np
import pandas as pd
from scipy.stats import norm
import scipy as sp
from tqdm import tqdm
from scipy.integrate import simps, trapz
import matplotlib.pyplot as plt
import imageio
import multiprocessing
from helpers import (Fy, find_closest_element,  compute_coverage, #predict_single_density,
confidence_interval, confidence_interval, generate_fixed_terms, get_ci)
from density_predictor_class import density_predictor

In [2]:
# Inputs for the validation run.
no_points = 500

# B_zeta is collapsed to 2-D using the lengths of its first and third axes
# (presumably the file holds an (n, 1, p) array — TODO confirm).
B_zeta = np.load('../../../data/commaai/extracted_coefficients/20201027_filtered_gaussian_resampled/Bzeta/B_zeta_val.npy')
B_zeta = B_zeta.reshape((B_zeta.shape[0], B_zeta.shape[2]))

# Labels loaded from the matching *_val file.
true_y = np.load('../../../data/commaai/extracted_coefficients/20201027_filtered_gaussian_resampled/Bzeta/labels_val.npy')

# Tabulated density; later cells read its 'axes' and 'cdf' columns.
density_path = '../../../data/commaai/density/gaussian_density_filtered.csv'
density = pd.read_csv(density_path)

In [3]:
# Build the predictor from the validation inputs, the tabulated density and the grid size.
# NOTE(review): later cells rebind the name `dens` inside their loops, so this
# object is no longer reachable under that name once those cells have run.
dens = density_predictor(B_zeta, true_y, density, no_points)

In [4]:
# Request the results for each of the four method names from the predictor.
# Later cells index these results with the keys 'densities' and 'mean prediction'.
# NOTE(review): the recorded run of this cell ended with a FileNotFoundError for
# '.../va/filtered_gaussian_resampled/Horseshoe/mu_ts_new_dev.npy', so on that run
# not all four names were assigned.
result_va_ridge = dens.get_density('va_ridge')
result_hmc_ridge = dens.get_density('hmc_ridge')
result_va_horse = dens.get_density('va_horseshoe')
result_hmc_horse = dens.get_density('hmc_horseshoe')

  0%|          | 29/10472 [00:00<00:36, 288.43it/s]

computing densities for each observation


100%|██████████| 10472/10472 [00:23<00:00, 439.15it/s]
 10%|█         | 1050/10472 [00:00<00:00, 10499.46it/s]

computing mean prediction for each observation


100%|██████████| 10472/10472 [00:01<00:00, 10252.07it/s]
  5%|▍         | 489/10472 [00:00<00:02, 4885.32it/s]

computing variance prediction for each observation


100%|██████████| 10472/10472 [00:01<00:00, 6858.65it/s]
100%|██████████| 10472/10472 [04:12<00:00, 41.51it/s]
100%|██████████| 10472/10472 [00:01<00:00, 9210.17it/s] 
100%|██████████| 10472/10472 [00:01<00:00, 8724.01it/s]


FileNotFoundError: [Errno 2] No such file or directory: '../../../data/commaai/va/filtered_gaussian_resampled/Horseshoe/mu_ts_new_dev.npy'

In [None]:
# Report MSE, MAE and accuracy of the mean predictions on the validation set.
#
# Every result is read with the same key, 'mean prediction'. Reading VA/Ridge
# with 'mean predictions' (plural) while the other three use the singular form
# was inconsistent.
# NOTE(review): the singular key is assumed from the three other lookups —
# confirm against density_predictor.get_density.
ACCURACY_TOLERANCE = 6  # an absolute error below this counts as a correct prediction

model_results = [
    ('CPL VA/Ridge', result_va_ridge),
    ('CPL HMC/Ridge', result_hmc_ridge),
    ('CPL VA/Horseshoe', result_va_horse),
    ('CPL HMC/Horseshoe', result_hmc_horse),
]

# Compute each residual vector once and reuse it for all three metrics.
residuals = [
    (label, true_y - np.array(result['mean prediction']))
    for label, result in model_results
]

metrics = [
    ('MSE', lambda err: np.mean(err**2)),
    ('MAE', lambda err: np.mean(np.abs(err))),
    ('Accuracy', lambda err: np.mean(np.abs(err) < ACCURACY_TOLERANCE)),
]

# Same text layout as before: a title line, then one header per metric followed
# by one ' <label>: <value>' line per model.
report_lines = ['Validation Performance: ']
for metric_name, metric in metrics:
    report_lines.append(metric_name + ':')
    report_lines.extend(' ' + label + ': ' + str(metric(err)) for label, err in residuals)
print('\n'.join(report_lines))

In [None]:
# One column of densities per model variant, taken from each result's 'densities' entry.
density_sources = {
    'va_ridge': result_va_ridge,
    'hmc_ridge': result_hmc_ridge,
    'va_horse': result_va_horse,
    'hmc_horse': result_hmc_horse,
}
df = pd.DataFrame({column: result['densities'] for column, result in density_sources.items()})

In [None]:
# Persist the per-model densities. index=False is not passed, so the DataFrame
# index is written as the first CSV column.
df.to_csv('../../../data/commaai/predictions/cpl/densities.csv')

## 1. VA + RIDGE + PRECISE

In [None]:
va_ridge_dir = '../../../data/commaai/va/filtered_gaussian_resampled/Ridge/'
mu_t_va = np.genfromtxt(va_ridge_dir + 'mu_t_va.csv', delimiter = ',')
iterations = mu_t_va.shape[0]

# Only the last 10% of the iterations enter the point estimates.
last_iterations = mu_t_va[int(0.9*iterations):iterations]
beta_trace = last_iterations[:, 0:10]           # columns 0-9
tau_sq_trace = np.exp(last_iterations[:, 10])   # exp of column 10

beta = np.mean(beta_trace, axis = 0)
beta_sd = np.std(beta_trace, axis = 0)
tau_sq = np.mean(tau_sq_trace, axis = 0)
tau_sq_sd = np.std(tau_sq_trace, axis = 0)
p = len(beta)

# Plug-in prediction: push B_zeta.dot(beta) through the standard normal cdf and
# look the result up in the tabulated density's 'cdf' / 'axes' columns.
z_pred = B_zeta.reshape(B_zeta.shape[0], p).dot(beta)
pred_y = []
for z in z_pred:
    closest = find_closest_element(norm.cdf(z), density['cdf'])
    pred_y.append(density.loc[closest, 'axes'])

In [None]:
# Predictive density for every validation observation (method 'va_ridge').
# The per-observation result is bound to `density_i` rather than `dens`, so the
# density_predictor instance stored in `dens` is not overwritten by this loop.
# NOTE(review): `predict_single_density` is commented out in the helpers import,
# and `grid`, `p_y_y0`, `part_1`, `phi_1_z` are not assigned in the visible
# cells — confirm where they come from before running.
densities_va = []
for i in tqdm(range(0, B_zeta.shape[0])):
    density_i = predict_single_density(B_zeta[i].reshape(p,), grid, p_y_y0, part_1, phi_1_z, beta, tau_sq, None, 'va_ridge')
    densities_va.append(density_i)

In [None]:
# mean prediction
# Trapezoidal integral of density * grid over the grid, one value per observation.
pred_y_va_ridge = np.array([
    sp.integrate.trapz(densities_va[i]*grid, grid)
    for i in tqdm(range(0, B_zeta.shape[0]))
])

In [None]:
# variance prediction
# Trapezoidal integral of density * (grid - mean prediction)^2, one value per observation.
pred_y_va_ridge_var = [
    sp.integrate.trapz(densities_va[i]*((grid - pred_y_va_ridge[i])**2), grid)
    for i in tqdm(range(0, B_zeta.shape[0]))
]

In [None]:
# Start the two summary tables (variance and mean prediction) with the VA/ridge column.
# NOTE(review): `df` was bound to the densities DataFrame in an earlier cell and is
# rebound here to a different table.
df_var = pd.DataFrame({'va_ridge': pred_y_va_ridge_var})
df = pd.DataFrame({'va_ridge': pred_y_va_ridge})

In [None]:
# Mean squared error of the VA/ridge mean predictions against true_y.
np.mean((true_y - np.array(pred_y_va_ridge))**2)

In [None]:
# Share of observations whose absolute error is below 6.
np.mean(np.abs(true_y - np.array(pred_y_va_ridge)) < 6)

In [None]:
# True labels (x) against VA/ridge mean predictions (y).
plt.scatter(true_y, np.array(pred_y_va_ridge), alpha = 0.1)

## Alternative: sampling-based prediction, as in Klein (?) — TODO: add the exact reference

In [None]:
# Draw (tau^2, beta) samples around the point estimates `tau_sq` / `beta`,
# using `tau_sq_sd` / `beta_sd` as independent normal standard deviations.
SEED = 0        # fixed so that re-running the notebook reproduces the same draws
N_DRAWS = 100   # number of parameter draws
np.random.seed(SEED)

# `n` is assigned here so the cell does not rely on a later cell having run first.
n = B_zeta.shape[0]

# NOTE(review): a normal draw for tau^2 can be negative; the later
# sqrt(1/(1 + W*tau)) is then not guaranteed to be defined — confirm acceptable.
tau_samples = np.random.normal(tau_sq, tau_sq_sd, N_DRAWS)
beta_samples = np.random.multivariate_normal(beta, np.diag(beta_sd**2), N_DRAWS)

# Row-wise squared norm of B_zeta: W[i] = B_zeta[i].dot(B_zeta[i]).
W = np.sum(B_zeta*B_zeta, axis=1)

In [None]:
# Grid spanning the range of the tabulated density's 'axes' column. It is assigned
# here as well so this cell does not depend on a later cell having been run first.
z_grid = np.linspace(min(density['axes']), max(density['axes']), 1000)

# For every grid value z: the 'axes' entry selected by looking norm.cdf(z) up in
# the 'cdf' column (presumably the nearest tabulated cdf value — see
# helpers.find_closest_element).
norm_cdf_z = np.array([density.loc[find_closest_element(norm.cdf(z), density['cdf']), 'axes'] for z in z_grid])

In [None]:
# NOTE(review): scipy is already imported as `sp` in the first cell; this re-import is redundant.
import scipy as sp
# Normal pdf evaluated on z_grid (as a column vector) with location `mu` and scale `S`.
# NOTE(review): `mu` and `S` are only assigned in later cells — this cell depends on run order.
x = sp.stats.norm.pdf(z_grid.reshape(-1, 1), loc=mu, scale=S)

In [None]:
# Plot the looked-up 'axes' values against the grid.
plt.plot(z_grid, norm_cdf_z)

In [None]:
# NOTE(review): `pred` is first assigned in a later cell (column mean of samples_e),
# so this cell only works after that cell has been run.
plt.plot(pred_y_va_ridge, pred)

In [None]:
# For every parameter draw j and observation i, integrate over z_grid (Simpson's rule):
#   samples_e[j, i]   = integral of norm_cdf_z    * N(z; mu[i], S[i]) dz
#   samples_var[j, i] = integral of norm_cdf_z**2 * N(z; mu[i], S[i]) dz
# Despite its name, samples_var therefore holds a second moment, not a variance.
n_draws = tau_samples.shape[0]

# One row per draw. Allocating a fixed 10000 rows left every row beyond n_draws
# at zero, which shrinks any mean taken over axis 0.
samples_e = np.zeros((n_draws, n))
samples_var = np.zeros((n_draws, n))

# The integrand factors that do not depend on j or i are computed once.
f1 = norm_cdf_z
f12 = (norm_cdf_z**2)

for j in tqdm(range(0, n_draws)):
    tau_sq_j = tau_samples[j]
    beta_sample = beta_samples[j]
    S = np.sqrt(1/(1 + W*tau_sq_j))
    # All observations at once; mu[i] equals S[i] * B_zeta[i].dot(beta_sample).
    mu = S*B_zeta.dot(beta_sample)
    for i in range(0,n):
        # Evaluated directly on z_grid, so the grid length is not hard-coded.
        f2 = sp.stats.norm.pdf(z_grid, loc=mu[i], scale=S[i])
        samples_e[j, i] = simps(f1*f2, z_grid, axis=0)
        samples_var[j, i] = simps(f12*f2, z_grid, axis=0)

In [None]:
# Normal pdf on z_grid using the first entries of `mu` and `S` as left behind by
# whichever cell assigned them last. The reshape requires z_grid to have exactly
# 1000 points.
x = sp.stats.norm.pdf(z_grid.reshape(-1, 1), loc=mu[0], scale=S[0]).reshape(1000,)
f1 = norm_cdf_z

In [None]:
#samples_e

In [None]:
# Average over the parameter draws. Only the first len(tau_samples) rows of
# samples_e are ever filled by the sampling loop; if the array was allocated with
# more rows, the remaining all-zero rows must not enter the mean.
pred = np.mean(samples_e[:tau_samples.shape[0]], axis = 0)

In [None]:
# True labels against predictions on a fixed [-8, 8] window, with the identity line in red.
# NOTE(review): this plots `pred_y` (assigned in the parameter-loading cells), not the
# `pred` computed in the preceding cell — confirm which one is intended.
plt.scatter(true_y, pred_y, alpha = 0.2)
plt.ylim(-8,8)
plt.xlim(-8,8)
plt.plot([-8,8], [-8,8], color = 'red')

In [None]:
# Column mean over the first 100 rows of samples_e only.
pred2 = np.mean(samples_e[:100,:], axis = 0)

In [None]:
# True labels against `pred2` on a fixed [-8, 8] window, with the identity line in red.
plt.scatter(true_y, pred2, alpha = 0.2)
plt.ylim(-8,8)
plt.xlim(-8,8)
plt.plot([-8,8], [-8,8], color = 'red')

In [None]:
# Mean absolute error of `pred` against true_y.
np.mean(np.abs(true_y - pred))

In [None]:
#func_values = np.array([func(z_grid[i], mu, sigma) for i in range(0,z_grid.shape[0])])

In [None]:
n = B_zeta.shape[0]
# Row-wise squared norm of B_zeta: W[i] = B_zeta[i].dot(B_zeta[i]).
W = np.sum(B_zeta*B_zeta, axis=1)

# One (mu, sigma, S) triple of length-n vectors per tau draw.
# The loop uses the current draw `samp`. Using the fixed point estimate `tau_sq`
# inside the loop made every iteration append identical values.
# NOTE(review): `beta` (the point estimate) is still used for every draw — confirm
# whether beta_samples was intended here as well.
S_list = []
mu_list = []
sigma_list = []
for samp in tqdm(tau_samples):
    S = np.sqrt(1/(1 + W*samp))
    sigma = S
    mu = S*B_zeta.dot(beta)
    mu_list.append(mu)
    sigma_list.append(sigma)
    S_list.append(S)
    

In [None]:
# 1000 equally spaced points between the smallest and largest value of density['axes'].
# NOTE(review): earlier cells already use z_grid, so they depend on this cell having run.
z_grid = np.linspace(min(density['axes']), max(density['axes']), 1000)

In [None]:
# For each j: evaluate func(z, mu, sigma) on z_grid and integrate with the trapezoidal rule.
# NOTE(review): `func` is not defined in the visible cells — confirm where it comes from.
# NOTE(review): j runs over len(mu_list) but is used to index into mu_list[0] and
# sigma_list[0], i.e. into the first list element only — confirm this is intended.
E_y_thetas = []
for j in tqdm(range(0, len(mu_list))):
    func_values = np.array([func(z_grid[i], mu_list[0][j], sigma_list[0][j]) for i in range(0,z_grid.shape[0])])
    E_y_theta = sp.integrate.trapz(func_values, z_grid)
    E_y_thetas.append(E_y_theta)

In [None]:
#func_values = [func(z_grid[i], mu[j], sigma[j]) for i in range(0,z_grid.shape[0]) for j in tqdm(range(0, mu.shape[0]))]

In [None]:
# Shape check of the collected integrals.
np.array(E_y_thetas).shape

In [None]:
# Dot product of the per-draw integrals with the tau draws, wrapped in np.mean.
# The closing parenthesis of np.mean(...) was missing, which made the cell a SyntaxError.
np.mean(np.array(E_y_thetas).dot(tau_samples))

In [None]:
# Convert whatever `func_values` currently holds to an array and show its shape.
func_values = np.array(func_values)
func_values.shape

In [None]:
# Trapezoidal integral of the current `func_values` over z_grid.
E_y_theta = sp.integrate.trapz(func_values, z_grid)

In [None]:
# Display the integral computed above.
E_y_theta

In [None]:
# func evaluated at every density['axes'] entry, using the first elements of `mu` and `sigma`.
# NOTE(review): `func` is not defined in the visible cells.
value = [func(density['axes'][i], mu[0], sigma[0]) for i in range(0, density.shape[0])]

In [None]:
# Same evaluation as above, repeated for every element j of `mu` / `sigma`.
# NOTE(review): `func` is not defined in the visible cells; the nested Python loops
# perform mu.shape[0] * density.shape[0] calls.
values = []
for j in tqdm(range(0, mu.shape[0])):
    value = np.array([func(density['axes'][i], mu[j], sigma[j]) for i in range(0, density.shape[0])])
    values.append(value)

In [None]:
## 2. VA + Horseshoe + PRECISE

In [None]:
va_horse_dir = '../../../data/commaai/va/filtered_gaussian_resampled/Horseshoe/'

N_PARAMS = 21    # columns per iteration: 0-9 beta, 10-19 used for Lambda, 20 used for tau_sq
BURN_IN = 20000  # rows before this index are discarded

# -1 lets numpy infer the number of iterations instead of hard-coding 29001;
# for a file with 29001 * 21 entries the result is unchanged.
mu_t_va = np.load(va_horse_dir + 'mu_ts.npy').reshape(-1, N_PARAMS)
kept = mu_t_va[BURN_IN:]

beta = np.mean(kept[:,0:10], axis = 0)
Lambda = np.mean(np.exp(0.5*kept[:,10:20]), axis = 0)
# NOTE(review): here exp is applied after averaging, whereas the ridge cell
# averages exp(...) — confirm the difference is intended.
tau_sq = np.exp(np.mean(kept[:,20], axis = 0))

# Assigned locally so the cell does not rely on `p` left over from an earlier cell.
p = len(beta)
z_pred = B_zeta.reshape(B_zeta.shape[0], p).dot(beta)
pred_y = [density.loc[find_closest_element(norm.cdf(z), density['cdf']), 'axes'] for z in z_pred]

In [None]:
# Predictive density for every validation observation (method 'va_horseshoe').
# The per-observation result is bound to `density_i` rather than `dens`, so the
# density_predictor instance stored in `dens` is not overwritten by this loop.
# NOTE(review): `predict_single_density` is commented out in the helpers import,
# and `grid`, `p_y_y0`, `part_1`, `phi_1_z` are not assigned in the visible
# cells — confirm where they come from before running.
densities_va = []
for i in tqdm(range(0, B_zeta.shape[0])):
    density_i = predict_single_density(B_zeta[i].reshape(p,), grid, p_y_y0, part_1, phi_1_z, beta, tau_sq, Lambda, 'va_horseshoe')
    densities_va.append(density_i)

In [None]:
# Mean prediction: trapezoidal integral of density * grid, one value per observation.
pred_y_va_horse = [
    sp.integrate.trapz(densities_va[i]*grid, grid)
    for i in tqdm(range(0, B_zeta.shape[0]))
]

In [None]:
# variance prediction
# Trapezoidal integral of density * (grid - mean prediction)^2, one value per observation.
pred_y_va_horse_var = [
    sp.integrate.trapz(densities_va[i]*((grid - pred_y_va_horse[i])**2), grid)
    for i in tqdm(range(0, B_zeta.shape[0]))
]

In [None]:
# Add the VA/horseshoe columns to the variance table and the mean-prediction table.
# Both tables must already exist from the VA/ridge section.
df_var['va_horse'] = pred_y_va_horse_var
df['va_horse'] = pred_y_va_horse

In [None]:
# Mean squared error of the VA/horseshoe mean predictions.
np.mean((true_y - np.array(pred_y_va_horse))**2)

In [None]:
# Mean absolute error of the VA/horseshoe mean predictions.
np.mean(np.abs((true_y - np.array(pred_y_va_horse))))

In [None]:
# Share of observations whose absolute error is below 6.
np.mean(np.abs(true_y - np.array(pred_y_va_horse)) < 6)

## 3. HMC + RIDGE + PRECISE

In [None]:
hmc_ridge_dir = '../../../data/commaai/mcmc/filtered_gaussian_resampled/Ridge/'
# Rows before index 20000 are dropped.
mu_t_hmc = np.load(hmc_ridge_dir + 'all_thetas.npy')[20000:,:]

# beta is the column mean of columns 0-9; tau_sq keeps one value per retained
# row (exp of column 10), i.e. it is a vector here, not a scalar.
beta = np.mean(mu_t_hmc[:,0:10], axis = 0)
tau_sq = np.exp(mu_t_hmc[:,10])

# Plug-in prediction; `p` must already be set by an earlier cell.
z_pred = B_zeta.reshape(B_zeta.shape[0], p).dot(beta)
pred_y = []
for z in z_pred:
    closest = find_closest_element(norm.cdf(z), density['cdf'])
    pred_y.append(density.loc[closest, 'axes'])

In [None]:
# Predictive density for every validation observation (method 'hmc_ridge').
# The per-observation result is bound to `density_i` rather than `dens`, so the
# density_predictor instance stored in `dens` is not overwritten by this loop.
# NOTE(review): the list is still called `densities_va` although it now holds
# HMC results; `predict_single_density` is commented out in the helpers import,
# and `grid`, `p_y_y0`, `part_1`, `phi_1_z` are not assigned in the visible cells.
densities_va = []
for i in tqdm(range(0, B_zeta.shape[0])):
    density_i = predict_single_density(B_zeta[i].reshape(p,), grid, p_y_y0, part_1, phi_1_z, beta, tau_sq, None, 'hmc_ridge')
    densities_va.append(density_i)

# mean prediction: trapezoidal integral of density * grid
pred_y_hmc_ridge = []
for i in tqdm(range(0, B_zeta.shape[0])):
    y_i = sp.integrate.trapz(densities_va[i]*grid, grid)
    pred_y_hmc_ridge.append(y_i)

# variance prediction: trapezoidal integral of density * (grid - mean prediction)^2
pred_y_hmc_ridge_var = []
for i in tqdm(range(0, B_zeta.shape[0])):
    y_i = sp.integrate.trapz(densities_va[i]*((grid - pred_y_hmc_ridge[i])**2), grid)
    pred_y_hmc_ridge_var.append(y_i)

In [None]:
# Mean prediction: trapezoidal integral of density * grid, one value per observation.
pred_y_hmc_ridge = [
    sp.integrate.trapz(densities_va[i]*grid, grid)
    for i in tqdm(range(0, B_zeta.shape[0]))
]

In [None]:
# variance prediction
# Trapezoidal integral of density * (grid - mean prediction)^2, one value per observation.
pred_y_hmc_ridge_var = [
    sp.integrate.trapz(densities_va[i]*((grid - pred_y_hmc_ridge[i])**2), grid)
    for i in tqdm(range(0, B_zeta.shape[0]))
]

In [None]:
# Add the HMC/ridge columns: variances go into df_var, mean predictions into df.
# Assigning the variances to df as well put the wrong quantity into the
# mean-prediction table.
df_var['hmc_ridge'] = pred_y_hmc_ridge_var
df['hmc_ridge'] = pred_y_hmc_ridge

In [None]:
# Average difference between the VA/ridge and HMC/ridge predictive variances.
np.mean(np.array(pred_y_va_ridge_var) - np.array(pred_y_hmc_ridge_var))

In [None]:
# Write the variance table and the mean-prediction table to CSV without the index column.
df_var.to_csv('../../../data/commaai/predictions/cpl/densities_variance.csv', index = False)
df.to_csv('../../../data/commaai/predictions/cpl/densities_variance_mean_pred.csv', index = False)

In [None]:
# mse
# NOTE(review): `pred_y_hmc_horse` is not assigned in any visible cell — confirm
# whether an HMC/horseshoe section is missing or `pred_y_hmc_ridge` was intended.
np.mean((true_y - np.array(pred_y_hmc_horse))**2)

In [None]:
# mae
# NOTE(review): `pred_y_hmc_horse` is not assigned in any visible cell.
np.mean(np.abs((true_y - np.array(pred_y_hmc_horse))))

In [None]:
# accuracy: share of observations whose absolute error is below 6
# NOTE(review): `pred_y_hmc_horse` is not assigned in any visible cell.
np.mean(np.abs(true_y - np.array(pred_y_hmc_horse)) < 6)