In [49]:
import numpy as np
import pandas as pd
from scipy.stats import norm
import scipy as sp
from tqdm import tqdm
from scipy.integrate import simps, trapz
import matplotlib.pyplot as plt
import imageio
import multiprocessing
from helpers import (Fy, find_closest_element, predict_single_density, compute_coverage, 
confidence_interval, confidence_interval, generate_fixed_terms, get_ci)

In [4]:
# Validation inputs: labels first, then the B_zeta array they belong to.
true_y = np.load('../../../data/commaai/extracted_coefficients/20201027_filtered_gaussian_resampled/Bzeta/labels_val.npy')
B_zeta = np.load('../../../data/commaai/extracted_coefficients/20201027_filtered_gaussian_resampled/Bzeta/B_zeta_val.npy')
# The stored array is 3-D; keep axes 0 and 2 only. The reshape succeeds
# only when axis 1 has length 1 (otherwise the sizes do not match).
B_zeta = B_zeta.reshape(len(B_zeta), B_zeta.shape[2])

# Tabulated density; its 'axes' and 'cdf' columns are used further down.
density_path = '../../../data/commaai/density/gaussian_density_filtered.csv'
density = pd.read_csv(density_path)

In [5]:
# Grid resolution handed to generate_fixed_terms.
no_points = 500
# Terms that do not depend on the observation; they are passed unchanged to
# every predict_single_density call below, and `grid` is also the integration
# axis for the mean/variance cells.
# NOTE(review): exact meaning of p_y_y0 / part_1 / phi_1_z lives in helpers -- confirm there.
p_y_y0, part_1, phi_1_z, grid = generate_fixed_terms(no_points, density)

## 1. VA + RIDGE + PRECISE

In [55]:
# Variational parameters of the ridge run, one row per iteration.
va_ridge_dir = '../../../data/commaai/va/filtered_gaussian_resampled/Ridge/'
mu_t_va = np.genfromtxt(va_ridge_dir + 'mu_t_va.csv', delimiter=',')
iterations = mu_t_va.shape[0]

# Only the last 10% of the iterations enter the summaries.
va_tail = mu_t_va[int(0.9*iterations):iterations]
# Column 10 is exponentiated before averaging; columns 0-9 are the coefficients.
tau_sq_draws = np.exp(va_tail[:, 10])
beta = va_tail[:, 0:10].mean(axis=0)
tau_sq = tau_sq_draws.mean(axis=0)
tau_sq_sd = tau_sq_draws.std(axis=0)
p = len(beta)

# Point prediction: map each linear predictor through the normal cdf and look
# up the 'axes' value whose tabulated cdf is closest
# (presumably find_closest_element returns a row label of `density` -- confirm in helpers).
z_pred = B_zeta.reshape(B_zeta.shape[0], p).dot(beta)
cdf_column = density['cdf']
pred_y = [
    density.loc[find_closest_element(norm.cdf(z), cdf_column), 'axes']
    for z in z_pred
]

In [None]:
# One predict_single_density result per validation row, using the VA ridge
# summaries (no Lambda for the ridge prior, hence None).
densities_va = [
    predict_single_density(
        B_zeta[row].reshape(p,), grid, p_y_y0, part_1, phi_1_z,
        beta, tau_sq, None, 'va_ridge',
    )
    for row in tqdm(range(B_zeta.shape[0]))
]

In [None]:
# mean prediction: trapezoid-rule integral of grid * density over the grid
pred_y_va_ridge = np.array([
    sp.integrate.trapz(densities_va[k]*grid, grid)
    for k in tqdm(range(B_zeta.shape[0]))
])

In [None]:
# variance prediction: integral of (grid - mean_k)^2 * density_k over the grid
pred_y_va_ridge_var = [
    sp.integrate.trapz(densities_va[k]*((grid - pred_y_va_ridge[k])**2), grid)
    for k in tqdm(range(B_zeta.shape[0]))
]

In [None]:
# Result tables, one column per method: `df` holds the mean predictions,
# `df_var` the predictive variances. Later sections add their own columns.
df = pd.DataFrame({'va_ridge': pred_y_va_ridge})
df_var = pd.DataFrame({'va_ridge': pred_y_va_ridge_var})

In [None]:
# mean squared error of the VA ridge mean prediction
np.mean(np.square(true_y - np.array(pred_y_va_ridge)))

In [None]:
# share of validation rows whose absolute error is below 6
(np.abs(true_y - np.array(pred_y_va_ridge)) < 6).mean()

In [None]:
# Predicted vs. true values for the VA ridge mean prediction. The axes are
# labelled so the figure can be read without the surrounding code.
fig, ax = plt.subplots()
ax.scatter(true_y, np.array(pred_y_va_ridge), alpha = 0.1)
ax.set(xlabel='true_y', ylabel='pred_y_va_ridge', title='VA ridge: predicted vs. true')
plt.show()

## Alternative (as in Klein?): E[y] by numerically integrating y(z) against a normal density in z

In [16]:
def func(z, mu, sigma):
    """Integrand for the first moment: y(z) times the N(mu, sigma) pdf at z.

    y(z) is the 'axes' entry of the global `density` table selected by
    find_closest_element(norm.cdf(z), density['cdf'])
    (presumably the row whose tabulated cdf is nearest -- confirm in helpers).
    """
    nearest = find_closest_element(norm.cdf(z), density['cdf'])
    y_at_z = density.loc[nearest, 'axes']
    weight = norm.pdf(z, mu, sigma)
    return y_at_z*weight
def func2(z, mu, sigma):
    """Integrand for the second moment: y(z)**2 times the N(mu, sigma) pdf at z.

    y(z) is looked up in the global `density` table exactly as in `func`:
    the 'axes' entry selected by find_closest_element on the 'cdf' column.
    """
    nearest = find_closest_element(norm.cdf(z), density['cdf'])
    y_squared = density.loc[nearest, 'axes']**2
    return y_squared * norm.pdf(z, mu, sigma)

In [None]:
# A single (unseeded) normal draw centred on tau_sq with spread tau_sq_sd.
# NOTE(review): despite the plural name this is one scalar, and a normal draw can be negative.
tau_samples = np.random.normal(loc=tau_sq, scale=tau_sq_sd)

In [10]:
# Per-row scale S_i = sqrt(1 / (1 + tau_sq * ||B_zeta_i||^2)); it serves both as
# the standard deviation and as the factor on the linear predictor.
n = B_zeta.shape[0]
W = np.array([B_zeta[row].dot(B_zeta[row]) for row in range(n)])
S = np.sqrt(1/(1 + W*tau_sq))
mu = S*B_zeta.dot(beta)
sigma = S

In [32]:
# 1000 evenly spaced points spanning the range of the tabulated 'axes' values.
# NOTE(review): this grid is later passed to func as z although its range comes from 'axes' -- confirm intent.
z_grid = np.linspace(min(density['axes']), max(density['axes']), num=1000)

In [47]:
# Integrand values on z_grid for the first observation only (mu[0], sigma[0]).
func_values = [func(z_grid[k], mu[0], sigma[0]) for k in range(len(z_grid))]

In [52]:
# Trapezoid-rule integral of the tabulated integrand over z_grid.
E_y_theta = sp.integrate.trapz(y=func_values, x=z_grid)

In [53]:
# Show the integral for the first observation.
E_y_theta

-0.18966822650374993

In [27]:
# Same integrand for the first observation, evaluated at every tabulated
# 'axes' value instead of z_grid.
# NOTE(review): the recorded run of this cell was interrupted by hand.
axes_column = density['axes']
value = [func(axes_column[k], mu[0], sigma[0]) for k in range(len(density))]

KeyboardInterrupt: 

In [22]:
# Integrand at every tabulated 'axes' value, for every observation.
# Kept as an explicit append loop so that partial results survive an
# interrupt. NOTE(review): the recorded run was stopped after 2 of 10472 rows.
values = []
n_axes = density.shape[0]
for j in tqdm(range(mu.shape[0])):
    mu_j, sigma_j = mu[j], sigma[j]
    value = np.array([func(density['axes'][k], mu_j, sigma_j) for k in range(n_axes)])
    values.append(value)

  0%|          | 2/10472 [03:16<285:22:06, 98.12s/it]


KeyboardInterrupt: 

In [None]:
## 2. VA + Horseshoe + PRECISE

In [None]:
# Variational parameters of the horseshoe run, reshaped to 29001 rows x 21 columns.
va_horse_dir = '../../../data/commaai/va/filtered_gaussian_resampled/Horseshoe/'
mu_t_va = np.load(va_horse_dir + 'mu_ts.npy').reshape(29001, 21)

# Rows before 20000 are discarded. Columns 0-9 give the coefficients,
# columns 10-19 give Lambda and column 20 gives tau_sq.
horse_tail = mu_t_va[20000:]
beta = horse_tail[:, 0:10].mean(axis=0)
Lambda = np.exp(0.5*horse_tail[:, 10:20]).mean(axis=0)
# Unlike the ridge section, the exponential is applied after averaging here.
tau_sq = np.exp(horse_tail[:, 20].mean(axis=0))

# `p` is reused from the ridge section rather than recomputed from this beta.
z_pred = B_zeta.reshape(B_zeta.shape[0], p).dot(beta)
cdf_column = density['cdf']
pred_y = [
    density.loc[find_closest_element(norm.cdf(z), cdf_column), 'axes']
    for z in z_pred
]

In [None]:
# One predict_single_density result per validation row, using the VA
# horseshoe summaries. This overwrites the list built for the previous method.
densities_va = [
    predict_single_density(
        B_zeta[row].reshape(p,), grid, p_y_y0, part_1, phi_1_z,
        beta, tau_sq, Lambda, 'va_horseshoe',
    )
    for row in tqdm(range(B_zeta.shape[0]))
]

In [None]:
# mean prediction: trapezoid-rule integral of grid * density over the grid
pred_y_va_horse = [
    sp.integrate.trapz(densities_va[k]*grid, grid)
    for k in tqdm(range(B_zeta.shape[0]))
]

In [None]:
# variance prediction: integral of (grid - mean_k)^2 * density_k over the grid
pred_y_va_horse_var = [
    sp.integrate.trapz(densities_va[k]*((grid - pred_y_va_horse[k])**2), grid)
    for k in tqdm(range(B_zeta.shape[0]))
]

In [None]:
# Add the horseshoe columns: means go to `df`, variances to `df_var`.
df['va_horse'] = pred_y_va_horse
df_var['va_horse'] = pred_y_va_horse_var

In [None]:
# mean squared error of the VA horseshoe mean prediction
np.mean(np.square(true_y - np.array(pred_y_va_horse)))

In [None]:
# mean absolute error of the VA horseshoe mean prediction
np.abs(true_y - np.array(pred_y_va_horse)).mean()

In [None]:
# share of validation rows whose absolute error is below 6
(np.abs(true_y - np.array(pred_y_va_horse)) < 6).mean()

## 3. HMC + Ridge + PRECISE

In [None]:
# HMC draws for the ridge model; the first 20000 rows are discarded.
hmc_ridge_dir = '../../../data/commaai/mcmc/filtered_gaussian_resampled/Ridge/'
mu_t_hmc = np.load(hmc_ridge_dir + 'all_thetas.npy')[20000:,:]

beta = mu_t_hmc[:, 0:10].mean(axis=0)
# tau_sq stays a vector with one value per retained draw (it is not averaged),
# unlike the scalar used in the VA sections.
tau_sq = np.exp(mu_t_hmc[:,10])

# `p` is reused from the first section rather than recomputed from this beta.
z_pred = B_zeta.reshape(B_zeta.shape[0], p).dot(beta)
cdf_column = density['cdf']
pred_y = [
    density.loc[find_closest_element(norm.cdf(z), cdf_column), 'axes']
    for z in z_pred
]

In [None]:
# One predict_single_density result per validation row for the HMC ridge
# draws. The list keeps the name densities_va and overwrites the previous one.
densities_va = [
    predict_single_density(
        B_zeta[row].reshape(p,), grid, p_y_y0, part_1, phi_1_z,
        beta, tau_sq, None, 'hmc_ridge',
    )
    for row in tqdm(range(B_zeta.shape[0]))
]

In [None]:
# mean prediction: trapezoid-rule integral of grid * density over the grid
pred_y_hmc_ridge = [
    sp.integrate.trapz(densities_va[k]*grid, grid)
    for k in tqdm(range(B_zeta.shape[0]))
]

In [None]:
# variance prediction: integral of (grid - mean_k)^2 * density_k over the grid
pred_y_hmc_ridge_var = [
    sp.integrate.trapz(densities_va[k]*((grid - pred_y_hmc_ridge[k])**2), grid)
    for k in tqdm(range(B_zeta.shape[0]))
]

In [None]:
# Add the HMC ridge columns. `df_var` collects predictive variances and `df`
# collects mean predictions, so each frame gets its matching list.
df_var['hmc_ridge'] = pred_y_hmc_ridge_var
df['hmc_ridge'] = pred_y_hmc_ridge

In [None]:
# average difference between VA ridge and HMC ridge predictive variances
(np.array(pred_y_va_ridge_var) - np.array(pred_y_hmc_ridge_var)).mean()

In [None]:
# Persist both result tables (variances first, then mean predictions), without the index.
for frame, out_path in (
    (df_var, '../../../data/commaai/predictions/cpl/densities_variance.csv'),
    (df, '../../../data/commaai/predictions/cpl/densities_variance_mean_pred.csv'),
):
    frame.to_csv(out_path, index=False)

In [None]:
# mse of the HMC ridge mean prediction (the only HMC prediction computed in this notebook)
np.mean((true_y - np.array(pred_y_hmc_ridge))**2)

In [None]:
# mae of the HMC ridge mean prediction (the only HMC prediction computed in this notebook)
np.mean(np.abs((true_y - np.array(pred_y_hmc_ridge))))

In [None]:
# accuracy: share of validation rows whose absolute error is below 6,
# for the HMC ridge mean prediction
np.mean(np.abs(true_y - np.array(pred_y_hmc_ridge)) < 6)