In [1]:
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import norm
from helpers import (Fy, find_closest_element,  compute_coverage, #predict_single_density,
confidence_interval, confidence_interval, generate_fixed_terms, get_ci)

In [2]:
# One CDF array per combination of inference method (va/hmc) and prior
# (horse/ridge); each is later passed to confidence_intervals as `cdf_is`.
va_horse, hmc_horse, va_ridge, hmc_ridge = (
    np.load(cdf_file)
    for cdf_file in (
        '../../../../data/commaai/calibration/cil_dens/va_horse_cdf_is.npy',
        '../../../../data/commaai/calibration/cil_dens/hmc_horse_cdf_is.npy',
        '../../../../data/commaai/calibration/cil_dens/va_ridge_cdf_is.npy',
        '../../../../data/commaai/calibration/cil_dens/hmc_ridge_cdf_is.npy',
    )
)

In [3]:
# Basis matrix and coefficient vector; their product is used later as z_pred.
B_zeta = np.load(
    '../../../../data/commaai/extracted_coefficients/20201021_unrestr_gaussian_resampled/Bzeta/B_zeta_val.npy'
)
beta = np.genfromtxt(
    '../../../../data/commaai/extracted_coefficients/20201021_unrestr_gaussian_resampled/beta/beta.csv',
    delimiter=',',
)

# Labels, restricted to entries with absolute value below 50.
# NOTE(review): only the labels are filtered here; B_zeta and the CDF arrays
# are used unfiltered later on — confirm they stay row-aligned with true_y.
labels_val = np.load(
    '../../../../data/commaai/extracted_coefficients/20201021_unrestr_gaussian_resampled/Bzeta/labels_val.npy'
)
true_y = labels_val[np.abs(labels_val) < 50]

# Grid that the CDF arrays are indexed against, and the density lookup table
# (its 'cdf' and 'axes' columns are read further down).
grid = np.load('../../../../data/commaai/calibration/cil_dens/grid.npy')
density_path = '../../../../data/commaai/density/gaussian_density.csv'
density = pd.read_csv(density_path)

In [4]:
# Nominal coverage levels 1 - alpha of the prediction intervals (not tail
# probabilities): ten evenly spaced levels from 0.05 to 0.95, plus 0.99.
alphas = np.concatenate((np.linspace(0.05, 0.95, 10), [0.99]))

In [5]:
def confidence_intervals(alphas, cdf_is, true_y, support=None):
    """Derive central prediction intervals from predictive CDFs and score them.

    For every nominal level ``1 - alpha`` in ``alphas`` and every CDF in
    ``cdf_is``, the interval runs from the largest grid value whose CDF is at
    most ``alpha / 2`` to the smallest grid value whose CDF is at least
    ``1 - alpha / 2``.  If no grid value satisfies one of the conditions, that
    bound is clamped to the matching end of the grid and the observation index
    is printed.

    Parameters
    ----------
    alphas : iterable of float
        Nominal coverage levels ``1 - alpha`` (not tail probabilities).
    cdf_is : sequence of array-like
        One CDF per observation, each evaluated on the points of ``support``.
    true_y : array-like
        Observed responses; entry ``i`` is scored against interval ``i``.  It
        may be shorter than ``cdf_is``, in which case only the first
        ``len(true_y)`` intervals enter the coverage rate.
    support : array-like, optional
        Grid values the CDFs are evaluated on.  Defaults to the notebook-level
        ``grid`` array.

    Returns
    -------
    dict
        ``'prediction_intervals'``: per level, a list of ``[lower, upper]``;
        ``'pred_int_width'``: per level, an array of ``upper - lower``;
        ``'coverage_rate'``: per level, the share of ``true_y`` values that lie
        inside their interval.

    Raises
    ------
    IndexError
        If ``true_y`` has more entries than ``cdf_is``.
    """
    support = np.asarray(grid if support is None else support)
    observed = np.ravel(true_y)
    if observed.size > len(cdf_is):
        # Fail before the expensive loop instead of after it.
        raise IndexError(
            'true_y has %d entries but only %d CDFs were supplied'
            % (observed.size, len(cdf_is))
        )

    all_conf_int = []
    for level in tqdm(alphas):
        alpha = 1 - level
        lower_tail, upper_tail = alpha / 2, 1 - alpha / 2
        level_intervals = []
        for idx, cdf in enumerate(cdf_is):
            cdf = np.asarray(cdf)
            below = support[cdf <= lower_tail]
            above = support[cdf >= upper_tail]
            if below.size == 0 or above.size == 0:
                # The CDF does not reach the requested tail on this grid, so
                # the affected bound falls back to the end of the grid.
                print('error at index:' + str(idx))
            lb = below.max() if below.size else support.min()
            ub = above.min() if above.size else support.max()
            level_intervals.append([lb, ub])
        all_conf_int.append(level_intervals)

    conf_widths = []
    coverage_rate = []
    for level_intervals in all_conf_int:
        # reshape keeps the two-column layout even when there are no intervals
        bounds = np.asarray(level_intervals).reshape(-1, 2)

        # prediction interval width
        conf_widths.append(bounds[:, 1] - bounds[:, 0])

        # prediction interval coverage rate
        scored = bounds[:observed.size]
        inside = (scored[:, 0] <= observed) & (observed <= scored[:, 1])
        coverage_rate.append(np.mean(inside))

    return {'prediction_intervals': all_conf_int,
            'pred_int_width': conf_widths,
            'coverage_rate': coverage_rate}

In [6]:
# Prediction intervals, widths and coverage rates for each of the four CDF
# arrays, all scored against the same labels and the same levels.
va_horse_ci, hmc_horse_ci, va_ridge_ci, hmc_ridge_ci = (
    confidence_intervals(alphas, cdf_array, true_y)
    for cdf_array in (va_horse, hmc_horse, va_ridge, hmc_ridge)
)

100%|██████████| 11/11 [02:29<00:00, 13.55s/it]
100%|██████████| 11/11 [02:31<00:00, 13.81s/it]
100%|██████████| 11/11 [02:30<00:00, 13.72s/it]
100%|██████████| 11/11 [02:30<00:00, 13.66s/it]


## Prediction Interval Coverage Rates

In [None]:
# Calibration plot: nominal level minus empirical coverage rate, per level.
# Positive values mean the intervals cover less often than their nominal level,
# negative values mean they cover more often.
levels = np.asarray(alphas)  # same levels the coverage rates were computed for

# NOTE(review): only the VA horseshoe and the HMC ridge results are drawn, and
# each label names both methods — presumably because the VA and HMC curves
# overlap; confirm. The width plot draws the VA (not HMC) ridge result.
fig, ax = plt.subplots()
ax.plot(levels, levels - np.array(va_horse_ci['coverage_rate']),
        linestyle='-.', label='HMC/VA Horseshoe', color='red')
ax.plot(levels, levels - np.array(hmc_ridge_ci['coverage_rate']),
        label='HMC/VA Ridge', color='darkorange')
ax.set_xlabel(r'$1- \alpha$')
ax.set_ylabel(r'$1- \alpha$ - coverage rate')
ax.legend(framealpha=0.0)  # a single call; a second legend() only replaces the first
fig.tight_layout()
fig.savefig('coverage_rate_cil.png', dpi=300)
# Author's observation: the coverage rate is actually higher than the
# predicted (nominal) coverage rate.

## PI width

In [None]:
# Mean prediction-interval width per nominal level. 'pred_int_width' holds one
# array of widths per level, so the mean is taken over axis 1 (observations).
levels = np.asarray(alphas)  # same levels the widths were computed for

# NOTE(review): only the VA results are drawn, and each label names both
# methods — presumably because the VA and HMC curves overlap; confirm.
fig, ax = plt.subplots()
ax.plot(levels, np.mean(np.abs(np.array(va_horse_ci['pred_int_width'])), axis=1),
        label='VA/HMC Horseshoe', color='red', linestyle='-.')
ax.plot(levels, np.mean(np.abs(np.array(va_ridge_ci['pred_int_width'])), axis=1),
        label='VA/HMC Ridge', color='orange')
ax.set_xlabel(r'$1- \alpha$')
ax.set_ylabel(r'mean $1 - \alpha$-PI width')
ax.legend(framealpha=0.0)  # a single call; a second legend() only replaces the first
fig.tight_layout()
fig.savefig('mean_pi_width_cil.png', dpi=300)

In [None]:
# Linear predictor from the basis matrix and the coefficient vector.
z_pred = np.dot(B_zeta, beta)

# Map every z to a response value through the density table: take the standard
# normal CDF of z, look up the matching entry of the 'cdf' column and read the
# 'axes' value stored there.
# NOTE(review): assumes find_closest_element returns a row label of `density`
# usable with .loc — confirm against helpers.
cdf_column = density['cdf']
y_pred = np.array([
    density.loc[find_closest_element(norm.cdf(z_value), cdf_column), 'axes']
    for z_value in z_pred
])

In [None]:
# Error of the point prediction among observations whose widest-level interval
# (last entry of 'pred_int_width') is narrower than a threshold, for 100
# thresholds between 0 and 120.
# NOTE(review): despite the name, the stored value is a mean absolute error,
# not a mean squared error; the name is kept so later cells keep working.
widest_level_width = va_horse_ci['pred_int_width'][-1]
mse_list = []
for max_width in np.linspace(0, 120, 100):
    narrow_enough = widest_level_width < max_width
    if narrow_enough.any():
        abs_error = np.abs(true_y[narrow_enough] - y_pred[narrow_enough])
        mse_list.append(np.mean(abs_error))
    else:
        # no observation has an interval this narrow
        mse_list.append(float('NaN'))

In [None]:
# Widths of the 0.99-level prediction interval, one per observation.
# 'pred_int_width' is indexed [level][observation] and 0.99 is the last level,
# so the last entry is the one wanted. The previous expression indexed
# [i][-1] over range(len(va_horse_ci)), i.e. it picked the last observation's
# width at the first three levels (the result dict has three keys).
ninetynine = list(va_horse_ci['pred_int_width'][-1])

In [None]:
# Number of keys in the dict returned by confidence_intervals
# ('prediction_intervals', 'pred_int_width', 'coverage_rate') — not the number
# of observations or of levels.
len(va_horse_ci)

In [None]:
# Number of keys in the dict returned by confidence_intervals
# ('prediction_intervals', 'pred_int_width', 'coverage_rate') — not the number
# of observations or of levels.
len(hmc_horse_ci)