In [1]:
import numpy as np
import statsmodels.api as sm
from tqdm import tqdm
import matplotlib.pyplot as plt
import multiprocessing
from scipy import integrate
import pandas as pd
from helpers import confidence_intervals
import ray
ray.init()

2021-03-20 14:52:08,953	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


{'node_ip_address': '192.168.140.16',
 'raylet_ip_address': '192.168.140.16',
 'redis_address': '192.168.140.16:6379',
 'object_store_address': '/tmp/ray/session_2021-03-20_14-52-08_354930_1302197/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2021-03-20_14-52-08_354930_1302197/sockets/raylet',
 'webui_url': '127.0.0.1:8265',
 'session_dir': '/tmp/ray/session_2021-03-20_14-52-08_354930_1302197',
 'metrics_export_port': 60332,
 'node_id': 'b577b2cf7f2a3b377d65774dbcb6d76ba4cc61b4'}

In [2]:
mc_preds = np.append(np.load('../../../data/commaai/predictions/mc_dropout/mc_preds_1.npy'), 
                     np.load('../../../data/commaai/predictions/mc_dropout/mc_preds_2.npy'), axis = 0)

In [3]:
# read in val data
true_y = np.load('../../../data/commaai/extracted_coefficients/20201027_filtered_gaussian_resampled/Bzeta/labels_val.npy')

density_path = '../../../data/commaai/density/gaussian_density_filtered.csv'
density = pd.read_csv(density_path)

In [None]:
densities = []
supports = []
for i in tqdm(range(0, mc_preds.shape[0])):
    kde = sm.nonparametric.KDEUnivariate(mc_preds[i,:,:])
    kde.fit() # Estimate the densities
    support = kde.support
    endog = kde.endog
    density = kde.density
    supports.append(support)
    densities.append(density) 

In [None]:
densities.shape

In [None]:
j = 0
cdf_mc_dropout = [] 
for supp in tqdm(densities):
    grid = supports[j]
    dens = densities[j]
    def f_integral(i):
        return(integrate.trapz(dens[i:(i+2)], grid[i:(i+2)]))
    with multiprocessing.Pool(20) as proc:
        probs = proc.map(f_integral, np.array([i for i in range(0, grid.shape[0])]))
    cdf_i = np.cumsum(np.array(probs))
    cdf_mc_dropout.append(cdf_i)
    j += 1

In [None]:
cdf_mc_dropout = np.array(cdf_mc_dropout)

In [None]:
np.save('../../../data/commaai/predictions/mc_dropout/cdf_mc_dropout_is.npy', cdf_mc_dropout)

# get extended densities

In [4]:
no_points = 750
grid = np.linspace(min(density['axes']), max(density['axes']), int(no_points))

In [None]:
@ray.remote
def assign_dens(supp, density_ext, grid, dens):
    density_ext_l = density_ext.copy()
    for i in range(0, supp.shape[0]):
        density_ext_l[np.where(np.abs(grid - supp[i]) == min(np.abs(grid - supp[i])))] = dens[i]
    return(density_ext_l)

In [None]:
density_ext_list = []
density_ext = np.array(np.repeat(0, 750), dtype=float)
for j in tqdm(range(0, len(densities))):
    density_ext = np.array(np.repeat(0, 750), dtype=float)
    for i in range(0, supports[j].shape[0]):
        density_ext[np.where(np.abs(grid - supports[j][i]) == min(np.abs(grid - supports[j][i])))] = densities[j][i]
    density_ext_list.append(density_ext)

In [None]:
density_ext_list = np.array(density_ext_list)
np.save('density_dropout.npy', density_ext_list)

In [None]:
grid = support
dens = density_gauss
def f_integral(i):
    return(integrate.trapz(dens[i:(i+2)], grid[i:(i+2)]))
with multiprocessing.Pool(20) as proc:
        probs = proc.map(f_integral, np.array([i for i in range(0, grid.shape[0])]))
cdf_true = np.cumsum(np.array(probs))

In [None]:
def f_integral(i):
        return(integrate.trapz(mc_av_den[i:(i+2)], grid[i:(i+2)]))
with multiprocessing.Pool(20) as proc:
    probs = proc.map(f_integral, np.array([i for i in range(0, grid.shape[0])]))
mc_cdf = np.cumsum(np.array(probs))

In [None]:
Gt = []
for i in tqdm(grid):
    Gt.append(sum(x <= i for x in true_y))
Gt = np.array(Gt)/len(true_y)

In [None]:
fig = plt.figure(figsize=(7,5))
ax = fig.add_subplot(111)
ax.plot(grid, mc_cdf - Gt,  color = 'red', label = 'MC-Dropout', linestyle = '-.', linewidth = 1)
#ax.plot(grid, va_ridge_cdf - Gt, color = 'orange', label = 'HMC/VA Ridge', linewidth = 1)
#ax.plot(grid, hmc_horse_cdf - Gt,  color = 'blue', label = 'VA/Horseshoe', linestyle = '-.', linewidth = 1)
#ax.plot(grid, hmc_ridge_cdf - Gt, color = 'green', linestyle= '--', label = 'HMC Horseshoe')
ax.legend()
plt.legend(framealpha=0.0)
fig.tight_layout(rect=[0.05, 0, 1, 1])
plt.xlabel('angle')
plt.autoscale()
plt.ylabel('average predictive CDF - empirical CDF')

## Prediction Intervals

In [None]:
def confidence_intervals(alphas, cdf_is, true_y, grid):
    all_conf_int = []
    for alpha2 in tqdm(alphas):
        confidence_intervals = []
        alpha = 1- alpha2
        i = 0
        for cdf in cdf_is:
            lb = max(grid[cdf <= alpha/2])
            try:
                ub = min(grid[cdf >= 1-alpha/2])
            except: 
                print('error at index:' + str(i))
                ub = max(support)
            i += 1
            confidence_intervals.append([lb, ub])
        all_conf_int.append(confidence_intervals)
    
    # prediction interval width
    conf_widths = []
    for level in range(0, np.append(np.linspace(0.05, 0.95, 10), float(0.99)).shape[0]):  
        conf_width = np.array([all_conf_int[level][i][1] - all_conf_int[level][i][0] for i in range(0, len(cdf_is))])
        conf_widths.append(conf_width)
    
    coverage_rate = []
    # prediction interval coverage rate
    for i in range(0, np.append(np.linspace(0.05, 0.95, 10), float(0.99)).shape[0]):
        confidence_intervals = all_conf_int[i]
        in_interval = []
        # loop over single PI 
        for i in range(0, len(true_y)):
            conf_int = confidence_intervals[i]
            if conf_int[0] <= true_y[i] <= conf_int[1]:
                in_interval.append(1)
            else:
                in_interval.append(0)
        mean_int = np.mean(in_interval)
        coverage_rate.append(mean_int)
    
    return({'prediction_intervals': all_conf_int,
            'pred_int_width': conf_widths, 
            'coverage_rate': coverage_rate})

In [5]:
density_ext_list = np.load('density_dropout.npy')

In [6]:
j = 0
cdf_mc_dropout = [] 
for supp in tqdm(density_ext_list):
    dens = density_ext_list[j,:]
    def f_integral(i):
        return(integrate.trapz(dens[i:(i+2)], grid[i:(i+2)]))
    with multiprocessing.Pool(20) as proc:
        probs = proc.map(f_integral, np.array([i for i in range(0, grid.shape[0])]))
    cdf_i = np.cumsum(np.array(probs))
    cdf_mc_dropout.append(cdf_i)
    j += 1

100%|██████████| 10472/10472 [1:10:03<00:00,  2.49it/s]


In [7]:
np.save('../../../data/commaai/predictions/mc_dropout/cdf_is_mc_dropout.npy', np.array(cdf_mc_dropout))

In [None]:
cdf_mc_dropout = np.load('cdf_is_mc_dropout.npy')

In [None]:
cdf_mc_dropout = np.array(cdf_mc_dropout)

In [None]:
def confidence_intervals(alphas, cdf_is, true_y, grid):
    all_conf_int = []
    for alpha2 in tqdm(alphas):
        confidence_intervals = []
        alpha = 1- alpha2
        i = 0
        for cdf in cdf_is:
            lb = max(grid[cdf <= alpha/2])
            try:
                ub = min(grid[cdf >= 1-alpha/2])
            except: 
                print('error at index:' + str(i))
                ub = max(grid)
            i += 1
            confidence_intervals.append([lb, ub])
        all_conf_int.append(confidence_intervals)
    
    # prediction interval width
    conf_widths = []
    for level in range(0, np.linspace(0.05, 0.95, 10).shape[0]):  #np.append(, float(0.99)
        conf_width = np.array([all_conf_int[level][i][1] - all_conf_int[level][i][0] for i in range(0, len(cdf_is))])
        conf_widths.append(conf_width)
    
    coverage_rate = []
    # prediction interval coverage rate
    for i in range(0, np.linspace(0.05, 0.95, 10).shape[0]): #np.append(, float(0.99))
        confidence_intervals = all_conf_int[i]
        in_interval = []
        # loop over single PI 
        for j in range(0, true_y.shape[0]):
            conf_int = confidence_intervals[i]
            if conf_int[0] <= true_y[j] <= conf_int[1]:
                in_interval.append(1)
            else:
                in_interval.append(0)
        mean_int = np.mean(in_interval)
        coverage_rate.append(mean_int)
    
    return({'prediction_intervals': all_conf_int,
            'pred_int_width': conf_widths, 
            'coverage_rate': coverage_rate})

In [None]:
alphas = np.linspace(0.05, 0.95, 10) #np.append(, float(0.99))
cis = confidence_intervals(alphas, cdf_mc_dropout, true_y, grid)

In [None]:
levels = np.linspace(0.05, 0.95, 10) # np.append( , float(0.99)
plt.plot(levels, levels - np.array(cis['coverage_rate']), linestyle = '-.', label = 'MC-Dropout', color = 'red')
plt.xlabel((r'$1- \alpha$'))
plt.ylabel(r'$1- \alpha$ - coverage rate')

In [None]:
np.array(cis['prediction_intervals']).shape