In [47]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from tqdm import tqdm
import sys
import matplotlib as mpl

sys.modules.pop('generate_syn_data', None)
from generate_syn_data import *

sys.modules.pop('ARW', None)
from ARW import *

In [39]:
def calculate_coverage(y_hat, qt_khat, mu_t, variance):
    """Return the Gaussian probability mass inside a symmetric prediction interval.

    The interval is ``[y_hat - qt_khat, y_hat + qt_khat]`` and the mass is
    measured under a normal distribution with mean ``mu_t`` and variance
    ``variance``.

    Parameters
    ----------
    y_hat : float or array-like
        Centre(s) of the prediction interval.
    qt_khat : float or array-like
        Half-width of the interval.
    mu_t : float or array-like
        Mean(s) of the normal distribution.
    variance : float or array-like
        Variance of the normal distribution (its square root is used as scale).

    Returns
    -------
    float or numpy.ndarray
        ``cdf(upper) - cdf(lower)``, evaluated elementwise.
    """
    # norm.cdf takes a standard deviation as its scale, so convert once.
    sigma = np.sqrt(variance)
    lower, upper = y_hat - qt_khat, y_hat + qt_khat
    mass_below_upper = norm.cdf(upper, loc=mu_t, scale=sigma)
    mass_below_lower = norm.cdf(lower, loc=mu_t, scale=sigma)
    return mass_below_upper - mass_below_lower

In [49]:
# --- Experiment configuration -------------------------------------------------
# alpha, delta and gamma are forwarded unchanged to ARWQE below.
alpha = 0.1
delta = 0.1
gamma = 1
num_periods = 200

# NOTE: swap the two beta_1 lines to change the shift pattern.
beta_1 = 5*generate_true_means(num_periods-1, 1)
#beta_1 = np.ones(num_periods)
beta_2 = np.ones(num_periods)
# One row of regression coefficients per period.
beta = np.column_stack((beta_1, beta_2))
variance_y = 0.5
meanX = 1

np.random.seed(6)

# B_arr holds the validation-set size of each period.
# randint's `high` is exclusive, so low=1/high=2 always yields 1 sample per
# period; widen the range to get varying sizes.
B_arr = np.random.randint(low=1, high=2, size=num_periods)
# First / last index of each period inside the concatenated validation scores.
B_arr_starts = np.cumsum(B_arr) - B_arr
B_arr_ends = np.cumsum(B_arr) - 1

# Training-set size of each period.
B_arr_tr = 10 * B_arr

# NOTE: all the above can be tuned

fixed_windows = [1, 256]
num_trials = 20

# cov_dict[method][trial] -> list of mean coverages, one entry per period.
# 'ARW' is initialised on its own so it exists even when fixed_windows is
# empty and is not needlessly re-created once per window.
cov_dict = {'ARW': {trial: [] for trial in range(num_trials)}}
for k in fixed_windows:
    cov_dict[f'Val_{k}'] = {trial: [] for trial in range(num_trials)}

seeds = np.arange(num_trials) + 2024

# `total` lets tqdm draw a real progress bar; enumerate() alone has no length.
for (trial, seed) in tqdm(enumerate(seeds), total=len(seeds)):

    np.random.seed(seed)
    X_tr, y_tr = generate_linreg_data(meanX, B_arr_tr, beta, variance_y)
    X_val, y_val = generate_linreg_data(meanX, B_arr, beta, variance_y)
    reg, S = fit_and_get_scores(X_tr, y_tr, X_val, y_val)

    # Feature dimension does not change across periods, so read it once per trial.
    dimX = X_tr.shape[1]

    for t in range(num_periods):

        # Scores and batch sizes observed up to and including period t.
        S_t = S[:B_arr_ends[t]+1]
        B_arr_t = B_arr[:t+1]

        khat, qt_khat, qtk_all = ARWQE(S_t, B_arr_t, alpha, delta, gamma)
        beta_t = beta[t]

        # Compute coverage on 100 fresh test points drawn for this period.
        X_test = generate_multinomial_X(meanX, dimX, 100)
        mu_test = X_test @ beta_t
        y_hat = reg.predict(X_test)
        coverage_ARW = calculate_coverage(y_hat, qt_khat, mu_test, variance_y)
        cov_dict['ARW'][trial].append(coverage_ARW.mean())

        # Baseline: take the quantile of a fixed window k.
        # NOTE(review): the index is clamped to t, presumably because only
        # t+1 periods exist so far -- confirm qtk_all's indexing convention
        # against ARWQE.
        for k in fixed_windows:
            qtk = qtk_all[min(k,t)]
            coverage_k = calculate_coverage(y_hat, qtk, mu_test, variance_y)
            cov_dict[f'Val_{k}'][trial].append(coverage_k.mean())

20it [01:02,  3.13s/it]


In [42]:
# Process results in cov_dict into a dense array indexed [method, trial, period].
methods = ['ARW'] + [f'Val_{k}' for k in fixed_windows]
print(methods)
num_methods = len(methods)
cov_array = np.zeros((num_methods, num_trials, num_periods))
# Look each row up by method name rather than by cov_dict's key order, so the
# printed labels always match the rows. If cov_dict is stale relative to
# fixed_windows (cells run out of order), this raises KeyError instead of
# silently reporting numbers under the wrong label.
for (i, key) in enumerate(methods):
    for (trial, trial_coverage) in cov_dict[key].items():
        cov_array[i, trial, :] = trial_coverage

# Average coverage over time within each trial, then summarise across trials.
trial_means = np.mean(cov_array, axis=2)
mean_coverage = np.mean(trial_means, axis=1)
std_coverage = np.std(trial_means, axis=1)
print(mean_coverage)

['ARW', 'Val_1', 'Val_64']
[0.78430963 0.65561305 0.92530462]
