In [21]:
import torch
import random
import numpy as np
np.warnings.filterwarnings('ignore')

from Data_toCompare import CQR_datareader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

seed = 1

random_state_train_test = seed
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)
    
# desired miscoverage error
alpha = 0.1

# desired quanitile levels
quantiles = [0.05, 0.95]

# used to determine the size of test set
test_ratio = 0.2

# name of dataset
dataset_base_path = "./Data_toCompare/"
dataset_name = "community"

# load the dataset
X, y = CQR_datareader.GetDataset(dataset_name, dataset_base_path)

# divide the dataset into test and train based on the test_ratio parameter
x_train, x_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=test_ratio,
                                                    random_state=random_state_train_test)

# reshape the data
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)
x_test = np.asarray(x_test)
y_test = np.asarray(y_test)

# compute input dimensions
n_train = x_train.shape[0]
in_shape = x_train.shape[1]

# display basic information
print("Dataset: %s" % (dataset_name))
print("Dimensions: train set (n=%d, p=%d) ; test set (n=%d, p=%d)" % 
      (x_train.shape[0], x_train.shape[1], x_test.shape[0], x_test.shape[1]))

Dataset: community
Dimensions: train set (n=1595, p=100) ; test set (n=399, p=100)


# Data Split

In [22]:
# divide the data into proper training set and calibration set
idx = np.random.permutation(n_train)
n_half = int(np.floor(n_train/2))
idx_train, idx_cal = idx[:n_half], idx[n_half:2*n_half]

# zero mean and unit variance scaling 
scalerX = StandardScaler()
scalerX = scalerX.fit(x_train[idx_train])

# scale
x_train = scalerX.transform(x_train)
x_test = scalerX.transform(x_test)

# scale the labels by dividing each by the mean absolute response
mean_y_train = np.mean(np.abs(y_train[idx_train]))
y_train = np.squeeze(y_train)/mean_y_train
y_test = np.squeeze(y_test)/mean_y_train

# OurMethod

In [24]:
from cqr import helper
from Experiments.EXP1.TestPerform import testPerform_projKernel
from sklearn import random_projection
from sklearn.ensemble import RandomForestRegressor
import torch
from src.kernel_methods import kernel_estimator


depth = 10

train_X = x_train[idx_train]
train_Y = y_train[idx_train]
test_X = torch.Tensor(x_test)
test_Y = torch.Tensor(y_test).view(-1).cuda()
recal_X = torch.Tensor(x_train[idx_cal])
recal_Y = torch.Tensor(y_train[idx_cal]).view(-1).cuda()

rf_model = RandomForestRegressor(max_depth=depth, random_state=0)
rf_model.fit(train_X, train_Y)




n_component = 20
transformer = random_projection.GaussianRandomProjection(n_components = n_component)
reformer = lambda x : torch.Tensor(transformer.fit_transform(x.cpu().numpy()))

for width in [5,6,7,8,9,10]:
    print(width)
    
    record = testPerform_projKernel(
        test_X, test_Y, recal_X, recal_Y, 
        model_name = "RFKernel_RandomProj", model= rf_model, reformer= reformer, wid = width) 
    
    print(record)
    
    
    
    recal_mean = torch.Tensor(rf_model.predict(recal_X.cpu().numpy())).cuda()
    test_mean = torch.Tensor(rf_model.predict(test_X.cpu().numpy())).cuda()
    
    
    
    test_Z =  reformer(test_X)

    recal_Z = reformer(recal_X)

    

    
    eps_diffQuants = kernel_estimator(
        test_Z = test_Z.cuda(),
        recal_Z = recal_Z.cuda(),
        recal_epsilon = torch.Tensor(recal_Y - recal_mean).cuda(),
        quants = np.array([0.05, 0.95]),
        wid= width
    )

    y_diffQuants = (eps_diffQuants + test_mean.view(1,-1).repeat(len(eps_diffQuants),1)).cpu().numpy()

    coverage_cp_qforest, length_cp_qforest = helper.compute_coverage(
        test_Y.cpu().numpy(),
        y_diffQuants[0], y_diffQuants[1], alpha, "Our Estimation")







5
{'MACE_Loss': 0.01590702310204506, 'AGCE_Loss': 0.09994242340326309, 'CheckScore': 0.14681142568588257}
Our Estimation: Percentage in the range (expecting 90.00): 88.220551
Our Estimation: Average length: 1.713888
6
{'MACE_Loss': 0.031997524201869965, 'AGCE_Loss': 0.08201514929533005, 'CheckScore': 0.15161262452602386}
Our Estimation: Percentage in the range (expecting 90.00): 87.719298
Our Estimation: Average length: 1.709117
7
{'MACE_Loss': 0.020549040287733078, 'AGCE_Loss': 0.10552021116018295, 'CheckScore': 0.14577320218086243}
Our Estimation: Percentage in the range (expecting 90.00): 86.716792
Our Estimation: Average length: 1.717734
8
{'MACE_Loss': 0.020265376195311546, 'AGCE_Loss': 0.0823792889714241, 'CheckScore': 0.14947080612182617}
Our Estimation: Percentage in the range (expecting 90.00): 85.714286
Our Estimation: Average length: 1.751068
9
{'MACE_Loss': 0.022118112072348595, 'AGCE_Loss': 0.0871318057179451, 'CheckScore': 0.15051314234733582}
Our Estimation: Percentage i