In [14]:
import random
import warnings

import numpy as np
import torch

# Silence every warning for the rest of the session.
# The stdlib module is called directly: `np.warnings` was only an accidental
# alias of it and is gone from NumPy 1.24 onward (AttributeError).
# The filter is installed before the imports below, as in the original cell.
warnings.filterwarnings('ignore')

from Data_toCompare import CQR_datareader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# one seed shared by every random number generator used in this notebook
seed = 1

# seed handed to train_test_split when the data is divided
random_state_train_test = seed

# seed Python, numpy and torch (CPU, plus all GPUs when CUDA is available)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)
    
# target miscoverage error
alpha = 0.1

# target quantile levels (lower, upper)
quantiles = [0.05, 0.95]

# fraction of the samples held out as the test set
test_ratio = 0.2

# where the datasets live, and which one to use
dataset_base_path = "./Data_toCompare/"
dataset_name = "community"

# read the features X and the responses y
X, y = CQR_datareader.GetDataset(dataset_name, dataset_base_path)

# hold out `test_ratio` of the samples for testing; the split is seeded
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=test_ratio, random_state=random_state_train_test
)

# make sure all four pieces are numpy arrays
x_train, y_train, x_test, y_test = (
    np.asarray(part) for part in (x_train, y_train, x_test, y_test)
)

# number of training samples and number of input features
n_train, in_shape = x_train.shape[0], x_train.shape[1]

# report the name of the dataset and the size of each split
print("Dataset: %s" % (dataset_name))
print(
    "Dimensions: train set (n=%d, p=%d) ; test set (n=%d, p=%d)"
    % (n_train, in_shape, x_test.shape[0], x_test.shape[1])
)

Dataset: community
Dimensions: train set (n=1595, p=100) ; test set (n=399, p=100)


In [15]:
# Randomly split the training indices into two halves of equal size:
# the proper training set and the calibration set.
# When n_train is odd, the last permuted index ends up in neither half.
idx = np.random.permutation(n_train)
n_half = n_train // 2
idx_train = idx[:n_half]
idx_cal = idx[n_half:2 * n_half]

# Zero-mean / unit-variance scaler, fitted on the proper training rows only.
scalerX = StandardScaler().fit(x_train[idx_train])

# Apply the scaler to all training rows and to the test rows.
# NOTE: x_train / x_test are rebound here, so running this cell a second
# time would scale data that has already been scaled.
x_train, x_test = scalerX.transform(x_train), scalerX.transform(x_test)

# Scale the labels by the mean absolute response of the proper training set.
mean_y_train = np.abs(y_train[idx_train]).mean()
y_train, y_test = (np.squeeze(labels) / mean_y_train for labels in (y_train, y_test))

In [16]:
from Experiments.EXP1.trainer import model_callByName, loss_callByName

#####################################################
# Neural network parameters
# (See AllQNet_RegressorAdapter class in helper.py)
#####################################################

nn_learn_func = torch.optim.Adam  # pytorch optimizer class
epochs = 1000                     # number of epochs
lr = 0.0005                       # learning rate
batch_size = 64                   # mini-batch size
hidden_size = 64                  # hidden dimension of the network
dropout = 0.1                     # dropout regularization rate
wd = 1e-6                         # weight decay regularization

# number of quantile levels on the grid
N = 10

# N evenly spaced quantile levels between 0.01 and 0.99. Level i is paired
# with level N-1-i (the two sum to 1); one network is trained per pair and
# its band is then conformalized by CQR.
quantiles_all = np.linspace(0.01, 0.99, N)

# Row k receives the conformalized bound for level quantiles_all[k], one
# column per test point. With an odd N the middle row would stay at zero,
# because the loop below only visits N // 2 pairs.
y_diffQuants = torch.zeros(N, len(x_test))

# NOTE(review): helper, RegressorNc, QuantileRegErrFunc, cv_test_ratio and
# cv_random_state are not defined in this cell or in the cells shown before
# it; presumably they come from an earlier cell. Confirm the notebook still
# runs after Restart & Run All.
for i in range(N // 2):

    # i-th pair of (lower, upper) quantile levels
    quantiles_net = [quantiles_all[i], quantiles_all[N - 1 - i]]

    # Miscoverage level of this pair: twice the lower quantile level.
    # NOTE(review): this rebinds the notebook-level `alpha` (0.1 in the setup
    # cell); after the loop it holds the value of the last pair.
    alpha = 2 * quantiles_all[i]

    # quantile neural network model for this pair of levels
    quantile_estimator = helper.AllQNet_RegressorAdapter(
        model=None,
        fit_params=None,
        in_shape=in_shape,
        hidden_size=hidden_size,
        quantiles=quantiles_net,
        learn_func=nn_learn_func,
        epochs=epochs,
        batch_size=batch_size,
        dropout=dropout,
        lr=lr,
        wd=wd,
        test_ratio=cv_test_ratio,
        random_state=cv_random_state,
        use_rearrangement=False,
    )

    # CQR object: scores points by their error relative to the estimated
    # quantile band
    nc = RegressorNc(quantile_estimator, QuantileRegErrFunc())

    # run the CQR procedure: fit on idx_train, calibrate on idx_cal,
    # predict lower / upper bounds for x_test
    y_lower, y_upper = helper.run_icp(
        nc, x_train, y_train, x_test, idx_train, idx_cal, alpha
    )

    # store both bounds at the rows of their quantile levels
    y_diffQuants[i] = torch.Tensor(y_lower)
    y_diffQuants[N - 1 - i] = torch.Tensor(y_upper)

    # compute and print average coverage and average length
    coverage_cp_qnet, length_cp_qnet = helper.compute_coverage(
        y_test, y_lower, y_upper, alpha, "CQR Neural Net"
    )

# Score the conformalized quantile grid against the (scaled) test responses.
test_Y = torch.Tensor(y_test)

# criterion name -> scalar score
ret = {}

# names looked up in loss_callByName
val_criterias = ["MACE_Loss", "AGCE_Loss", "CheckScore"]

for key in val_criterias:

    real_loss = loss_callByName[key]

    # Reuse the grid that was used to fill the rows of y_diffQuants, so the
    # levels passed to the loss cannot drift from the ones that were trained.
    real_err = real_loss(y_diffQuants, test_Y, q_list=quantiles_all)

    # Convert to a plain Python number only when the result supports it.
    # The original called .item() unconditionally before an isinstance check,
    # which made that check dead code and would fail for a loss that already
    # returns a Python float.
    if hasattr(real_err, "item"):
        real_err = real_err.item()

    ret[key] = real_err


print(ret)
    

CQR Neural Net: Percentage in the range (expecting 98.00): 96.992481
CQR Neural Net: Average length: 2.730825
CQR Neural Net: Percentage in the range (expecting 76.22): 76.441103
CQR Neural Net: Average length: 1.179764
CQR Neural Net: Percentage in the range (expecting 54.44): 60.150376
CQR Neural Net: Average length: 0.746732
CQR Neural Net: Percentage in the range (expecting 32.67): 35.839599
CQR Neural Net: Average length: 0.375221
CQR Neural Net: Percentage in the range (expecting 10.89): 15.789474
CQR Neural Net: Average length: 0.105969
{'MACE_Loss': 0.017736021429300308, 'AGCE_Loss': 0.08750000596046448, 'CheckScore': 0.13565975427627563}
