In [7]:
import os, sys
import torch
import numpy as np
import argparse
from argparse import Namespace
import tqdm
import six
from scipy import stats
import pickle
from helper import set_seeds
from torch.utils.data import DataLoader
from datasets.datasets import get_scaled_data, get_synthetic_data
from utils.q_model_ens import QModelEns, MSEModel
from losses import batch_qr_loss, batch_interval_loss
import helper
from helper import SYN_DATA, REAL_DATA

sys.modules['sklearn.externals.six'] = six
np.warnings.filterwarnings('ignore')

os.environ["MKL_CBWR"] = 'AUTO'

results_path = helper.results_path

if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"


def get_loss_fn(loss_name):
    if loss_name == 'batch_qr' or loss_name == 'batch_wqr':
        fn = batch_qr_loss
    elif loss_name == 'batch_int':
        fn = batch_interval_loss
    else:
        raise ValueError('loss arg not valid')

    return fn


def parse_args():
    parser = argparse.ArgumentParser()

    parser.add_argument('--seed', type=int, default=None,
                        help='random seed')

    parser.add_argument('--seed_begin', type=int, default=None,
                        help='random seed')
    parser.add_argument('--seed_end', type=int, default=None,
                        help='random seed')

    parser.add_argument('--data', type=str, default='',
                        help='dataset to use')

    parser.add_argument('--num_q', type=int, default=30,
                        help='number of quantiles you want to sample each step')
    parser.add_argument('--gpu', type=int, default=1,
                        help='gpu num to use')

    parser.add_argument('--num_ep', type=int, default=10000,
                        help='number of epochs')
    parser.add_argument('--nl', type=int, default=2,
                        help='number of layers')
    parser.add_argument('--hs', type=int, default=64,
                        help='hidden size')

    parser.add_argument('--dropout', type=float, default=0,
                        help='dropout ratio of the dropout level')
    parser.add_argument('--lr', type=float, default=1e-3,
                        help='learning rate')
    parser.add_argument('--wd', type=float, default=0.0,
                        help='weight decay')
    parser.add_argument('--bs', type=int, default=1024,
                        help='batch size')
    parser.add_argument('--wait', type=int, default=200,
                        help='how long to wait for lower validation loss')

    parser.add_argument('--loss', type=str,
                        help='specify type of loss')

    parser.add_argument('--corr_mult', type=float, default=0.,
                        help='correlation penalty multiplier')

    parser.add_argument('--hsic_mult', type=float, default=0.,
                        help='correlation penalty multiplier')

    parser.add_argument('--ds_type', type=str, default="",
                        help='type of data set. real or synthetic. REAL for real. SYN for synthetic')

    parser.add_argument('--test_ratio', type=float, default=0.4,
                        help='ratio of test set size')

    parser.add_argument('--save_training_results', type=int, default=0,
                        help='1 for saving results during training, or 0 for not saving')

    parser.add_argument('--method', type=str, default='QR',
                        help='method to use (QR or qr_forest)')

    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device_name)
    args.device = device
    if args.method not in ['QR', 'qr_forest']:
        raise ValueError('method arg not valid')

    return args


def update_results_during_training(y_upper, y_lower, x, y, set_name, results_dict, alpha):
    with torch.no_grad():
        if len(x) == 0 or len(y) == 0:
            return
        y = y.reshape(-1).to(device)
        idx = np.random.permutation(len(x))  # [:len(xx)]
        x = x[idx].to(device)
        quantiles = torch.Tensor([alpha / 2, 1 - alpha / 2]).to(device)
        

        if torch.is_tensor(y):
            curr_y = y.cpu().detach().numpy()[idx]
        else:
            curr_y = y[idx]
        in_the_range = ((curr_y >= y_lower) & (curr_y <= y_upper))
        lengths = (y_upper - y_lower)

        if 'pearsons_correlation' + '_over_' + set_name not in results_dict:
            results_dict['pearsons_correlation' + '_over_' + set_name] = []

        results_dict['pearsons_correlation' + '_over_' + set_name] += [
            stats.pearsonr(in_the_range, lengths)[0]]

        if 'coverage' + '_over_' + set_name not in results_dict:
            results_dict['coverage' + '_over_' + set_name] = []

        results_dict['coverage' + '_over_' + set_name] += [np.mean(in_the_range)]

        if 'interval_lengths' + '_over_' + set_name not in results_dict:
            results_dict['interval_lengths' + '_over_' + set_name] = []

        results_dict['interval_lengths' + '_over_' + set_name] += [np.mean(lengths)]


In [8]:
from Experiments.EXP1.TestPerform import testPerform_projKernel
from sklearn import random_projection
from sklearn.ensemble import RandomForestRegressor
import torch
from src.kernel_methods import kernel_estimator
from losses import independence_penalty



def mass_exp(DATA_NAMES, SEEDS = range(0, 1), recal = 0.3, ker_range = [10, 20, 30, 50], resample_perce = 5):

    data_type = REAL_DATA

    # DATA_NAMES = ['meps_19', 'meps_20', 'meps_21', 'facebook_1', 'facebook_2','blog_data']




    save_results_during_training = True


    alpha = 0.1


    for d in DATA_NAMES:
        print("-------------------------------------------------")
        if save_results_during_training:
            results_during_training = {}
            for s in SEEDS:
                results_during_training[s] = {}

        for s in tqdm.tqdm(SEEDS):

            arg_data = str(d)
            arg_seed = s

            set_seeds(arg_seed)
            data_args = Namespace(dataset=arg_data, seed=arg_seed)

            # if data_type == REAL_DATA:
            # Fetching data
            data_out = get_scaled_data(arg_data, arg_seed, recal_prop = recal)
            x_train, y_train = data_out.x_train, data_out.y_train
            unscaled_x_train = None
            unscaled_x_test = None
            minority_group_uncertainty = None
            group_feature = None


            x_tr, x_va, x_te, y_tr, y_va, y_te, y_al = \
                data_out.x_tr, data_out.x_va, data_out.x_te, data_out.y_tr, \
                data_out.y_va, data_out.y_te, data_out.y_al
            
            if resample_perce > 0:
                
                x_va = x_va[:int(len(x_va)/resample_perce)]
                y_va = y_va[:int(len(y_va)/resample_perce)]
                x_te = x_te[:int(len(x_te)/resample_perce)]
                y_te = y_te[:int(len(y_te)/resample_perce)]
            
            
            print("finish loading "+d)


            ## our method

            # -------------------------------------------------------------------------

            depth = 10

            train_X = x_tr.numpy()
            train_Y = y_tr.view(-1).numpy()
            test_X = x_te
            test_Y = y_te.cuda().view(-1)
            recal_X = x_va
            recal_Y = y_va.cuda().view(-1)

            rf_model = RandomForestRegressor(max_depth=depth, random_state=0)
            rf_model.fit(train_X, train_Y)




            n_component = 20
            transformer = random_projection.GaussianRandomProjection(n_components = n_component)
            reformer = lambda x : torch.Tensor(transformer.fit_transform(x.cpu().numpy()))

            for width in ker_range:
                print(width)

                record = testPerform_projKernel(
                    test_X, test_Y, recal_X, recal_Y, 
                    model_name = "RFKernel_RandomProj", model= rf_model, reformer= reformer, wid = width) 

                print(record)



                recal_mean = torch.Tensor(rf_model.predict(recal_X.cpu().numpy())).cuda()
                test_mean = torch.Tensor(rf_model.predict(test_X.cpu().numpy())).cuda()



                test_Z =  reformer(test_X)

                recal_Z = reformer(recal_X)




                eps_diffQuants = kernel_estimator(
                    test_Z = test_Z.cuda(),
                    recal_Z = recal_Z.cuda(),
                    recal_epsilon = torch.Tensor(recal_Y - recal_mean).cuda(),
                    quants = np.array([alpha/2, 1-alpha/2]),
                    wid= width
                )

                y_diffQuants = (eps_diffQuants + test_mean.view(1,-1).repeat(len(eps_diffQuants),1)).cpu().numpy()

                
                y_lower = y_diffQuants[0]
                y_upper = y_diffQuants[1]
                
                
                # result_vaca = {}

                # update_results_during_training(y_upper, y_lower, x_te, y_te, "test", result_vaca, alpha)

                # print(result_vaca)
                
                # print("HSIC: ", independence_penalty(y_te.cpu(), torch.Tensor(y_lower).view(1,-1), torch.Tensor(y_upper).view(1,-1)))


            # --------------------------------------------------------------------------


            

mass_exp(DATA_NAMES = ['meps_19', 'meps_20', 'meps_21', 'facebook_1', 'facebook_2','blog_data'], SEEDS = range(0, 1), recal = 0.3, ker_range = [5, 10, 20, 50, 100, 150], resample_perce = 4)      

-------------------------------------------------


  0%|          | 0/1 [00:00<?, ?it/s]

finish loading meps_19
5
{'MACE_Loss': 0.031568244099617004, 'AGCE_Loss': 0.10430365055799484, 'CheckScore': 0.12330325692892075}
10
{'MACE_Loss': 0.02072712406516075, 'AGCE_Loss': 0.09031034260988235, 'CheckScore': 0.11724480241537094}
20
{'MACE_Loss': 0.008647356182336807, 'AGCE_Loss': 0.11692307889461517, 'CheckScore': 0.11700228601694107}
50
{'MACE_Loss': 0.010033221915364265, 'AGCE_Loss': 0.06763588637113571, 'CheckScore': 0.11715546250343323}


100%|██████████| 1/1 [00:05<00:00,  5.12s/it]


100
{'MACE_Loss': 0.010636065155267715, 'AGCE_Loss': 0.08348485082387924, 'CheckScore': 0.11720244586467743}
150
{'MACE_Loss': 0.01083358284085989, 'AGCE_Loss': 0.09828563779592514, 'CheckScore': 0.11719542741775513}
-------------------------------------------------


  0%|          | 0/1 [00:00<?, ?it/s]

finish loading meps_20
5
{'MACE_Loss': 0.03302071616053581, 'AGCE_Loss': 0.10008786618709564, 'CheckScore': 0.15000073611736298}
10
{'MACE_Loss': 0.022303583100438118, 'AGCE_Loss': 0.07036971300840378, 'CheckScore': 0.14997360110282898}
20
{'MACE_Loss': 0.009158487431704998, 'AGCE_Loss': 0.09048483520746231, 'CheckScore': 0.14989732205867767}
50


100%|██████████| 1/1 [00:05<00:00,  5.66s/it]

{'MACE_Loss': 0.008125373162329197, 'AGCE_Loss': 0.056538376957178116, 'CheckScore': 0.1497844159603119}
100
{'MACE_Loss': 0.008563229814171791, 'AGCE_Loss': 0.04817577078938484, 'CheckScore': 0.14970393478870392}
150
{'MACE_Loss': 0.008814041502773762, 'AGCE_Loss': 0.040113117545843124, 'CheckScore': 0.14965838193893433}


100%|██████████| 1/1 [00:05<00:00,  5.66s/it]


-------------------------------------------------


  0%|          | 0/1 [00:00<?, ?it/s]

finish loading meps_21
5
{'MACE_Loss': 0.07681116461753845, 'AGCE_Loss': 0.13475805521011353, 'CheckScore': 0.15599632263183594}
10
{'MACE_Loss': 0.060255035758018494, 'AGCE_Loss': 0.09554856270551682, 'CheckScore': 0.15544170141220093}
20
{'MACE_Loss': 0.04824228957295418, 'AGCE_Loss': 0.13871794939041138, 'CheckScore': 0.15455974638462067}
50
{'MACE_Loss': 0.03997792303562164, 'AGCE_Loss': 0.1491723358631134, 'CheckScore': 0.15417414903640747}


100%|██████████| 1/1 [00:05<00:00,  5.09s/it]


100
{'MACE_Loss': 0.037829577922821045, 'AGCE_Loss': 0.07804243266582489, 'CheckScore': 0.15415546298027039}
150
{'MACE_Loss': 0.0375482514500618, 'AGCE_Loss': 0.07106993347406387, 'CheckScore': 0.15417298674583435}
-------------------------------------------------


  0%|          | 0/1 [00:00<?, ?it/s]

finish loading facebook_1
5
{'MACE_Loss': 0.026011407375335693, 'AGCE_Loss': 0.06906558573246002, 'CheckScore': 0.11090192943811417}
10
{'MACE_Loss': 0.024971654638648033, 'AGCE_Loss': 0.06378703564405441, 'CheckScore': 0.10838855803012848}
20
{'MACE_Loss': 0.0244070403277874, 'AGCE_Loss': 0.07220427691936493, 'CheckScore': 0.10851825028657913}
50
{'MACE_Loss': 0.024301471188664436, 'AGCE_Loss': 0.06685703247785568, 'CheckScore': 0.10851526260375977}
100


100%|██████████| 1/1 [00:16<00:00, 16.54s/it]


{'MACE_Loss': 0.024340569972991943, 'AGCE_Loss': 0.05918869748711586, 'CheckScore': 0.10850077122449875}
150
{'MACE_Loss': 0.024301469326019287, 'AGCE_Loss': 0.07705727219581604, 'CheckScore': 0.10850095748901367}
-------------------------------------------------


  0%|          | 0/1 [00:00<?, ?it/s]

finish loading facebook_2
5
{'MACE_Loss': 0.016712097451090813, 'AGCE_Loss': 0.029018931090831757, 'CheckScore': 0.11480104178190231}
10
{'MACE_Loss': 0.013738510198891163, 'AGCE_Loss': 0.0515252910554409, 'CheckScore': 0.1148904412984848}
20
{'MACE_Loss': 0.012179391458630562, 'AGCE_Loss': 0.039712172001600266, 'CheckScore': 0.11473292857408524}
50
{'MACE_Loss': 0.01035897433757782, 'AGCE_Loss': 0.04481220245361328, 'CheckScore': 0.11474768072366714}
100
{'MACE_Loss': 0.00984327495098114, 'AGCE_Loss': 0.03219832479953766, 'CheckScore': 0.11474159359931946}
150


100%|██████████| 1/1 [00:31<00:00, 31.50s/it]


{'MACE_Loss': 0.009774412959814072, 'AGCE_Loss': 0.046305425465106964, 'CheckScore': 0.11474265158176422}
-------------------------------------------------


  0%|          | 0/1 [00:00<?, ?it/s]

finish loading blog_data
5
{'MACE_Loss': 0.028894590213894844, 'AGCE_Loss': 0.051696863025426865, 'CheckScore': 0.15224525332450867}
10
{'MACE_Loss': 0.01265603955835104, 'AGCE_Loss': 0.05269784852862358, 'CheckScore': 0.1384609192609787}
20
{'MACE_Loss': 0.006692203693091869, 'AGCE_Loss': 0.02705974318087101, 'CheckScore': 0.13754688203334808}
50
{'MACE_Loss': 0.0065119918435812, 'AGCE_Loss': 0.036810193210840225, 'CheckScore': 0.1374085694551468}
100
{'MACE_Loss': 0.0074704778380692005, 'AGCE_Loss': 0.04855223372578621, 'CheckScore': 0.13744112849235535}


100%|██████████| 1/1 [00:35<00:00, 35.69s/it]

150
{'MACE_Loss': 0.007481812033802271, 'AGCE_Loss': 0.046855051070451736, 'CheckScore': 0.13742966949939728}



