In [1]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import RandomizedSearchCV
import numpy as np
import torch 
from src.data.make_housing_dataset import CaliforniaHousing
from sklearn.metrics import r2_score
from scipy.stats import randint, uniform
import pickle

from src.models.GB_quantile_regressor import GB_quantile_regressor, Conformalized_quantile_regression_logits
from mapie.quantile_regression import MapieQuantileRegressor
from src.models.CQR import CQR_logits
from src.models.regFNN import RegFNN
from typing import Iterable, List, Optional, Tuple, Union, cast
from numpy.typing import  NDArray



In [2]:
train_set = CaliforniaHousing(
    split="train", in_folder='../data/raw', out_folder='../data/processed')
test_set = CaliforniaHousing(
    split="test", in_folder='../data/raw', out_folder='../data/processed')
train_loader = torch.utils.data.DataLoader(train_set, batch_size=32, shuffle=True)
calib_set = CaliforniaHousing(
    split="calib", in_folder='../data/raw', out_folder='../data/processed')
calib_loader = torch.utils.data.DataLoader(calib_set, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=32)

X_train = train_set.data.numpy()
y_train = train_set.targets.numpy()

X_test = test_set.data.numpy()
y_test =test_set.targets.numpy()

Loaded from pre-processed files
Loaded from pre-processed files
Loaded from pre-processed files


In [2]:
quantiles = [0.05, 0.5, 0.95]
predictions = {}

for quantile in quantiles:
    params_distributions = dict(
        max_leaf_nodes=randint(low=10, high=50),
        max_depth=randint(low=3, high=20),
        n_estimators=randint(low=50, high=300),
        learning_rate=uniform()
    )
    qr = GradientBoostingRegressor(alpha=quantile, loss='quantile')
    model = RandomizedSearchCV(qr, params_distributions)
    y_pred = model.fit(X_train, y_train).predict(X_test)
    predictions[quantile] = y_pred

NameError: name 'X_train' is not defined

In [4]:
with open('../models/trained_gbreg0.05.pkl', 'rb') as p:
    model_005 = pickle.load(p)
with open('../models/trained_gbreg0.95.pkl', 'rb') as p:
    model_095 = pickle.load(p)


In [22]:
model = GB_quantile_regressor(model_005, model_095)

In [16]:
q_lo, q_hi = model.predict(X_test)

In [72]:
alpha = 0.1
X_cal = test_loader.dataset.data.detach().cpu().numpy()
y_cal = test_loader.dataset.targets.detach().cpu().numpy()
model.predict(X_cal)[1]


array([1.47830702, 1.72023197, 1.23981333, ..., 1.19121525, 1.17053788,
       1.21129521])

In [83]:
class GB_conformal_quantile_regressor:
    def __init__(self, model, calib_loader, alpha):
        X_cal = calib_loader.dataset.data.detach().cpu().numpy()
        y_cal = calib_loader.dataset.targets.detach().cpu().numpy()
        print(model.model_low)
        self.model_low = MapieQuantileRegressor(
            [model.model_low], alpha=alpha/2, cv='prefit').fit(X_cal, y_cal)
        self.model_high = MapieQuantileRegressor(
            [model.model_high], alpha=1-(alpha/2), cv='prefit').fit(X_cal, y_cal)
    def predict(self, inputs):
        return [self.model_low.predict(inputs), self.model_high.predict(inputs)]


In [3]:
low = GradientBoostingRegressor(loss='quantile',alpha=0.05).fit(X_train, y_train)
median = GradientBoostingRegressor(loss='quantile',alpha=0.5).fit(X_train, y_train)
up = GradientBoostingRegressor(loss='quantile',alpha=0.1).fit(X_train, y_train)

In [8]:

alpha = 0.1
X_cal = calib_loader.dataset.data.detach().cpu().numpy()
y_cal = calib_loader.dataset.targets.detach().cpu().numpy()
cqr = MapieQuantileRegressor([low, up, median], alpha=alpha, cv="prefit")

In [9]:
cqr.fit(X_cal, y_cal)



In [79]:
pred, pis = cqr.predict(X_test)

present issues as the upper quantile values might be higher than the
lower quantile values.


In [80]:
pis[:,0, :]

array([[ 0.14959892],
       [ 0.41463179],
       [ 0.19370354],
       ...,
       [-0.11127592],
       [-0.0717903 ],
       [-0.02070856]])

In [45]:
1 - alpha/2

0.95

In [67]:
cqr.estimators_[2].predict(X_test)

array([0.93315248, 1.30475213, 0.76046248, ..., 0.71442029, 0.78294266,
       0.88589693])

In [86]:
r2_score(y_true=y_test, y_pred=cqr.estimators_[0].predict(X_test))

0.271760330650876

In [99]:
MapieQuantileRegressor(estimator=model_005, alpha=0.1).fit(X_cal, y_cal)

ValueError: The base model does not seem to be accepted by MapieQuantileRegressor. 
Give a base model among: 
``quantile_estimator_params.keys()``Or, add your base model to ``quantile_estimator_params``.

In [7]:
model = CQR_logits(RegFNN(), calib_loader=calib_loader, alpha=0.1)

In [10]:
str(model.__class__)

"<class 'src.models.CQR.CQR_logits'>"

In [12]:
str(model.__class__).startswith("<class 'src.models.CQR")

True

In [4]:
m = GB_quantile_regressor(low=low, up=up, median=median)

In [45]:
y_pred_low=low.predict(X_test).reshape(-1, 1)
y_pred_up = up.predict(X_test).reshape(-1, 1)
np.stack([y_pred_low, y_pred_up], axis=1)

array([[[0.6601436 ],
        [0.81229715]],

       [[0.97332371],
        [1.04386302]],

       [[0.65232105],
        [0.79945844]],

       ...,

       [[0.56359683],
        [0.6430463 ]],

       [[0.59126942],
        [0.62283087]],

       [[0.64179347],
        [0.73259461]]])

In [50]:
pred, y_pis = cqr.predict(X_test)
# y_pis[:, 0, :], y_pis[:, 1, :]

present issues as the upper quantile values might be higher than the
lower quantile values.


In [51]:
pred

array([0.95755559, 1.33461035, 0.79954006, ..., 0.70422397, 0.75772417,
       0.89313995])

In [46]:
pred, ym_pis = m.predict(X_test)
ym_pis.reshape(1, -1)


array([[0.6601436 , 0.81229715, 0.97332371, ..., 0.62283087, 0.64179347,
        0.73259461]])

In [47]:
pred

array([0.95755559, 1.33461035, 0.79954006, ..., 0.70422397, 0.75772417,
       0.89313995])

In [52]:
calib_loader.dataset.data

tensor([[   3.4833,   15.0000,    5.7462,  ...,    2.5564,   33.1500,
         -117.1300],
        [   5.5696,   17.0000,    5.9844,  ...,    3.6562,   33.1500,
         -117.1400],
        [   5.0346,   16.0000,    6.3427,  ...,    4.2584,   33.1500,
         -117.1400],
        ...,
        [   4.1833,   22.0000,    6.5642,  ...,    3.1537,   37.7600,
         -121.1100],
        [   3.4426,    7.0000,    5.4164,  ...,    3.2852,   37.7300,
         -121.1600],
        [   3.4009,   22.0000,    6.2148,  ...,    3.3043,   37.7600,
         -121.2500]], dtype=torch.float64)

In [2]:
inputs = torch.tensor([[0.4630, 2.0476],
        [2.2505, 3.0956],
        [1.1385, 3.4338],
        [1.5190, 3.5071],
        [2.0850, 2.6518],
        [1.4225, 2.4600],
        [0.9652, 2.3409],
        [1.1852, 4.8402],
        [0.8857, 2.5145],
        [1.7450, 3.5207],
        [1.1903, 2.0947],
        [1.1560, 2.0714],
        [1.7541, 2.7718],
        [1.5430, 2.9079],
        [0.7063, 1.6242],
        [0.9611, 2.4948],
        [1.6540, 2.8775],
        [0.9596, 1.6170],
        [0.4651, 0.9665],
        [1.2414, 1.8720],
        [2.4025, 4.9447],
        [1.2838, 3.1998],
        [0.6027, 1.3405],
        [0.9855, 1.7962],
        [1.5674, 2.4610],
        [0.9095, 1.2087],
        [1.9868, 2.4648],
        [1.9655, 3.0575],
        [2.1278, 2.8847],
        [1.9226, 3.1330],
        [0.7184, 0.9206],
        [2.1120, 3.3875]])
targets = torch.tensor([0, 2, 1, 3, 1, 2, 1, 1, 2, 2, 2, 1, 2, 3, 1, 0, 2, 0, 0, 2, 2, 2, 0, 1,
        2, 0, 1, 2, 2, 3, 0, 2])

In [3]:
model = Conformalized_quantile_regression_logits()

In [4]:
model.fit_precomputed_logits(inputs, targets)

In [2]:
0.4630 - 0.9611

-0.49809999999999993