In [1]:
import pandas as pd

from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import Adam

from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

from mapie.regression import MapieQuantileRegressor

2024-11-19 11:18:58.152873: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-19 11:18:58.315533: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# **Prepare data**

Load, preprocess and split data into train, test and calibration sets:

In [2]:
data = fetch_california_housing()
X, y = data.data, data.target

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test_cal, y_train, y_test_cal = train_test_split(X, y, test_size=0.3, random_state=42)
X_test, X_cal, y_test, y_cal = train_test_split(X_test_cal, y_test_cal, test_size=0.5, random_state=42)

print('Train: ', len(X_train))
print('Test: ', len(X_test))
print('Calibration: ', len(X_cal))

Train:  14448
Test:  3096
Calibration:  3096


# **Train and save model**

Create, train and save Keras model:

In [3]:
nn_model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)
])

nn_model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

nn_model.fit(X_train, y_train, epochs=20, batch_size=32,
             validation_split=0.2, verbose=0)

nn_model.save('model.keras')

2024-11-19 11:19:08.075230: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


# **MAPIE**

Load and wrap the model for `sklearn` compatibility (the wrapper assumes the model is already trained):

In [4]:
class TrainedKerasRegressorWrapper(BaseEstimator, RegressorMixin):
    def __init__(self, model):
        self.model = model

    def fit(self, X, y):
        return self

    def predict(self, X):
        return self.model.predict(X).flatten()

    def __sklearn_is_fitted__(self):
        return True


loaded_model = load_model('model.keras')

model = TrainedKerasRegressorWrapper(loaded_model)

MAPIE quantile regression:

In [5]:
model_list = [model, model, model]

In [6]:
mapie_regressor = MapieQuantileRegressor(
    estimator=model_list, cv='prefit')

mapie_regressor.fit(X_cal, y_cal)

predictions, intervals = mapie_regressor.predict(X_test)

lower_intervals = intervals[:, 0]
upper_intervals = intervals[:, 1]

results = pd.DataFrame({
    'Prediction': predictions.flatten(),
    'Lower Interval': lower_intervals.flatten(),
    'Upper Interval': upper_intervals.flatten(),
    'Amplitude': upper_intervals.flatten() - lower_intervals.flatten(),
    'Actual Value': y_test
})

results.head()







Unnamed: 0,Prediction,Lower Interval,Upper Interval,Amplitude,Actual Value
0,1.29862,0.450377,2.146864,1.696487,1.225
1,3.612658,2.764414,4.460901,1.696487,3.204
2,4.489323,3.641079,5.337566,1.696487,4.507
3,2.928106,2.079862,3.776349,1.696487,2.343
4,1.613712,0.765468,2.461955,1.696487,1.019


**`estimator` tiene que ser una lista de 3 modelos**

```
    estimators_: List[RegressorMixin]
        - [0]: Estimator with quantile value of alpha/2
        - [1]: Estimator with quantile value of 1 - alpha/2
        - [2]: Estimator with quantile value of 0.5
```

Ver:

- https://github.com/scikit-learn-contrib/MAPIE/issues/448
- https://mapie.readthedocs.io/en/stable/generated/mapie.regression.MapieQuantileRegressor.html#mapie-regression-mapiequantileregressor