<a href="https://colab.research.google.com/github/jklbpl/Conformal/blob/main/regression_nonconf_abserror_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Regression example

## Non-conformal predictor. Regression example.

In [18]:
!pip install nonconformist

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [19]:
from sklearn.ensemble import RandomForestRegressor
from nonconformist.cp import IcpRegressor
from nonconformist.nc import NcFactory

In [20]:
from nonconformist.nc import SignErrorErrFunc
from nonconformist.nc import AbsErrorErrFunc

In [21]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.datasets import load_diabetes

In [22]:
#data_data, data_target = fetch_california_housing(return_X_y= True)
data_data, data_target = load_diabetes(return_X_y= True)
df_features = pd.DataFrame(data_data)
df_target = pd.DataFrame(data_target)
idx_size = df_target.size

np.random.seed(2)

idx = np.random.permutation(len(data_data))

# test = 10%, test(test+calib) = 90% (80%+20%)
test_size = int(idx_size  * 0.1)
train_size = idx_size  - test_size
calib_size = int(train_size * 0.2)
train_size = train_size - calib_size

idx_train, idx_cal, idx_test = idx[:train_size], idx[train_size:train_size + calib_size], idx[train_size + calib_size:]


print('Test size: {}'.format(test_size))
print('Calibration size: {}'.format(calib_size))
print('Train size: {}'.format(train_size))


Test size: 44
Calibration size: 79
Train size: 319


In [23]:
model = RandomForestRegressor()	# Create the underlying model
nc = NcFactory.create_nc(model,err_func = AbsErrorErrFunc())	# Create a default nonconformity function
icp = IcpRegressor(nc)			# Create an inductive conformal regressor

# Fit the ICP using the proper training set
icp.fit(data_data[idx_train, :], data_target[idx_train])

# Calibrate the ICP using the calibration set
icp.calibrate(data_data[idx_cal, :], data_target[idx_cal])

# Produce predictions for the test set, with confidence 95%
prediction = icp.predict(data_data[idx_test, :], significance=0.05)

# Print the first 5 predictions
print(prediction[:5, :])

[[ 41.73 248.13]
 [  8.65 215.05]
 [ 81.13 287.53]
 [-32.73 173.67]
 [100.35 306.75]]


In [24]:
predictions_cal = model.predict(data_data[idx_cal,:])
predictions_test = model.predict(data_data[idx_test,:])

In [25]:
#abs error func
def abserror(prediction_set, y):
  return np.abs(prediction_set - y)

In [26]:
#apply inverse abs err, or err_distance
def abs_err_inv(cal_score, significance, sign_is_nondef):
  if sign_is_nondef is False:
    cal_scores_sorted =  np.sort(cal_score)[::-1]
    quantile_significance = np.quantile(cal_scores_sorted, 1-significance) 
    cal_scores_sorted_bool = cal_scores_sorted >= quantile_significance
    for i in range(len(cal_scores_sorted)):
      if cal_scores_sorted_bool[i]:
        number = i
    return cal_scores_sorted[number]

  if sign_is_nondef is True:
    nc = np.sort(cal_score)[::-1]
    border = int(np.floor(significance * (nc.size + 1))) - 1
    border = min(max(border, 0), nc.size - 1)
    return np.vstack([nc[border], nc[border]])
  

In [27]:
cal_scores = abserror(predictions_cal, data_target[idx_cal])

In [29]:
border = abs_err_inv(cal_scores, 0.05,sign_is_nondef = False)
intervals = np.zeros((idx_test.size, 2))
intervals[:, 0] = predictions_test - border
intervals[:, 1] = predictions_test + border
print(np.sum(intervals != prediction))

0


In [30]:
predictions_test

array([144.93, 111.85, 184.33,  70.47, 203.55, 192.6 ,  76.99, 223.49,
        94.1 , 104.86, 194.87, 195.39,  90.28, 187.97, 120.21, 102.98,
       228.66, 152.52, 207.5 , 177.88,  79.93, 194.67, 194.5 , 254.97,
       186.91, 224.91, 196.32, 203.86, 140.33, 138.72, 188.31, 137.65,
        90.92,  78.39, 123.16, 142.08,  90.49, 208.16, 117.32, 107.08,
        98.1 , 135.74, 177.37, 222.47])

In [31]:
#no significance
# Produce predictions for the test set
prediction2 = icp.predict(data_data[idx_test, :])

# Print the first 5 predictions
#print(prediction2[:, :, 0])

In [33]:
#apply abs err inverse for each significance[i]
significance_nondefined = np.arange(0, 1, 0.01)
intervals_none = np.zeros((idx_test.size, 2, (significance_nondefined.size)))
for i, j in enumerate(significance_nondefined):

  borders = abs_err_inv(cal_scores, j,sign_is_nondef = False)
  intervals_none[:, 0, i] = predictions_test - borders
  intervals_none[:, 1, i] = predictions_test + borders

print(np.sum(prediction2[:,:,:] != intervals_none[:,:,:99]))
print(np.max(np.abs(prediction2 - intervals_none[:,:,:99])))

6072
5.100000000000023


In [None]:
significance_nondefined2 = np.arange(0.1, 1, 0.01)
intervals_none2 = np.zeros((idx_test.size, 2, (significance_nondefined2.size)))
norm = np.ones(int(idx_test.size))
for i, j in enumerate(significance_nondefined2):

  err_dist = abs_err_inv(cal_scores, j, sign_is_nondef = True)
  err_dist = np.hstack([err_dist] * idx_test.size)
  err_dist *= norm

  intervals_none2[:, 0, i] = predictions_test - err_dist[0, :]
  intervals_none2[:, 1, i] = predictions_test + err_dist[0, :]
