# Conformal Prediction - Notebook

### Table of Contents

* [Import Libraries and Data](#chapterLibraryData)

* [Chapter 1. Manual Prediction Sets generation](#chapter1)
* [Chapter 2. MapieClassifier Prediction Sets generation](#chapter2)
  * [Section 2.1 Coverage and Set Size](#section_2_1)
  * [Section 2.2 Level Generation](#section_2_2)
  * [Section 2.3 Data File export](#section_2_3)

### Import Libraries and Data <a class="anchor" id="chapterLibraryData"></a>

In [6]:
import joblib
import pandas as pd
import numpy as np
import plotly.express as px
from mapie.classification import MapieClassifier
from mapie.metrics import classification_coverage_score
from mapie.metrics import classification_mean_width_score

In [None]:
# Load the persisted classifier that the rest of the notebook wraps.
model_rf = joblib.load('../models/model_rf.joblib')

# Feature tables keep the first CSV column as their index; for the target
# files only the first remaining column is kept, so each target is a Series.
X_test = pd.read_csv('../data/X_test_cp.csv', index_col=0)
y_test = pd.read_csv('../data/y_test_cp.csv', index_col=0).iloc[:, 0]
X_calibration = pd.read_csv('../data/X_calibration_cp.csv', index_col=0)
y_calibration = pd.read_csv('../data/y_calibration_cp.csv', index_col=0).iloc[:, 0]

# Label encoder; its `classes_` array is used below to turn boolean
# membership masks back into label values.
le = joblib.load('label_encoder.joblib')

### Manual Prediction Sets generation <a class="anchor" id="chapter1"></a>

In [None]:
# Display the model's predict_proba output for the test features.
model_rf.predict_proba(X_test)

In [None]:
# Build prediction sets by hand from the calibration data, using the
# non-conformity score 1 - P(true class).
n = len(X_calibration)
predictions = model_rf.predict_proba(X_calibration)
# For every calibration row pick the probability of its true label.
# NOTE(review): assumes y_calibration holds integer column positions into
# the predict_proba output -- confirm against the label encoding.
prob_true_class = predictions[np.arange(n), y_calibration]
scores = 1 - prob_true_class

alpha = 0.05
# Finite-sample corrected quantile level.
q_level = np.ceil((n + 1) * (1 - alpha)) / n
if q_level > 1:
    # Too few calibration rows for this alpha: np.quantile rejects levels
    # above 1, and the valid threshold is +inf (every class is kept).
    qhat = np.inf
else:
    # method="higher" returns an observed calibration score rather than a
    # linearly interpolated (and therefore possibly too small) threshold.
    qhat = np.quantile(scores, q_level, method="higher")
# A class enters the set when its score 1 - p does not exceed the threshold.
prediction_sets = (1 - model_rf.predict_proba(X_test) <= qhat)

print(qhat)
print()
print(prediction_sets)
print()
# Show the decoded sets for the first (up to) five test rows.
for member_mask in prediction_sets[:5]:
    print(le.classes_[member_mask])

In [None]:
# Display the predict_proba output for the test row at position 2.
model_rf.predict_proba(X_test)[2]

In [None]:
# Display the manually computed membership mask for the same row (position 2).
prediction_sets[2]

### MapieClassifier Prediction Sets generation <a class="anchor" id="chapter2"></a>

In [None]:
# Wrap the loaded model in MapieClassifier (cv="prefit", method="score")
# and fit the wrapper on the calibration split.
cp = MapieClassifier(estimator=model_rf, cv="prefit", method="score")
cp.fit(X_calibration, y_calibration)

# Predict at alpha=0.05, drop length-1 axes from the returned set array and
# decode each row's membership mask into a list of labels via le.classes_.
y_pred, y_set = cp.predict(X_test, alpha=0.05)
y_set = [list(le.classes_[member_mask]) for member_mask in np.squeeze(y_set)]

# Number of labels in each prediction set.
set_sizes = list(map(len, y_set))

d = {
    "set": y_set,
    "size": set_sizes,
}
df = pd.DataFrame(d)
df

In [None]:
# Count how often each distinct prediction set occurs.
df["set"].value_counts()

### Coverage and Set Size <a class="anchor" id="section_2_1"></a>

In [None]:
# Recompute the raw prediction-set array at alpha=0.05 and drop its
# length-1 axes; the metric helpers below are fed this array, not the
# decoded label lists.
y_pred, y_set = cp.predict(X_test, alpha=0.05)
y_set = np.squeeze(y_set)

# Overall coverage against y_test and mean prediction-set width.
cov = classification_coverage_score(y_test, y_set)
setsize = classification_mean_width_score(y_set)
print(
    'Coverage: {:.2%}'.format(cov),
    "Avg. set size: {:.2f}".format(setsize),
    sep="\n",
)

In [None]:
def class_wise_performance(y_new, y_set, classes):
    """Report coverage and average prediction-set size for each class.

    Args:
        y_new: True labels. They are compared element-wise with every entry
            of ``classes`` and filtered with the resulting boolean mask
            (a pandas Series or NumPy array is assumed).
        y_set: Prediction-set array whose rows line up with ``y_new``; it is
            filtered with the same mask, so it must support boolean-mask
            indexing (e.g. a NumPy array).
        classes: Iterable of label values to report on.

    Returns:
        pandas.DataFrame with one row per entry of ``classes`` and the
        columns ``class``, ``coverage`` and ``avg. set size``.
    """
    rows = []
    for label in classes:
        # Build the mask once and reuse it for both the labels and the sets.
        in_class = y_new == label
        y_set_class = y_set[in_class]
        rows.append({
            "class": label,
            "coverage": classification_coverage_score(y_new[in_class], y_set_class),
            "avg. set size": classification_mean_width_score(y_set_class),
        })

    # Build the frame in one go: calling pd.concat per iteration copies the
    # accumulated rows every time and starts from an empty frame, which
    # recent pandas versions warn about.
    return pd.DataFrame(rows, columns=["class", "coverage", "avg. set size"])

# NOTE(review): `classes` is assigned here, but the call below passes
# le.classes_ instead, so this list is not used by the call -- confirm
# which of the two is intended.
classes = [0, 1] 
# Print the per-class coverage / set-size table for the test split.
print(class_wise_performance(y_test, y_set, le.classes_))

### Level Generation <a class="anchor" id="section_2_2"></a>

In [None]:
# Attach the point predictions, the true labels and the prediction sets to
# the test features so that everything can be exported together.
predictions = model_rf.predict(X_test)
X_test["Predictions"] = predictions
X_test["Actual"] = y_test
# At this point y_set is the 2-D boolean membership array produced in the
# coverage cell; a 2-D array cannot be assigned to a single column, and the
# level mapping defined below compares against label lists such as [0] or
# [0, 1]. Decode every row's mask into its list of labels first.
X_test["Sets"] = [list(le.classes_[member_mask]) for member_mask in y_set]

In [None]:
def determine_level(sets):
    """Translate a prediction set into a confidence label.

    Args:
        sets: Prediction set, compared by equality with ``[0]``, ``[1]`` and
            ``[0, 1]``.

    Returns:
        ``'certain'`` for a set equal to ``[0]`` or ``[1]``, ``'uncertain'``
        for a set equal to ``[0, 1]``, and ``None`` for anything else (for
        example an empty set).
    """
    single_label_sets = ([0], [1])
    if sets in single_label_sets:
        return 'certain'
    return 'uncertain' if sets == [0, 1] else None

# Label every row by applying determine_level to its prediction set.
X_test['level'] = X_test['Sets'].apply(determine_level)

In [None]:
# Count how many rows fall into each level.
X_test['level'].value_counts()

In [None]:
# Display the first row whose "Predictions" value equals 1.
X_test[X_test['Predictions'] == 1].iloc[0]

### Data file export <a class="anchor" id="section_2_3"></a>

In [None]:
# Write X_test, including its index and the columns added above, to a CSV
# file in the current working directory.
X_test.to_csv("conformal_prediction.csv", index=True)  