# Some notes on Conformal Sets

By Andy Wheeler

Uses NIJ recidivism data as an example 

In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# NIJ Recidivism data with some feature engineering
pdata = pd.read_csv('NIJRecid.csv') # NIJ recidivism data

# Train/test split and fit model
train = pdata[pdata['Training_Sample'] == 1]
test = pdata[pdata['Training_Sample'] == 0]

yvar = 'Recidivism_Arrest_Year1'
xvar = list(pdata)[2:]

# Random forest, need to set OOB to true
# for conformal (otherwise need to use a seperate calibration sample)
rf = RandomForestClassifier(max_depth=5,min_samples_leaf=100,random_state=10,n_estimators=1000,oob_score=True)
rf.fit(train[xvar],train[yvar])

# Out of bag predictions
probs = rf.oob_decision_function_
probs

array([[0.72943959, 0.27056041],
       [0.69507844, 0.30492156],
       [0.69035084, 0.30964916],
       ...,
       [0.88332402, 0.11667598],
       [0.8910954 , 0.1089046 ],
       [0.64774593, 0.35225407]])

In [2]:
# conditional predictions for actual 1's
p1 = probs[train[yvar]==1,1]

# recall 95% coverage
k = 95
cover95 = np.percentile(p1,100-k)
print(f'Threshold to have conformal set of {k}% for capturing recidivism')
print(f'{cover95:,.3f}')

# Now can check out of sample
ptest = rf.predict_proba(test[xvar])
out_cover = (ptest[test[yvar]==1,1] > cover95).mean()
print(f'\nOut of sample coverage at {k}%')
print(f'{out_cover:,.3f}')

Threshold to have conformal set of 95% for capturing recidivism
0.190

Out of sample coverage at 95%
0.951


In [3]:
# can also set the false negative rate in much the same way
p0 = probs[train[yvar]==0,0]

# false negative rate set to 5%
k = 95
cover95 = np.percentile(p0,100-k)
print(f'Threshold (for 0 class) to have conformal set of {k}% for low risk')
print(f'{cover95:,.3f}')

# Now can check out of sample
out_cover = (ptest[test[yvar]==0,0] > cover95).mean()
print(f'\nOut of sample coverage at {k}%')
print(f'{out_cover:,.3f}')

Threshold (for 0 class) to have conformal set of 95% for low risk
0.566

Out of sample coverage at 95%
0.953


In [4]:
# This models calibration is not very good, it is overfit
dfp = pd.DataFrame(probs,columns=['Pred0','Pred1'],index=train.index)
dfp['y'] = train[yvar]
dfp['bins'] = pd.qcut(dfp['Pred1'],10)
dfp.groupby('bins')[['y','Pred1']].sum()

Unnamed: 0_level_0,y,Pred1
bins,Unnamed: 1_level_1,Unnamed: 2_level_1
"(0.07869999999999999, 0.163]",140,235.249857
"(0.163, 0.208]",233,336.966457
"(0.208, 0.246]",357,411.398756
"(0.246, 0.277]",403,473.759776
"(0.277, 0.303]",506,523.80945
"(0.303, 0.327]",575,567.608369
"(0.327, 0.353]",646,612.189157
"(0.353, 0.38]",723,660.005332
"(0.38, 0.423]",810,720.943426
"(0.423, 0.559]",984,836.295588


In [5]:
# So lets do a logit model to try to set the false positive rate
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve

# Making a second calibration set
train1, cal1 = train_test_split(train,train_size=10000)
logitm = LogisticRegression(random_state=10,penalty=None,max_iter=100000)
logitm.fit(train1[xvar],train1[yvar])
probsl = logitm.predict_proba(cal1[xvar])

# Can see here that the calibration is much better
dflp = pd.DataFrame(probsl,columns=['Pred0','Pred1'],index=cal1.index)
dflp['y'] = cal1[yvar]
dflp['bins'] = pd.qcut(dflp['Pred1'],10)
dflp.groupby('bins')[['y','Pred1']].sum()

Unnamed: 0_level_0,y,Pred1
bins,Unnamed: 1_level_1,Unnamed: 2_level_1
"(0.0295, 0.114]",56,65.576285
"(0.114, 0.166]",101,113.708681
"(0.166, 0.209]",127,150.667646
"(0.209, 0.248]",184,184.134466
"(0.248, 0.286]",220,214.262853
"(0.286, 0.324]",241,244.615265
"(0.324, 0.368]",286,277.296813
"(0.368, 0.423]",327,317.760586
"(0.423, 0.504]",363,369.670743
"(0.504, 0.787]",449,469.151749


In [6]:
# The batch way to set the false positive rate
ptestl = logitm.predict_proba(test[xvar])
dftp = pd.DataFrame(ptestl,columns=['Pred0','Pred1'],index=test.index)
dftp['y'] = test[yvar]

dftp.sort_values(by='Pred1',ascending=False,inplace=True)
dftp['PredictedFP'] = (1 - dftp['Pred1']).cumsum()
dftp['AcutalFP'] = (dftp['y'] == 0).cumsum()
dftp['CumN'] = np.arange(dftp.shape[0]) + 1
dftp['PredRate'] = dftp['PredictedFP']/dftp['CumN']
dftp['ActualRate'] = dftp['AcutalFP']/dftp['CumN']
dftp.iloc[range(1000,7001,1000)]

Unnamed: 0,Pred0,Pred1,y,PredictedFP,AcutalFP,CumN,PredRate,ActualRate
3299,0.53104,0.46896,1,440.625069,437,1001,0.440185,0.436563
22901,0.611436,0.388564,0,1015.087203,1030,2001,0.50729,0.514743
24230,0.667773,0.332227,0,1655.994326,1650,3001,0.551814,0.549817
24136,0.716701,0.283299,0,2347.628958,2348,4001,0.586761,0.586853
17456,0.763459,0.236541,0,3088.117906,3095,5001,0.6175,0.618876
22570,0.815176,0.184824,0,3876.468594,3876,6001,0.64597,0.645892
17633,0.87867,0.12133,0,4721.816852,4727,7001,0.674449,0.675189


In [7]:
# Using precision to set the threshold (based on calibration set)
fp_set = 0.45
pr_data = precision_recall_curve(cal1[yvar], probsl[:,1])
loc = np.arange(pr_data[0].shape[0])[pr_data[0] > fp_set].min()
thresh_fp = pr_data[2][loc]

print(f'Threshold estimate for FP rate at {fp_set}')
print(f'{thresh_fp:,.3f}')

print(f'\nActual FP rate in test set at threshold {thresh_fp:,.3f}')
test_fprate = 1 - test[yvar][ptest[:,1] > thresh_fp].mean()
print(f'{test_fprate:,.3f}') # this is not a very good estimate!

Threshold estimate for FP rate at 0.45
0.333

Actual FP rate in test set at threshold 0.333
0.549


In [8]:
# I am not sure about this estimator either
from sklearn.metrics import det_curve

ds = det_curve(cal1[yvar], probsl[:,1]) # calibration sample
ds = pd.DataFrame(ds,index=['fpr','fnr','thresholds']).T
fpr_det = ds[ds['fpr'] < fp_set].head(1)
thresh_fp = fpr_det.iloc[0][2]

print(f'Threshold estimate for FP rate at {fp_set} (using sklearn det_curve)')
print(f'{thresh_fp:,.3f}')

print(f'\nActual FP rate in test set at threshold {thresh_fp:,.3f}')
test_fprate = 1 - test[yvar][ptest[:,1] > thresh_fp].mean()
print(f'{test_fprate:,.3f}') # this is terrible

Threshold estimate for FP rate at 0.45 (using sklearn det_curve)
0.272

Actual FP rate in test set at threshold 0.272
0.611
