In [1]:
import tqdm
def nop(it, *a, **k):
    """Return ``it`` unchanged, ignoring every extra argument.

    Accepts the same call shape as a progress-bar wrapper (an iterable
    followed by arbitrary positional/keyword options) but does no wrapping.
    """
    return it

# Swap tqdm.tqdm for the pass-through before the project imports below run.
# NOTE(review): presumably this silences progress bars inside QUERYLANG /
# queries / optimization — confirm those modules look up tqdm.tqdm.
tqdm.tqdm = nop
from QUERYLANG import CFQL, Instances
import queries
from optimization import l0_metric 

from tqdm.notebook import tqdm as tqdm_notebook
import pickle


import warnings
warnings.filterwarnings('ignore')

# Load trained models and test instances

In [2]:
# Load the pickled models and the pickled test instances.
# Each file is opened in a `with` block so its handle is closed as soon as the
# payload has been read, and `models` / `instances` are only ever bound to the
# loaded objects (never to an open file handle).
# NOTE(security): pickle.load can execute arbitrary code; only load files from
# a trusted source.
with open('models/Adult Income/adult_models.pkl', 'rb') as models_file:
    models = pickle.load(models_file)
with open('Instances/Adult Income/adult_test_instances.pkl', 'rb') as instances_file:
    instances = pickle.load(instances_file)

# Init CfDB object 

In [4]:
# Construct the CFQL object from the loaded instances and models; the later
# cells create views and run queries through this `cfql` handle.
cfql = CFQL(instances, models)

# False Positive predictions

In [5]:
# SQL selecting PredictionIds where the instance's income is 0 but the
# predicted Label is 1, restricted to ClassifierId 0 and 1 and capped at five
# rows per classifier via ROW_NUMBER().
# NOTE(review): the window has no ORDER BY, so which five rows are kept per
# classifier is presumably up to the SQL engine — confirm this is acceptable.
prediction_q = ''' 
SELECT T.PredictionId
FROM (
    SELECT Predictions.PredictionId , ROW_NUMBER() OVER(PARTITION BY Predictions.ClassifierId) AS rank
    FROM Instances, Predictions
    WHERE Instances.InstanceId = Predictions.InstanceId
      AND Instances.income = 0 and  Predictions.Label = 1
      AND Predictions.ClassifierId IN (0,1)) as T
WHERE T.rank <= 5
'''

## CounterFactual View

In [None]:
# Counterfactuals that do not change the gender and the race.
# The constraint is a (query-kind, options) tuple bound to a single name; the
# previous `cfs_q = cfs_query=(...)` form was a chained assignment that also
# created an unused global called `cfs_query`.
# NOTE(review): this cell uses queries.AND_NOT while the later CeC-only cell
# uses queries.NOT_AND for a same-worded constraint — confirm both are intended.
cfs_q = (queries.AND_NOT, {'features': ['gender', 'race']})

# Create one view per counterfactual generator, all over the same predictions
# and the same constraint. The call order is kept as written.
cfql.create_cfs_view(cf_type='GrowingSpheresCFs', prediction_query=prediction_q, cfs_query=cfs_q)
cfql.create_cfs_view(cf_type='CecCFs', prediction_query=prediction_q, cfs_query=cfs_q)
cfql.create_cfs_view(cf_type='DiverseCFs', prediction_query=prediction_q, cfs_query=cfs_q)

## Analysis: CF - L0

In [None]:
# Which generated view pair (my_cfs_N / my_prediction_cfs_N) to analyse.
suffix = 1

# Inner query: COUNT(*) per (ClassifierId, CfId), aliased L0.
# Outer query: AVG of those counts per ClassifierId.
Q = f''' 
SELECT ClassifierId, AVG(L0) as L0
FROM (
    SELECT ClassifierId, my_cfs_{suffix}.CfId , COUNT(*) AS L0
    FROM  my_cfs_{suffix}, my_prediction_cfs_{suffix}, Predictions
    WHERE my_cfs_{suffix}.CfId = my_prediction_cfs_{suffix}.CfId 
      AND my_prediction_cfs_{suffix}.PredictionId = Predictions.PredictionId
      GROUP BY ClassifierId, my_cfs_{suffix}.CfId ) AS T
GROUP BY ClassifierId
'''

# Run the query without parallelism, then swap the numeric classifier ids for
# readable names.
classifier_names = {0: 'Random Forest', 1: 'Linear'}
df = cfql.execute(Q, parallel=False)
df = df.replace({'ClassifierId': classifier_names})

# Displayed as the cell output only; the renamed frame is not stored in `df`.
df.rename(columns={'ClassifierId': 'Classifier'})

# CFs - highlight the difference

In [None]:
# Fetch the distinct CfIds of view my_cfs_{suffix}; `suffix` is the value set
# in the L0-analysis cell above.
Q = f'''
SELECT distinct CfId
FROM my_cfs_{suffix}
'''
cf_id_frame = cfql.execute(Q, parallel=False)

# Flatten the single-column result into a 1-D array of ids and hand it to
# reconstruct(); its return value is the cell's displayed output.
cf_ids = cf_id_frame.values.reshape(-1)
cfql.reconstruct(cf_ids)

# EFFICIENT EVALUATION - for CeC and Random Forest

In [None]:
# SQL keeping at most five joined Instances/Predictions rows per (gender, race)
# group, among rows with ClassifierId = 0 and Label = 0; rows inside each group
# are numbered in RANDOM() order.
# NOTE(review): ORDER BY RANDOM() is unseeded here, so the selected rows
# presumably differ between runs — confirm whether reproducibility is needed.
uniform_predictions='''
SELECT * 
FROM (
    SELECT *, ROW_NUMBER() OVER(partition by gender, race ORDER BY RANDOM() DESC) AS 'IdInGroup'
    FROM Instances, Predictions
    WHERE Instances.instanceId = Predictions.instanceId
      AND Predictions.ClassifierId = 0
      AND Predictions.Label = 0
    ) AS T
WHERE T.'IdInGroup'<=5
'''

In [None]:
# A query searching for CFs that do not modify the gender and race.
# The constraint is bound to one name only; the previous
# `cfs_not_and = cfs_query=(...)` form was a chained assignment that also
# (re)bound an unused global called `cfs_query`.
cfs_not_and = (queries.NOT_AND, {'features': ['gender', 'race']})

# Create the CecCFs view over the group-sampled predictions.
cfql.create_cfs_view(cf_type='CecCFs', prediction_query=uniform_predictions, cfs_query=cfs_not_and)

In [None]:
# Select every row of view my_cfs_4 and pass the query to batch_execute
# together with l0_metric and threshold=2.
# NOTE(review): the suffix 4 is hard-coded — presumably it names the view made
# by the preceding create_cfs_view call; confirm after a fresh Run All.
Q = """ 
SELECT * FROM  my_cfs_4
"""
cfql.batch_execute(Q, metric=l0_metric, threshold=2)