# Generating Counterfactuals

In this notebook, we will focus on generating counterfactuals from individual
datapoints. This will be implemented for the following models:

- Naive Bayes
- Fair Bayesian Network
- Fair Random Forest Classifier

By generating counterfactuals, we hope to gain insight into how the model uses
the different attributes in it's decisions.

In [1]:
import sys
import os

module_path = os.path.abspath(os.path.join(".."))
if module_path not in sys.path:
    sys.path.append(module_path)

from forseti.bayesnet import latentLabelClassifier, interpretableNaiveBayes
import pandas as pd
from random import sample
import numpy as np
from forseti.datproc import translate_categorical

df = pd.read_csv("data/adult.csv")

clf = interpretableNaiveBayes()

tmp = df
label = "income"

clf.train(label, tmp, "NB")

In [2]:
datapoint = df[
    (df.gender == 'Female') & 
    (df.race == 'Black') &
    (df.income == '<=50K')
].sample(1)

R = clf.generateCounterfactuals(datapoint, candidates=100, gen=5)

ValueError: No variable missing in data. Nothing to predict

In [None]:
R = R.replace(clf.codes_train)
a = a.replace(clf.codes_train)

In [None]:
a

Unnamed: 0,age,workclass,education,marital-status,occupation,relationship,race,gender,capital-gain,hours-per-week
29622,"(60.8, 75.4]",Private,HS-grad,Divorced,Adm-clerical,Not-in-family,White,Female,"(-4460.355, 16515.0]","(20.6, 40.2]"


In [None]:
R[R['O1'] < 0.5]

Unnamed: 0,age,workclass,education,marital-status,occupation,relationship,race,gender,capital-gain,hours-per-week,O1,O2,O3,O4
74,"(75.4, 90.0]",Private,1st-4th,Divorced,Adm-clerical,Not-in-family,White,Female,"(58257.0, 79128.0]","(20.6, 40.2]",0.0,0.7,3,0.1
97,"(60.8, 75.4]",Federal-gov,1st-4th,Separated,Adm-clerical,Not-in-family,Amer-Indian-Eskimo,Female,"(79128.0, 99999.0]","(20.6, 40.2]",0.0,0.5,5,0.0
135,"(60.8, 75.4]",Private,Doctorate,Divorced,Adm-clerical,Not-in-family,White,Female,"(16515.0, 37386.0]","(79.4, 99.0]",0.138279,0.7,3,0.1
146,"(60.8, 75.4]",Self-emp-inc,Doctorate,Divorced,Prof-specialty,Not-in-family,White,Female,"(-4460.355, 16515.0]","(79.4, 99.0]",0.287894,0.6,4,0.1
155,"(60.8, 75.4]",Private,Doctorate,Divorced,Prof-specialty,Not-in-family,White,Female,"(79128.0, 99999.0]","(79.4, 99.0]",0.0,0.6,4,0.1
179,"(60.8, 75.4]",Private,Doctorate,Divorced,Adm-clerical,Not-in-family,White,Female,"(79128.0, 99999.0]","(79.4, 99.0]",0.0,0.7,3,0.1
187,"(60.8, 75.4]",Private,Doctorate,Divorced,Prof-specialty,Not-in-family,White,Female,"(79128.0, 99999.0]","(20.6, 40.2]",0.0,0.7,3,0.1


In [None]:
df[df['gender'] == 'Female']

Unnamed: 0,age,workclass,education,marital-status,occupation,relationship,race,gender,capital-gain,hours-per-week,income
4,18,?,Some-college,Never-married,?,Own-child,White,Female,0,30,<=50K
8,24,Private,Some-college,Never-married,Other-service,Unmarried,White,Female,0,40,<=50K
12,26,Private,HS-grad,Never-married,Adm-clerical,Not-in-family,White,Female,0,39,<=50K
17,43,Private,HS-grad,Married-civ-spouse,Adm-clerical,Wife,White,Female,0,30,<=50K
18,37,Private,HS-grad,Widowed,Machine-op-inspct,Unmarried,White,Female,0,20,<=50K
...,...,...,...,...,...,...,...,...,...,...,...
48827,37,Private,Assoc-acdm,Divorced,Tech-support,Not-in-family,White,Female,0,40,<=50K
48830,43,State-gov,Some-college,Divorced,Adm-clerical,Other-relative,White,Female,0,40,<=50K
48837,27,Private,Assoc-acdm,Married-civ-spouse,Tech-support,Wife,White,Female,0,38,<=50K
48839,58,Private,HS-grad,Widowed,Adm-clerical,Unmarried,White,Female,0,40,<=50K
