In [1]:
!pip install RISE

Collecting RISE
  Downloading rise-5.7.1-py2.py3-none-any.whl (4.3 MB)
     |████████████████████████████████| 4.3 MB 4.8 MB/s            
Installing collected packages: RISE
Successfully installed RISE-5.7.1


In [8]:
# Installing the Package

In [6]:
!pip install dice-ml



In [4]:
import numpy as np
import timeit
import random

from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier

import dice_ml
from dice_ml.utils import helpers  # helper functions
from dice_ml import Dice

In [5]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
dataset = helpers.load_adult_income_dataset()

In [12]:
dataset.head()

Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,28,Private,Bachelors,Single,White-Collar,White,Female,60,0
1,30,Self-Employed,Assoc,Married,Professional,White,Male,65,1
2,32,Private,Some-college,Married,White-Collar,White,Male,50,0
3,20,Private,Some-college,Single,Service,White,Female,35,0
4,41,Self-Employed,Some-college,Married,White-Collar,White,Male,50,0


In [14]:
adult_info = helpers.get_adult_data_info()
adult_info

{'age': 'age',
 'workclass': 'type of industry (Government, Other/Unknown, Private, Self-Employed)',
 'education': 'education level (Assoc, Bachelors, Doctorate, HS-grad, Masters, Prof-school, School, Some-college)',
 'marital_status': 'marital status (Divorced, Married, Separated, Single, Widowed)',
 'occupation': 'occupation (Blue-Collar, Other/Unknown, Professional, Sales, Service, White-Collar)',
 'race': 'white or other race?',
 'gender': 'male or female?',
 'hours_per_week': 'total work hours per week',
 'income': '0 (<=50K) vs 1 (>50K)'}

In [15]:
target = dataset["income"]
train_dataset, test_dataset, y_train, y_test = train_test_split(dataset,
                                                                target,
                                                                test_size=0.2,
                                                                random_state=0,
                                                                stratify=target)
x_train = train_dataset.drop('income', axis=1)
x_test = test_dataset.drop('income', axis=1)

In [17]:
x_test.head()

Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week
14946,29,Private,HS-grad,Married,Blue-Collar,White,Female,38
24228,50,Other/Unknown,Some-college,Married,Other/Unknown,White,Male,40
605,50,Private,Bachelors,Married,Professional,White,Male,40
6238,41,Private,School,Married,Blue-Collar,White,Male,30
25954,28,Other/Unknown,Assoc,Separated,Other/Unknown,White,Female,40


In [18]:
d = dice_ml.Data(dataframe=train_dataset, continuous_features=['age', 'hours_per_week'], outcome_name='income')

In [19]:
numerical = ["age", "hours_per_week"]
categorical = x_train.columns.difference(numerical)

categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

transformations = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical)])

# Append classifier to preprocessing pipeline.
# Now we have a full prediction pipeline.
clf = Pipeline(steps=[('preprocessor', transformations),
                      ('classifier', RandomForestClassifier())])
model = clf.fit(x_train, y_train)

In [20]:
# Using sklearn backend
m = dice_ml.Model(model=model, backend="sklearn")
# Using method=random for generating CFs
exp = dice_ml.Dice(d, m, method="random")

In [21]:
e1 = exp.generate_counterfactuals(x_test[0:1], total_CFs=2, desired_class="opposite")
e1.visualize_as_dataframe(show_only_changes=True)

100%|██████████| 1/1 [00:00<00:00,  4.44it/s]

Query instance (original outcome : 0)





Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,29,Private,HS-grad,Married,Blue-Collar,White,Female,38,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,-,Government,Assoc,-,-,-,-,-,1
1,-,Government,Doctorate,-,-,-,-,-,1


In [22]:
e1.visualize_as_dataframe(show_only_changes=False)

Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,29,Private,HS-grad,Married,Blue-Collar,White,Female,38,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,29,Government,Assoc,Married,Blue-Collar,White,Female,38,1
1,29,Government,Doctorate,Married,Blue-Collar,White,Female,38,1


In [23]:
# Changing only age and education
e2 = exp.generate_counterfactuals(x_test[0:1],
                                  total_CFs=2,
                                  desired_class="opposite",
                                  features_to_vary=["education", "occupation"]
                                  )
e2.visualize_as_dataframe(show_only_changes=True)

100%|██████████| 1/1 [00:00<00:00,  3.40it/s]

Query instance (original outcome : 0)





Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,29,Private,HS-grad,Married,Blue-Collar,White,Female,38,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,-,-,Bachelors,-,White-Collar,-,-,-,1
1,-,-,Masters,-,Sales,-,-,-,1


In [24]:
# Restricting age to be between [20,30] and Education to be either {'Doctorate', 'Prof-school'}.
e3 = exp.generate_counterfactuals(x_test[0:1],
                                  total_CFs=2,
                                  desired_class="opposite",
                                  permitted_range={'age': [20, 30], 'education': ['Doctorate', 'Prof-school']})
e3.visualize_as_dataframe(show_only_changes=True)

100%|██████████| 1/1 [00:00<00:00,  4.06it/s]

Query instance (original outcome : 0)





Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,29,Private,HS-grad,Married,Blue-Collar,White,Female,38,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,-,Government,-,Single,-,-,-,-,1
1,-,-,Prof-school,-,White-Collar,-,-,-,1


In [47]:
e4 = exp.generate_counterfactuals(x_test[0:1],
                                  total_CFs=5,
                                  desired_class="opposite",
                                  permitted_range={'age': [28, 30],  'education': ["Some-college",'Doctorate', 'Prof-school']})
e4.visualize_as_dataframe(show_only_changes=True)

100%|██████████| 1/1 [00:00<00:00,  2.21it/s]

Query instance (original outcome : 0)





Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,29,Private,HS-grad,Married,Blue-Collar,White,Female,38,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,-,-,Doctorate,-,White-Collar,-,-,-,1
1,-,Government,Doctorate,-,-,-,-,-,1
2,-,-,Doctorate,-,-,-,Male,-,1
3,-,Self-Employed,-,-,White-Collar,-,-,-,1
4,-,Government,-,-,Professional,-,-,-,1
