In [1]:
from counterfactuals.explainers import Fimap
from counterfactuals.constraints import ValueMonotonicity, ValueNominal, Freeze
from data import AdultData
from tensorflow import keras
import random
import numpy as np
import tensorflow as tf
import os
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

def reset_random_seeds(seed=42):
   os.environ['PYTHONHASHSEED']=str(seed)
   tf.random.set_seed(seed)
   np.random.seed(seed)
   random.seed(seed)

reset_random_seeds()

In [None]:
from sklearn.ensemble import RandomForestClassifier
from counterfactuals.explainers import Fimap
from data import AdultData

adult_data = AdultData('data/datasets/adult.csv')

rf = RandomForestClassifier(n_estimators=10)
rf.fit(adult_data.X_train, adult_data.y_train)
predictions = rf.predict(adult_data.X_train)

categorical_columns = ['workclass', 'education', 'marital.status', 'occupation',
                               'relationship', 'race', 'sex', 'native.country']
freeze_columns = ['race', 'sex', 'native.country']

constraints = [
            ValueNominal(columns=categorical_columns), 
            Freeze(columns=freeze_columns),
            ValueMonotonicity(['age'], 'increasing')
]

fimap = Fimap(constraints=constraints)
fimap.fit(adult_data.raw_X_train, predictions, epochs=400)

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

adult_data = AdultData('data/datasets/adult.csv')


rf=RandomForestClassifier(n_estimators=10)
rf.fit(adult_data.X_train, adult_data.y_train)
print(classification_report(adult_data.y_test, rf.predict(adult_data.X_test)))
model = rf

              precision    recall  f1-score   support

           0       0.90      0.75      0.82      4533
           1       0.50      0.76      0.60      1500

    accuracy                           0.75      6033
   macro avg       0.70      0.75      0.71      6033
weighted avg       0.80      0.75      0.77      6033



In [3]:
"""
adult_data = AdultData('data/datasets/adult.csv')
model = keras.models.load_model('models/model_adult')
model_predictions = model.predict(adult_data.X_train)
model_predictions[model_predictions > 0.5] = 1
model_predictions[model_predictions <= 0.5] = 0
"""

"\nadult_data = AdultData('data/datasets/adult.csv')\nmodel = keras.models.load_model('models/model_adult')\nmodel_predictions = model.predict(adult_data.X_train)\nmodel_predictions[model_predictions > 0.5] = 1\nmodel_predictions[model_predictions <= 0.5] = 0\n"

In [4]:
train_X, test_X, train_y, test_y = adult_data.raw_X_train, adult_data.raw_X_test, adult_data.raw_y_train, adult_data.raw_y_test
model_predictions = rf.predict(adult_data.X_train)

In [5]:
def compare(i, fimap):
    original_class = model.predict(adult_data.X_train.iloc[i].to_frame().T)[0]
    x = test_X.iloc[i]
    cf = fimap.generate(x)
    x["income"] = original_class
    surrogate_class = fimap._s_prediction
    cf["income"] = surrogate_class
    return pd.concat([x.to_frame().T, cf])

In [6]:
    
fimap = Fimap(constraints=[ValueNominal(adult_data.categorical_columns)])
fimap.fit(adult_data.raw_X_train, model_predictions, epochs=400)

dfs = [compare(i, fimap) for i in range(200)]
concat_df = pd.concat(dfs)
concat_df.to_csv("results_nominal_constraints.csv")


Training s
Training loss (for one batch): 0.6841 
Training accuracy 0.6051045
Training loss (for one batch): 0.6702 
Training accuracy 0.64175606
Training loss (for one batch): 0.6458 
Training accuracy 0.6636274
Training loss (for one batch): 0.6093 
Training accuracy 0.6792607
Training loss (for one batch): 0.5655 
Training accuracy 0.69105417
Training loss (for one batch): 0.5295 
Training accuracy 0.7002281
Training loss (for one batch): 0.5109 
Training accuracy 0.7076197
Training loss (for one batch): 0.5029 
Training accuracy 0.7138674
Training loss (for one batch): 0.4954 
Training accuracy 0.71941
Training loss (for one batch): 0.4931 
Training accuracy 0.72418344
Training loss (for one batch): 0.4885 
Training accuracy 0.7282899
Training loss (for one batch): 0.4875 
Training accuracy 0.7319031
Training loss (for one batch): 0.4871 
Training accuracy 0.7350088
Training loss (for one batch): 0.4843 
Training accuracy 0.73783785
Training loss (for one batch): 0.4837 
Training 

In [None]:
for constraint in adult_data.constraints:
    print(constraint)
    
fimap = Fimap(constraints=adult_data.constraints)
fimap.fit(adult_data.raw_X_train, model_predictions, epochs=400)


dfs = [compare(i, fimap) for i in range(200)]
concat_df = pd.concat(dfs)
concat_df.to_csv("results_normal_constraints.csv")

ValueNominal(columns=['workclass', 'marital.status', 'occupation', 'race', 'sex'], values=[])
Freeze(columns=['race', 'sex'])

Training s
Training loss (for one batch): 0.6849 
Training accuracy 0.600897
Training loss (for one batch): 0.6697 
Training accuracy 0.65645
Training loss (for one batch): 0.6434 
Training accuracy 0.68066704
Training loss (for one batch): 0.6026 
Training accuracy 0.69414186
Training loss (for one batch): 0.5555 
Training accuracy 0.70330685
Training loss (for one batch): 0.5231 
Training accuracy 0.7106097
Training loss (for one batch): 0.5099 
Training accuracy 0.7168974
Training loss (for one batch): 0.4979 
Training accuracy 0.72246045
Training loss (for one batch): 0.4905 
Training accuracy 0.72732365
Training loss (for one batch): 0.4888 
Training accuracy 0.73159516
Training loss (for one batch): 0.4864 
Training accuracy 0.73532796
Training loss (for one batch): 0.4857 
Training accuracy 0.7386274
Training loss (for one batch): 0.4820 
Training accura

In [None]:
for constraint in adult_data.constraints:
    print(constraint)
    
fimap = Fimap(constraints=adult_data.constraints + [ValueMonotonicity(['age'], 'increasing')])
fimap.fit(adult_data.raw_X_train, model_predictions, epochs=400)


dfs = [compare(i, fimap) for i in range(200)]
concat_df = pd.concat(dfs)
concat_df.to_csv("results_normal_constraints_age_increasing.csv")

ValueNominal(columns=['workclass', 'marital.status', 'occupation', 'race', 'sex'], values=[])
Freeze(columns=['race', 'sex'])

Training s
Training loss (for one batch): 0.6842 
Training accuracy 0.5449325
Training loss (for one batch): 0.6692 
Training accuracy 0.59204733
Training loss (for one batch): 0.6429 
Training accuracy 0.62741506
Training loss (for one batch): 0.6012 
Training accuracy 0.6526979
Training loss (for one batch): 0.5535 
Training accuracy 0.67086923
Training loss (for one batch): 0.5185 
Training accuracy 0.68438596
Training loss (for one batch): 0.5031 
Training accuracy 0.69471055
Training loss (for one batch): 0.4966 
Training accuracy 0.7033498
Training loss (for one batch): 0.4911 
Training accuracy 0.71066624
Training loss (for one batch): 0.4881 
Training accuracy 0.7168504
Training loss (for one batch): 0.4846 
Training accuracy 0.72223467
Training loss (for one batch): 0.4837 
Training accuracy 0.7268163
Training loss (for one batch): 0.4818 
Training acc