In [1]:
# Import Libraries
from ucimlrepo import fetch_ucirepo 
import pandas as pd

# Sklearn imports
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression

# import dice_ml

In [2]:
# fetch dataset 
adult = fetch_ucirepo(id=2) 

# data (as pandas dataframes) 
X = adult.data.features 
y = adult.data.targets 

# metadata 
print(adult.metadata) 

# variable information 
print(adult.variables) 

{'uci_id': 2, 'name': 'Adult', 'repository_url': 'https://archive.ics.uci.edu/dataset/2/adult', 'data_url': 'https://archive.ics.uci.edu/static/public/2/data.csv', 'abstract': 'Predict whether annual income of an individual exceeds $50K/yr based on census data. Also known as "Census Income" dataset. ', 'area': 'Social Science', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 48842, 'num_features': 14, 'feature_types': ['Categorical', 'Integer'], 'demographics': ['Age', 'Income', 'Education Level', 'Other', 'Race', 'Sex'], 'target_col': ['income'], 'index_col': None, 'has_missing_values': 'yes', 'missing_values_symbol': 'NaN', 'year_of_dataset_creation': 1996, 'last_updated': 'Tue Sep 24 2024', 'dataset_doi': '10.24432/C5XW20', 'creators': ['Barry Becker', 'Ronny Kohavi'], 'intro_paper': None, 'additional_info': {'summary': "Extraction was done by Barry Becker from the 1994 Census database.  A set of reasonably clean records was extracted using the fol

In [3]:
# Unite X and y to do some operations before splitting the data
df = pd.concat([X, y], axis=1)

In [4]:
# Dataset Description
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48842 entries, 0 to 48841
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   age             48842 non-null  int64 
 1   workclass       47879 non-null  object
 2   fnlwgt          48842 non-null  int64 
 3   education       48842 non-null  object
 4   education-num   48842 non-null  int64 
 5   marital-status  48842 non-null  object
 6   occupation      47876 non-null  object
 7   relationship    48842 non-null  object
 8   race            48842 non-null  object
 9   sex             48842 non-null  object
 10  capital-gain    48842 non-null  int64 
 11  capital-loss    48842 non-null  int64 
 12  hours-per-week  48842 non-null  int64 
 13  native-country  48568 non-null  object
 14  income          48842 non-null  object
dtypes: int64(6), object(9)
memory usage: 5.6+ MB


In [5]:
# Count how many missing values in each column
print(df.isnull().sum())

age                 0
workclass         963
fnlwgt              0
education           0
education-num       0
marital-status      0
occupation        966
relationship        0
race                0
sex                 0
capital-gain        0
capital-loss        0
hours-per-week      0
native-country    274
income              0
dtype: int64


In [6]:
# Remove nan values, as there are few nan values
df = df.dropna()

In [7]:
df.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


In [9]:
# Remove some columns
cols_to_remove = [
    "fnlwgt",  # show how many people in the general population have the same characteristics as the sampled person
    "education-num",  # Already have education column
    "relationship",  # Not relevant to skills
    "marital-status",  # Not relevant to skills
    "race",  # Not relevant to skills
    "sex",  # Not relevant to skills
    "native-country",  # Not relevant
    "capital-gain",  # 91% are zeros
    "capital-loss",  # 95% are zeros
]

df.drop(columns=cols_to_remove, inplace=True)

# Reduce size of dataset
df = df.astype({
    'age': 'int8',
    'hours-per-week': 'int8'
})

# Turn target column into binary
df['income'] = df['income'].apply(lambda x: 1 if x == '>50K' else 0)


In [10]:
df.head()

Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,39,State-gov,Bachelors,Adm-clerical,40,0
1,50,Self-emp-not-inc,Bachelors,Exec-managerial,13,0
2,38,Private,HS-grad,Handlers-cleaners,40,0
3,53,Private,11th,Handlers-cleaners,40,0
4,28,Private,Bachelors,Prof-specialty,40,0


In [11]:
# Split the data
target = df["income"]
train_dataset, test_dataset, y_train, y_test = train_test_split(
    df, target, test_size=0.2, random_state=0, stratify=target
)

x_train = train_dataset.drop('income', axis=1)
x_test = test_dataset.drop('income', axis=1)

In [12]:
# Save split data
train_dataset.to_csv("split_dataset/train_no_capital.csv", index=False)
test_dataset.to_csv("split_dataset/test_no_capital.csv", index=False)
y_train.to_csv("split_dataset/y_train_no_capital.csv", index=False)
y_test.to_csv("split_dataset/y_test_no_capital.csv", index=False)

In [13]:
numerical = ["age", "hours_per_week"]
categorical = x_train.columns.difference(numerical)

categorical_transformer = Pipeline(
    steps=[("onehot", OneHotEncoder(handle_unknown="ignore"))]
)

transformations = ColumnTransformer(
    transformers=[("cat", categorical_transformer, categorical)]
)

# Append classifier to preprocessing pipeline.
# Now we have a full prediction pipeline.
clf = Pipeline(
    steps=[("preprocessor", transformations), ("classifier", LogisticRegression(max_iter=1000))]
)
model = clf.fit(x_train, y_train)

In [14]:
# Evaluate the model
score = model.score(x_test, y_test)
print(f"Model Score: {score}")

Model Score: 0.8395800524934384


In [2]:
import joblib
import pandas as pd

In [16]:
# save the model
joblib.dump(model, 'models/counterfactual_logistic_regression_no_capital.pkl')

['models/counterfactual_logistic_regression_no_capital.pkl']

In [3]:
# Load the model
model = joblib.load('models/counterfactual_logistic_regression_no_capital.pkl')

# Load the data
train_dataset = pd.read_csv("split_dataset/train_no_capital.csv")
test_dataset = pd.read_csv("split_dataset/test_no_capital.csv")
y_train = pd.read_csv("split_dataset/y_train_no_capital.csv")
y_test = pd.read_csv("split_dataset/y_test_no_capital.csv")

x_train = train_dataset.drop('income', axis=1)
x_test = test_dataset.drop('income', axis=1)

In [18]:
train_dataset.head()

Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,37,?,HS-grad,?,55,0
1,32,Private,Some-college,Other-service,40,0
2,46,Private,Masters,Exec-managerial,45,1
3,27,Private,Some-college,Tech-support,40,0
4,33,Private,1st-4th,Priv-house-serv,40,0


# Dice

## Random method

In [4]:
import dice_ml
import pprint

d = dice_ml.Data(dataframe=train_dataset, continuous_features=["age", "hours-per-week"], outcome_name='income')
m = dice_ml.Model(model=model, backend="sklearn")
# Using method=random for generating CFs
# random sampling method produces less sparse CFs in contrast to current DiCE's implementation. The sparsity issue with random sampling worsens with increasing total_CFs
exp_random = dice_ml.Dice(d, m, method="random")

In [23]:
query_instance = x_test[0:5]
imp = exp_random.local_feature_importance(query_instance, total_CFs=10)
pprint.pprint(imp.local_importance)

100%|██████████| 5/5 [00:01<00:00,  4.76it/s]

[{'age': 0.2,
  'education': 1.0,
  'hours-per-week': 0.8,
  'occupation': 0.9,
  'workclass': 0.6},
 {'age': 0.5,
  'education': 1.0,
  'hours-per-week': 1.0,
  'occupation': 0.9,
  'workclass': 0.7},
 {'age': 0.4,
  'education': 1.0,
  'hours-per-week': 1.0,
  'occupation': 1.0,
  'workclass': 0.6},
 {'age': 0.4,
  'education': 1.0,
  'hours-per-week': 0.6,
  'occupation': 1.0,
  'workclass': 0.5},
 {'age': 0.5,
  'education': 1.0,
  'hours-per-week': 0.0,
  'occupation': 0.7,
  'workclass': 0.5}]





In [25]:
query_instances = x_test[0:]
imp = exp_random.global_feature_importance(query_instances)
pprint.pprint(imp.summary_importance)

 73%|███████▎  | 6969/9525 [17:24<05:34,  7.64it/s]

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 9525/9525 [23:33<00:00,  6.74it/s]


{'age': 0.3027562492589279,
 'education': 0.9022970540362829,
 'hours-per-week': 0.4450744305871447,
 'occupation': 0.4480818359185521,
 'workclass': 0.43462935615655757}


In [20]:
# Predict from x_test 
#model.predict(x_test.iloc[0:1])
x_test.iloc[0:4]

Unnamed: 0,age,workclass,education,occupation,hours-per-week
0,55,Local-gov,Some-college,Adm-clerical,35
1,26,State-gov,Some-college,Adm-clerical,25
2,18,Private,11th,Other-service,16
3,25,Private,HS-grad,Handlers-cleaners,40


In [5]:
e1 = exp_random.generate_counterfactuals(x_test[0:4], total_CFs=2, desired_class="opposite")
e1.visualize_as_dataframe(show_only_changes=True)

  0%|          | 0/4 [00:00<?, ?it/s]

100%|██████████| 4/4 [00:00<00:00,  6.91it/s]

Query instance (original outcome : 0)





Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,55,Local-gov,Some-college,Adm-clerical,35,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,-,Prof-school,Protective-serv,95,1
1,-,Federal-gov,Doctorate,Prof-specialty,-,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,26,State-gov,Some-college,Adm-clerical,25,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,76,-,Prof-school,Tech-support,55,1
1,37,-,Doctorate,Transport-moving,42,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,18,Private,11th,Other-service,16,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,Federal-gov,Bachelors,Transport-moving,84,1
1,23,-,Prof-school,Exec-managerial,59,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,25,Private,HS-grad,Handlers-cleaners,40,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,-,Prof-school,Protective-serv,-,1
1,-,-,Prof-school,Exec-managerial,-,1


In [27]:
# Changing only some aspects
e2 = exp.generate_counterfactuals(
    x_test[0:5],
    total_CFs=1,
    desired_class="opposite",
    features_to_vary=["education", "workclass", "occupation", "hours-per-week"],
    permitted_range={"hours-per-week": [1, 44]},
)
e2.visualize_as_dataframe(show_only_changes=True)

100%|██████████| 5/5 [00:00<00:00,  5.32it/s]

Query instance (original outcome : 0)





Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,55,Local-gov,Some-college,Adm-clerical,35,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,Self-emp-inc,Doctorate,Tech-support,-,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,26,State-gov,Some-college,Adm-clerical,25,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,-,Prof-school,Protective-serv,26,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,18,Private,11th,Other-service,16,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,-,Doctorate,Prof-specialty,60,1
1,49,-,Prof-school,Adm-clerical,84,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,25,Private,HS-grad,Handlers-cleaners,40,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,-,Doctorate,Protective-serv,-,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,35,Private,10th,Machine-op-inspct,50,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,-,Doctorate,Craft-repair,-,1


## Genetic Algorithm

Use the genetic algorithm to find the best counterfactuals close to the query point.

In [28]:
exp_genetic = dice_ml.Dice(d, m, method="genetic")
e3 = exp_genetic.generate_counterfactuals(x_test[0:5], total_CFs=2, desired_class="opposite")
e3.visualize_as_dataframe(show_only_changes=True)

100%|██████████| 5/5 [00:05<00:00,  1.05s/it]

Query instance (original outcome : 0)





Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,55,Local-gov,Some-college,Adm-clerical,35,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,58,Self-emp-not-inc,Doctorate,Exec-managerial,-,1
0,-,Federal-gov,Masters,Exec-managerial,40,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,26,State-gov,Some-college,Adm-clerical,25,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,29,Self-emp-inc,Doctorate,Prof-specialty,35,1
0,25,Federal-gov,Masters,Exec-managerial,40,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,18,Private,11th,Other-service,16,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,38,-,Doctorate,Exec-managerial,36,1
0,27,-,Prof-school,Prof-specialty,42,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,25,Private,HS-grad,Handlers-cleaners,40,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,Federal-gov,Masters,Exec-managerial,-,1
0,17,-,Prof-school,Exec-managerial,-,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,35,Private,10th,Machine-op-inspct,50,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,-,Prof-school,Prof-specialty,-,1
0,-,-,Masters,Exec-managerial,-,1


In [29]:
# Changing only some aspects
e4 = exp_genetic.generate_counterfactuals(
    x_test[0:4],
    total_CFs=2,
    desired_class="opposite",
    features_to_vary=["education", "workclass", "occupation", "hours-per-week"],
    permitted_range={"hours-per-week": [1, 44]},
)
e4.visualize_as_dataframe(show_only_changes=True)

100%|██████████| 4/4 [00:20<00:00,  5.07s/it]

Query instance (original outcome : 0)





Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,55,Local-gov,Some-college,Adm-clerical,35,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,Self-emp-not-inc,Doctorate,Exec-managerial,-,1
0,-,-,Prof-school,Transport-moving,44,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,26,State-gov,Some-college,Adm-clerical,25,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,29,Self-emp-not-inc,Doctorate,Exec-managerial,35,1
0,29,Self-emp-inc,Doctorate,Prof-specialty,35,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,18,Private,11th,Other-service,16,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,38,-,Doctorate,Exec-managerial,36,1
0,27,-,Prof-school,Prof-specialty,42,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,25,Private,HS-grad,Handlers-cleaners,40,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,Federal-gov,Masters,Exec-managerial,-,1
0,17,-,Prof-school,Exec-managerial,-,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,35,Private,10th,Machine-op-inspct,50,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,-,Doctorate,Craft-repair,-,1


## Querying a KD Tree   

Find the closest points in the dataset that give the output as the desired class. Does it by building KD trees for each class, and querying the KD tree of the desired class to find the k closest counterfactuals from the dataset. The idea behind finding the closest points from the training data itself is to ensure that the counterfactuals displayed are feasible.

In [30]:
exp_KD = dice_ml.Dice(d, m, method='kdtree')

e5 = exp_genetic.generate_counterfactuals(x_test[0:4], total_CFs=2, desired_class="opposite")
e5.visualize_as_dataframe(show_only_changes=True)

100%|██████████| 4/4 [00:02<00:00,  1.46it/s]

Query instance (original outcome : 0)





Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,55,Local-gov,Some-college,Adm-clerical,35,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,58,Self-emp-not-inc,Doctorate,Exec-managerial,-,1
0,-,Federal-gov,Masters,Exec-managerial,40,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,26,State-gov,Some-college,Adm-clerical,25,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,29,Self-emp-not-inc,Doctorate,Exec-managerial,35,1
0,29,Self-emp-inc,Doctorate,Prof-specialty,35,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,18,Private,11th,Other-service,16,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,38,-,Doctorate,Exec-managerial,36,1
0,27,-,Prof-school,Prof-specialty,42,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,25,Private,HS-grad,Handlers-cleaners,40,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,Federal-gov,Masters,Exec-managerial,-,1
0,33,-,Prof-school,Exec-managerial,-,1


In [31]:
# Changing only some aspects
e6 = exp_genetic.generate_counterfactuals(
    x_test[0:4],
    total_CFs=2,
    desired_class="opposite",
    features_to_vary=["education", "workclass", "occupation", "hours-per-week"],
    permitted_range={"hours-per-week": [1, 44]},
)
e6.visualize_as_dataframe(show_only_changes=True)

100%|██████████| 4/4 [00:24<00:00,  6.14s/it]

Query instance (original outcome : 0)





Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,55,Local-gov,Some-college,Adm-clerical,35,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,Self-emp-not-inc,Doctorate,Exec-managerial,-,1
0,-,-,Masters,Exec-managerial,44,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,26,State-gov,Some-college,Adm-clerical,25,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,Local-gov,Doctorate,-,44,1
0,-,Federal-gov,Doctorate,Tech-support,29,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,18,Private,11th,Other-service,16,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,-,Doctorate,Transport-moving,26,1
0,-,-,Doctorate,Exec-managerial,36,1


Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,25,Private,HS-grad,Handlers-cleaners,40,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,-,-,Prof-school,Exec-managerial,-,1


In [31]:
# Avaliação da Factibilidade das Explicações
def check_feasibility(counterfactuals, dataset):
    feasible = []
    for cf in counterfactuals:
        if cf in dataset.values:
            feasible.append(True)
        else:
            feasible.append(False)
    return feasible

# Exemplo de uso
counterfactuals = e1.cf_examples_list[0].final_cfs_df
display(counterfactuals)
feasibility = check_feasibility(counterfactuals.values, train_dataset)
print('Feasibility of Counterfactuals:', feasibility)

Unnamed: 0,age,workclass,education,occupation,hours-per-week,income
0,55,Local-gov,Prof-school,Protective-serv,95,1
1,55,Federal-gov,Doctorate,Prof-specialty,35,1


Feasibility of Counterfactuals: [True, True]


In [67]:
# Análise de Sensibilidade
def format_and_print_results(results):
    formatted_results = [(value, int(pred[0])) for value, pred in results]
    print('Sensitivity Analysis Results:')
    for value, pred in formatted_results:
        print(f'Value: {value}, Prediction: {pred}')


def sensitivity_analysis(instance, model, feature, values):
    results = []
    for value in values:
        instance[feature] = value
        prediction = model.predict(instance.to_frame().T)
        results.append((value, prediction))
    return results

# Exemplo de usos

# Sensitivity Analysis for Age
original_instance = x_test.iloc[2]
display(pd.DataFrame(original_instance).T)
sensitivity_results = sensitivity_analysis(original_instance.copy(), model, 'age', range(20, 60, 5))
format_and_print_results(sensitivity_results)

# Sensitivity Analysis for Hours per Week
original_instance = x_test.iloc[13]
display(pd.DataFrame(original_instance).T)
sensitivity_results = sensitivity_analysis(original_instance.copy(), model, 'hours_per_week', range(5, 45, 5))
format_and_print_results(sensitivity_results)

# Sensitivity Analysis for Education
original_instance = x_test.iloc[15]
display(pd.DataFrame(original_instance).T)
sensitivity_results = sensitivity_analysis(original_instance.copy(), model, 'education', x_test['education'].unique())
format_and_print_results(sensitivity_results)

Unnamed: 0,age,workclass,education,occupation,hours-per-week
2,18,Private,11th,Other-service,16


Sensitivity Analysis Results:
Value: 20, Prediction: 0
Value: 25, Prediction: 0
Value: 30, Prediction: 0
Value: 35, Prediction: 0
Value: 40, Prediction: 0
Value: 45, Prediction: 0
Value: 50, Prediction: 0
Value: 55, Prediction: 0


Unnamed: 0,age,workclass,education,occupation,hours-per-week
13,19,Private,Some-college,Other-service,20


Sensitivity Analysis Results:
Value: 5, Prediction: 0
Value: 10, Prediction: 0
Value: 15, Prediction: 0
Value: 20, Prediction: 0
Value: 25, Prediction: 0
Value: 30, Prediction: 0
Value: 35, Prediction: 0
Value: 40, Prediction: 0


Unnamed: 0,age,workclass,education,occupation,hours-per-week
15,27,Private,Some-college,Craft-repair,45


Sensitivity Analysis Results:
Value: Some-college, Prediction: 0
Value: 11th, Prediction: 0
Value: HS-grad, Prediction: 0
Value: 10th, Prediction: 0
Value: Doctorate, Prediction: 1
Value: Bachelors, Prediction: 0
Value: Masters, Prediction: 0
Value: 7th-8th, Prediction: 0
Value: 1st-4th, Prediction: 0
Value: Assoc-voc, Prediction: 0
Value: Assoc-acdm, Prediction: 0
Value: 9th, Prediction: 0
Value: 5th-6th, Prediction: 0
Value: 12th, Prediction: 0
Value: Prof-school, Prediction: 1
Value: Preschool, Prediction: 0
