# Importations

In [35]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from datetime import datetime, timedelta

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split # type: ignore
from sklearn.preprocessing import Binarizer, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.algorithms.preprocessing import Reweighing
#from aif360.algorithms.inprocessing import AdversarialDebiasing
import tensorflow as tf

# 1 - Classification

### 1.1 Load and Preprocess Data

In [36]:
# Load the dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
columns = ["age", "workclass", "fnlwgt", "education", "education-num", "marital-status",
           "occupation", "relationship", "race", "sex", "capital-gain", "capital-loss",
           "hours-per-week", "native-country", "income"]
data = pd.read_csv(url, header=None, names=columns, na_values=" ?", skipinitialspace=True)

# Drop rows with missing values
data.dropna(inplace=True)

# Need to copy the data as we don't want a binarized part for testing the privacy
data_copy = data.copy()

# Binarize the 'age' attribute
binarizer = Binarizer(threshold=30)
data['age'] = binarizer.fit_transform(data[['age']])

# Convert categorical variables to dummy variables
data = pd.get_dummies(data, drop_first=True)

data

Unnamed: 0,age,fnlwgt,education-num,capital-gain,capital-loss,hours-per-week,workclass_Federal-gov,workclass_Local-gov,workclass_Never-worked,workclass_Private,...,native-country_Puerto-Rico,native-country_Scotland,native-country_South,native-country_Taiwan,native-country_Thailand,native-country_Trinadad&Tobago,native-country_United-States,native-country_Vietnam,native-country_Yugoslavia,income_>50K
0,1,77516,13,2174,0,40,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
1,1,83311,13,0,0,13,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
2,1,215646,9,0,0,40,False,False,False,True,...,False,False,False,False,False,False,True,False,False,False
3,1,234721,7,0,0,40,False,False,False,True,...,False,False,False,False,False,False,True,False,False,False
4,0,338409,13,0,0,40,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,0,257302,12,0,0,38,False,False,False,True,...,False,False,False,False,False,False,True,False,False,False
32557,1,154374,9,0,0,40,False,False,False,True,...,False,False,False,False,False,False,True,False,False,True
32558,1,151910,9,0,0,40,False,False,False,True,...,False,False,False,False,False,False,True,False,False,False
32559,0,201490,9,0,0,20,False,False,False,True,...,False,False,False,False,False,False,True,False,False,False


### 1.2 Train a logistic regression classifier and measure its performance

In [37]:
# Split the data into features and target variable
X = data.drop('income_>50K', axis=1)
y = data['income_>50K']

# Split the data into train, validation, and test sets (70% train, 15% validation, 15% test)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Train a logistic regression classifier
classifier = LogisticRegression(max_iter=2000)
classifier.fit(X_train_scaled, y_train)

# Predict on the test set
y_pred = classifier.predict(X_test_scaled)

# Measure performance
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

performance_metrics = {
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1
}

print("Performance Metrics:", performance_metrics)


Performance Metrics: {'Accuracy': 0.8552712384851586, 'Precision': 0.7271750805585392, 'Recall': 0.5991150442477876, 'F1 Score': 0.6569626394953906}


# 2 - Fairness


**2.2 Fairness Metrics**

In [38]:
from fairlearn.metrics import demographic_parity_difference, demographic_parity_ratio

# Define sensitive features: 'age' and 'sex_Male'
age_sensitive = X_test['age']
sex_sensitive = X_test['sex_Male']

# Compute fairness metrics for the original classifier
dpd_age = demographic_parity_difference(y_test, y_pred, sensitive_features=age_sensitive)
dpr_age = demographic_parity_ratio(y_test, y_pred, sensitive_features=age_sensitive)

dpd_sex = demographic_parity_difference(y_test, y_pred, sensitive_features=sex_sensitive)
dpr_sex = demographic_parity_ratio(y_test, y_pred, sensitive_features=sex_sensitive)

print("Fairness Metrics (Original Classifier):")
print(f"Age - Demographic Parity Difference: {dpd_age}, Demographic Parity Ratio: {dpr_age}")
print(f"Sex - Demographic Parity Difference: {dpd_sex}, Demographic Parity Ratio: {dpr_sex}")


Fairness Metrics (Original Classifier):
Age - Demographic Parity Difference: 0.2461163789376603, Demographic Parity Ratio: 0.08738725259693071
Sex - Demographic Parity Difference: 0.1725282306338493, Demographic Parity Ratio: 0.3045489515167733


**2.3 Apply a Fairness Mitigation Technique**

In [39]:
from aif360.datasets import BinaryLabelDataset
from aif360.algorithms.preprocessing import Reweighing
from sklearn.linear_model import LogisticRegression

# Convert data into BinaryLabelDataset format
binary_data = BinaryLabelDataset(
    df=pd.concat([X_train, y_train], axis=1),  # Combine features and target
    label_names=['income_>50K'],               # Target column
    protected_attribute_names=['age', 'sex_Male']  # Sensitive attributes
)

# Apply reweighting to the data
rw = Reweighing(unprivileged_groups=[{'age': 0}], privileged_groups=[{'age': 1}])  # Reweigh based on age
rw.fit(binary_data)  # Fit the reweighing model
reweighted_data = rw.transform(binary_data)  # Apply reweighting

# Train the classifier using the reweighted data
classifier_reweighted = LogisticRegression(max_iter=2000)
classifier_reweighted.fit(X_train_scaled, y_train, sample_weight=reweighted_data.instance_weights)

# Predict and measure performance on the reweighted classifier
y_pred_reweighted = classifier_reweighted.predict(X_test_scaled)

# Performance metrics for reweighted classifier
accuracy_reweighted = accuracy_score(y_test, y_pred_reweighted)
precision_reweighted = precision_score(y_test, y_pred_reweighted)
recall_reweighted = recall_score(y_test, y_pred_reweighted)
f1_reweighted = f1_score(y_test, y_pred_reweighted)

print("\nReweighted Classifier Performance Metrics:")
print(f"Accuracy: {accuracy_reweighted}")
print(f"Precision: {precision_reweighted}")
print(f"Recall: {recall_reweighted}")
print(f"F1 Score: {f1_reweighted}")


Reweighted Classifier Performance Metrics:
Accuracy: 0.8380757420675538
Precision: 0.6968641114982579
Recall: 0.5309734513274337
F1 Score: 0.6027122049221497


**2.4 Report Fairness Metrics on the Classifier and the Fair Classifier**

In [40]:
dpd_reweighted_age = demographic_parity_difference(y_test, y_pred_reweighted, sensitive_features=age_sensitive)
dpr_reweighted_age = demographic_parity_ratio(y_test, y_pred_reweighted, sensitive_features=age_sensitive)

dpd_reweighted_sex = demographic_parity_difference(y_test, y_pred_reweighted, sensitive_features=sex_sensitive)
dpr_reweighted_sex = demographic_parity_ratio(y_test, y_pred_reweighted, sensitive_features=sex_sensitive)

In [41]:
import pandas as pd

# Fairness metrics for the original classifier
metrics_original = {
    'Metric': ['Demographic Parity Difference (Age)', 'Demographic Parity Ratio (Age)',
               'Demographic Parity Difference (Sex)', 'Demographic Parity Ratio (Sex)'],
    'Original Classifier': [dpd_age, dpr_age, dpd_sex, dpr_sex]
}

# Performance metrics for the original classifier
metrics_original_performance = {
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Original Classifier': [accuracy, precision, recall, f1]
}

# Fairness metrics for the reweighted classifier
metrics_reweighted = {
    'Metric': ['Demographic Parity Difference (Age)', 'Demographic Parity Ratio (Age)',
               'Demographic Parity Difference (Sex)', 'Demographic Parity Ratio (Sex)'],
    'Reweighted Classifier': [dpd_reweighted_age, dpr_reweighted_age, dpd_reweighted_sex, dpr_reweighted_sex]
}

# Performance metrics for the reweighted classifier
metrics_reweighted_performance = {
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Reweighted Classifier': [accuracy_reweighted, precision_reweighted, recall_reweighted, f1_reweighted]
}

# Convert dictionaries to dataframes
df_original = pd.DataFrame(metrics_original)
df_original_performance = pd.DataFrame(metrics_original_performance)
df_reweighted = pd.DataFrame(metrics_reweighted)
df_reweighted_performance = pd.DataFrame(metrics_reweighted_performance)

# Display fairness metrics and performance metrics in tables
print("Fairness Metrics Comparison (Original vs Reweighted Classifier):")
print(pd.concat([df_original.set_index('Metric'), df_reweighted.set_index('Metric')], axis=1))

print("\nPerformance Metrics Comparison (Original vs Reweighted Classifier):")
print(pd.concat([df_original_performance.set_index('Metric'), df_reweighted_performance.set_index('Metric')], axis=1))


Fairness Metrics Comparison (Original vs Reweighted Classifier):
                                     Original Classifier  \
Metric                                                     
Demographic Parity Difference (Age)             0.246116   
Demographic Parity Ratio (Age)                  0.087387   
Demographic Parity Difference (Sex)             0.172528   
Demographic Parity Ratio (Sex)                  0.304549   

                                     Reweighted Classifier  
Metric                                                      
Demographic Parity Difference (Age)               0.100166  
Demographic Parity Ratio (Age)                    0.519463  
Demographic Parity Difference (Sex)               0.140902  
Demographic Parity Ratio (Sex)                    0.368751  

Performance Metrics Comparison (Original vs Reweighted Classifier):
           Original Classifier  Reweighted Classifier
Metric                                               
Accuracy              0.855271 

# 3 - Privacy


### 3.1 First cross tabulation

In [42]:
# Cross-tabulation on age and sex
sensitive_crosstab = pd.crosstab(data_copy['age'], data_copy['sex'])

print("the distribution is the following: \n", sensitive_crosstab)

the distribution is the following: 
 sex  Female  Male
age              
17      186   209
18      268   282
19      356   356
20      363   390
21      329   391
..      ...   ...
85        1     2
86        1     0
87        0     1
88        1     2
90       14    29

[73 rows x 2 columns]


### 3.2 Local differential privacy

In [None]:
# making a private dataset
private_data = data_copy.copy()

#We can change the values of the epsilon to make tests on them here, the current ones are the ones
#that seemed the more coherent after testing multiple ones
epsilon_age = 0.9
epsilon_sex = 0.2 #so 20% of the sex values are changed

#Apply Laplace noise and round it(for age)
def laplace_noise(value, epsilon):
    return round(value + np.random.laplace(0, 1 / epsilon))

#Apply randomized response (for sex)
def randomized_response(value, epsilon):
    if np.random.rand() < epsilon_sex: #this way, the valu of epsilo_sex is the proportion of changed sex
        return 'Male' if (value == 'Female') else 'Female'
    return value

#remark: we can't take a epsilon too small for sex, else we risk blurring information on sex inequalities

#Apply local differential privacy to age and sex on dataPrivate
private_data['age'] = data_copy['age'].apply(lambda x: laplace_noise(x, epsilon_age))
private_data['sex'] = data_copy['sex'].apply(lambda x: randomized_response(x, epsilon_sex))

# Concatenate the 'age' and 'sex' columns from both original (dataCopy) and private (dataPrivate) data
comparison_df = pd.concat([data_copy[['age', 'sex']], private_data[['age', 'sex']]], axis=1)
comparison_df.columns = ['Original Age', 'Original Sex', 'Private Age', 'Private Sex']
comparison_df = comparison_df[['Original Age', 'Private Age', 'Original Sex', 'Private Sex']]

# Show the comparison to see how the private dataset differs from the original one
print(comparison_df)

       Original Age  Private Age Original Sex Private Sex
0                39           39         Male        Male
1                50           50         Male        Male
2                38           38         Male        Male
3                53           53         Male        Male
4                28           28       Female      Female
...             ...          ...          ...         ...
32556            27           27       Female        Male
32557            40           40         Male        Male
32558            58           57       Female        Male
32559            22           22         Male        Male
32560            52           51       Female      Female

[32561 rows x 4 columns]


### 3.3 Cross tabulation on the private dataset

In [44]:
#Cross-tabulation for the private data
private_crosstab = pd.crosstab(private_data['age'], private_data['sex'])

print("the distribution on the private data is the following:\n", private_crosstab)

# Align both crosstabs, necessary, else it returns an empty dataset
sensitive_crosstab, private_crosstab = sensitive_crosstab.align(private_crosstab, join='outer', axis=0, fill_value=0)

#Calculate the estimation errors
#this shows the differences in distribution between the private and original dataset
comparison_crosstab = sensitive_crosstab.subtract(private_crosstab, fill_value=0)
print("Estimation error:\n", comparison_crosstab)

print("note: negative values mean there are more in the private dataset than in the original one")

the distribution on the private data is the following:
 sex  Female  Male
age              
16        5    11
17      183   204
18      282   253
19      341   370
20      368   382
..      ...   ...
86        1     0
88        2     2
89        1     0
90       19    22
91        0     1

[75 rows x 2 columns]
Estimation error:
 sex  Female  Male
age              
16       -5   -11
17        3     5
18      -14    29
19       15   -14
20       -5     8
..      ...   ...
87        0     1
88       -1     0
89       -1     0
90       -5     7
91        0    -1

[76 rows x 2 columns]
note: negative values mean there are more in the private dataset than in the original one


### 3.4 Data splitting and classification

In [45]:
# we binarize the ages again
private_data['age'] = binarizer.fit_transform(private_data[['age']])

# Convert categorical variables to dummy variables
private_data = pd.get_dummies(private_data, drop_first=True)

#now we make a process similar to the one from (1):

# Split the data into features and target variable
X_p = private_data.drop('income_>50K', axis=1)
Y_p = private_data['income_>50K']

# Split the data into train, validation, and test sets (70% train, 15% validation, 15% test)
X_train_p, X_temp_p, Y_train_p, Y_temp_p = train_test_split(X_p, Y_p, test_size=0.3, random_state=42)
X_val_p, X_test_p, Y_val_p, Y_test_p = train_test_split(X_temp_p, Y_temp_p, test_size=0.5, random_state=42)

# Scale the data
#scaler was already defined in (1)
X_train_scaled_p = scaler.fit_transform(X_train_p)
X_val_scaled_p = scaler.transform(X_val_p)
X_test_scaled_p = scaler.transform(X_test_p)

# Train a logistic regression classifier
private_classifier = LogisticRegression(max_iter=2000)
private_classifier.fit(X_train_scaled_p, Y_train_p)


# Predict on the test set
Y_pred_p = private_classifier.predict(X_test_scaled_p)

In [46]:
X_train_scaled_p.shape[1]

100

### 3.5 Performances measuring

In [47]:
# Measure performance on the private classifier
private_accuracy = accuracy_score(Y_test_p, Y_pred_p)
private_precision = precision_score(Y_test_p, Y_pred_p)
private_recall = recall_score(Y_test_p, Y_pred_p)
private_f1 = f1_score(Y_test_p, Y_pred_p)

private_performance_metrics = {
    'Accuracy': private_accuracy,
    'Precision': private_precision,
    'Recall': private_recall,
    'F1 Score': private_f1
}

print("Performance Metrics of the original dataset:", performance_metrics)
print("Performance Metrics of the private dataset: ", private_performance_metrics)

Performance Metrics of the original dataset: {'Accuracy': 0.8552712384851586, 'Precision': 0.7271750805585392, 'Recall': 0.5991150442477876, 'F1 Score': 0.6569626394953906}
Performance Metrics of the private dataset:  {'Accuracy': 0.8528147389969294, 'Precision': 0.7236126224156693, 'Recall': 0.588495575221239, 'F1 Score': 0.6490971205466081}


# 4 - Privacy and Fairness


In [48]:
# Convert the private data into BinaryLabelDataset format
binary_private_data = BinaryLabelDataset(
    df=pd.concat([X_train_p, Y_train_p], axis=1),
    label_names=['income_>50K'],
    protected_attribute_names=['age']
)

# Apply Reweighing to the private data
rw_private = Reweighing(unprivileged_groups=[{'age': 0}], privileged_groups=[{'age': 1}])
rw_private.fit(binary_private_data)  # Fit the Reweighing algorithm
reweighted_private_data = rw_private.transform(binary_private_data)  # Apply weights

# Train the logistic regression classifier with reweighted data
private_fair_classifier = LogisticRegression(max_iter=2000)
private_fair_classifier.fit(X_train_scaled_p, Y_train_p, sample_weight=reweighted_private_data.instance_weights)

# Predict on the test set
Y_pred_private_fair = private_fair_classifier.predict(X_test_scaled_p)

# Performance metrics for Private+Fair Classifier
accuracy_private_fair = accuracy_score(Y_test_p, Y_pred_private_fair)
precision_private_fair = precision_score(Y_test_p, Y_pred_private_fair)
recall_private_fair = recall_score(Y_test_p, Y_pred_private_fair)
f1_private_fair = f1_score(Y_test_p, Y_pred_private_fair)

# Display metrics
print("Private+Fair Classifier Performance Metrics:")
print(f"Accuracy: {accuracy_private_fair}")
print(f"Precision: {precision_private_fair}")
print(f"Recall: {recall_private_fair}")
print(f"F1 Score: {f1_private_fair}")

Private+Fair Classifier Performance Metrics:
Accuracy: 0.8397134083930399
Precision: 0.7019790454016298
Recall: 0.5336283185840708
F1 Score: 0.6063348416289592


In [49]:
age_sensitive_private = X_test_p['age']  
sex_sensitive_private = X_test_p['sex_Male'] 

# Fairness metrics for the Private+Fair Classifier
dpd_private_fair_age = demographic_parity_difference(Y_test_p, Y_pred_private_fair, sensitive_features=age_sensitive_private)
dpr_private_fair_age = demographic_parity_ratio(Y_test_p, Y_pred_private_fair, sensitive_features=age_sensitive_private)

dpd_private_fair_sex = demographic_parity_difference(Y_test_p, Y_pred_private_fair, sensitive_features=sex_sensitive_private)
dpr_private_fair_sex = demographic_parity_ratio(Y_test_p, Y_pred_private_fair, sensitive_features=sex_sensitive_private)

print("\nPrivate+Fair Classifier Fairness Metrics:")
print(f"Age - Demographic Parity Difference: {dpd_private_fair_age}, Demographic Parity Ratio: {dpr_private_fair_age}")
print(f"Sex - Demographic Parity Difference: {dpd_private_fair_sex}, Demographic Parity Ratio: {dpr_private_fair_sex}")



Private+Fair Classifier Fairness Metrics:
Age - Demographic Parity Difference: 0.10170206196731359, Demographic Parity Ratio: 0.5123893903072215
Sex - Demographic Parity Difference: 0.08781984139898075, Demographic Parity Ratio: 0.5851516969208279


In [50]:
# Fairness metrics for private+fair classifier
metrics_private_fair = {
    'Metric': ['Demographic Parity Difference (Age)', 'Demographic Parity Ratio (Age)',
               'Demographic Parity Difference (Sex)', 'Demographic Parity Ratio (Sex)'],
    'Private+Fair Classifier': [dpd_private_fair_age, dpr_private_fair_age, dpd_private_fair_sex, dpr_private_fair_sex]
}

# Performance metrics for private+fair classifier
metrics_private_fair_performance = {
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Private+Fair Classifier': [accuracy_private_fair, precision_private_fair, recall_private_fair, f1_private_fair]
}

# Convert to DataFrames
df_private_fair = pd.DataFrame(metrics_private_fair)
df_private_fair_performance = pd.DataFrame(metrics_private_fair_performance)

# Display comparison tables
print("\nFairness Metrics Comparison (Original vs Reweighted vs Private+Fair Classifier):")
print(pd.concat([df_original.set_index('Metric'), 
                 df_reweighted.set_index('Metric'), 
                 df_private_fair.set_index('Metric')], axis=1))

print("\nPerformance Metrics Comparison (Original vs Reweighted vs Private+Fair Classifier):")
print(pd.concat([df_original_performance.set_index('Metric'), 
                 df_reweighted_performance.set_index('Metric'), 
                 df_private_fair_performance.set_index('Metric')], axis=1))



Fairness Metrics Comparison (Original vs Reweighted vs Private+Fair Classifier):
                                     Original Classifier  \
Metric                                                     
Demographic Parity Difference (Age)             0.246116   
Demographic Parity Ratio (Age)                  0.087387   
Demographic Parity Difference (Sex)             0.172528   
Demographic Parity Ratio (Sex)                  0.304549   

                                     Reweighted Classifier  \
Metric                                                       
Demographic Parity Difference (Age)               0.100166   
Demographic Parity Ratio (Age)                    0.519463   
Demographic Parity Difference (Sex)               0.140902   
Demographic Parity Ratio (Sex)                    0.368751   

                                     Private+Fair Classifier  
Metric                                                        
Demographic Parity Difference (Age)                 0.1017

### Conclusion
#### Fairness Achieved with Reweighting and Privacy
 - Both the Fair Classifier and Private+Fair Classifier significantly
   reduce demographic disparities compared to the Original Classifier,
   achieving much better fairness with respect to Age and Sex.
  - Privacy constraints in the Private+Fair Classifier do not compromise
   fairness or performance, achieving parity with the Fair Classifier.

# 5 - Explainability


In [51]:
import warnings
warnings.filterwarnings("ignore")

import sys
sys.path.append("dependencies")

from omnixai.data.tabular import Tabular
from omnixai.preprocessing.tabular import TabularTransform
from omnixai.explainers.tabular import TabularExplainer
import xgboost as xgboost
from itertools import cycle, islice

ModuleNotFoundError: No module named 'omnixai'

In [None]:
print(f'Test: {private_accuracy=:.4f}')
#confusion_matrix for the private classifier
cm = confusion_matrix(Y_test_p, Y_pred_p)

TN = cm[0][0]
FN = cm[1][0]
TP = cm[1][1]
FP = cm[0][1]
print(f"Test: {TP=}, {TN=}, {FP=}, {FN=}")

disp = ConfusionMatrixDisplay(confusion_matrix=cm, )
disp.plot();

### 5.1 Instances where the model is wrong but highly confident:

In [None]:
# Find instances where the predicted label is different from the actual label
miss_indices = np.where(Y_pred_p != Y_test_p)[0]

# Create the prediction probabilities
proba = private_classifier.predict_proba(X_test_scaled_p)

# Find instances where the model is very confident but wrong
miss_but_confident = []

high_confidence_threshold = 0.95 # the value describing what is considered as a high confidence.

for idx in miss_indices:
    if max(proba[idx]) > high_confidence_threshold:
        miss_but_confident.append(idx)
    
print(f"There are {len(miss_but_confident)} instances where the model is very confident but wrong.")

max_nb_instances_to_detail = 2 # number of "miss_but_confident" to show in detail , shows all if -1 or none
print(f"Maximum number of instances to analyse is set to {max_nb_instances_to_detail}")

if max_nb_instances_to_detail is not None and max_nb_instances_to_detail >= 0:
    slice_nb = min(max_nb_instances_to_detail,len(miss_but_confident))
    miss_but_confident_detail = miss_but_confident[:slice_nb]
else:
    miss_but_confident_detail = miss_but_confident

for instance_id in miss_but_confident_detail:
    print(f"\t-> Instance {instance_id} has label '{Y_test_p.values[instance_id]}' and prediction '{Y_pred_p[instance_id]}', with probs {private_classifier.predict_proba(X_test_scaled_p[instance_id:instance_id+1])[0]}")

### 5.2 Explanation:

In [None]:
# put the input data into a pandas dataframe
tab_prep_data_df = pd.DataFrame(X_train_scaled_p,columns=X_p.columns)
y_col_name = Y_p.name #"y"
tab_prep_data_df[y_col_name] = Y_train_p.values

tab_prep_data_df.head()

In [None]:
# put the input data in omnixai Tabular form
tabular_data = Tabular(
   tab_prep_data_df,
   target_column=y_col_name
)
transformer = TabularTransform().fit(tabular_data)
class_names = transformer.class_names

In [None]:
for instance_id in miss_but_confident_detail:

    print(f"========== INSTANCE {instance_id}: ==========")
    
    explainers = TabularExplainer(
      explainers=['lime', 'mace'],                       # The explainers to apply
      mode="classification",                             # The task type
      data=transformer.invert(X_train_scaled_p),         # The data for initializing the explainers
      model=private_classifier,                          # The ML model to explain
      preprocess=lambda z: transformer.transform(z),     # Converts raw features into the model inputs
      params={
            "mace": {"ignored_features": ["Age", "Sex"]}# params which cannot change when creating counterfactuals
        }
    )
    
    test_instances = transformer.invert(X_test_scaled_p)[instance_id:instance_id+1]
    local_explanations = explainers.explain(X=test_instances)

    print("LIME explanation")

    print(f"Instance {instance_id} has label {Y_test_p.values[instance_id]} but prediction {Y_pred_private_fair[instance_id]}, with probs {private_classifier.predict_proba(X_test_scaled_p[instance_id:instance_id+1])[0]}")
    local_explanations["lime"].ipython_plot(index=0, class_names=class_names)

    print("MACE explanation")

    query_df = local_explanations['mace'].get_explanations()[0]['query'].reset_index(drop=True)
    query_df.index = [f"{instance_id}"] * len(query_df)
    
    counter_df = local_explanations['mace'].get_explanations()[0]['counterfactual'].reset_index(drop=True)
    counter_df.index = [ f"CF[{cnt}] for {instance_id}" for cnt in range(len(counter_df))]
    
    combined_df = pd.concat([query_df, counter_df])

    # Highlighting function
    def highlight_changes(row):
        instance_row = combined_df.iloc[0]  # Reference row for the instance
        return ["color: red" if row[col] != instance_row[col] else "" for col in combined_df.columns]
    
    styled_df = combined_df.style.apply(highlight_changes, axis=1)
    
    display(styled_df)
    print("----------------------------------------")

### 5.3 Are the noisy values for the sensitive values of Age and Sex attributes responsible for the model being confident and wrong?

#### Looking at all the test cases

In [None]:
# regroup data of age and sex from the original and private parts
expl_df = comparison_df.copy()

expl_test_df = expl_df.loc[X_test_p.index]
expl_test_df.head()

In [None]:
Y_test_p

In [None]:
expl_test_df["real_y"] = Y_test_p.values
expl_test_df["pred_y"] = Y_pred_p

In [None]:
expl_test_df.head()

In [None]:
expl_test_df["age_abs_diff"] = abs(expl_test_df["Original Age"]-expl_test_df["Private Age"])
expl_test_df["sex_change"] = (expl_test_df["Original Sex"] != expl_test_df["Private Sex"])
expl_test_df["pred_change"] = (expl_test_df["real_y"] != expl_test_df["pred_y"])

expl_miss_but_conf_df = expl_test_df.iloc[miss_but_confident]

expl_test_df["new_id"] = range(len(expl_test_df))
expl_miss_but_conf_df["new_id"] = range(len(expl_miss_but_conf_df))

check_all_test = True # allows to focus at only "confident+miss" or "all testset" in the plots

expl_check_df = expl_test_df if check_all_test else expl_miss_but_conf_df
expl_check_df.head()

In [None]:
# create smaller dataframes of all combinations of having the sex and/or the prediction
snp_df = expl_check_df.loc[expl_check_df["sex_change"] & ~expl_check_df["pred_change"]] #light green
nsnp_df = expl_check_df.loc[~expl_check_df["sex_change"] & ~expl_check_df["pred_change"]] #dark green
sp_df = expl_check_df.loc[expl_check_df["sex_change"] & expl_check_df["pred_change"]] #light red
nsp_df = expl_check_df.loc[~expl_check_df["sex_change"] & expl_check_df["pred_change"]] #dark red

In [None]:
nb_snp = len(snp_df)
nb_nsnp = len(nsnp_df)
nb_sp = len(sp_df)
nb_nsp = len(nsp_df)
print("sex change but no pred change:",nb_snp)
print("no sex change and no pred change:",nb_nsnp)
print("sex change and pred change:",nb_sp)
print("no sex change but pred change:",nb_nsp)

In [None]:
def prep_df(df,group_num):
    new_df = df.copy()[["new_id","age_abs_diff"]]
    new_df["group"] = group_num
    return new_df

snp_name = "sex change, no pred change"
nsnp_name = "no sex change, no pred change"
sp_name = "sex change, pred change"
nsp_name = "no sex change, pred change"

regoup_df = prep_df(snp_df,snp_name)
regoup_df = pd.concat([regoup_df, prep_df(nsnp_df,nsnp_name)])
regoup_df = pd.concat([regoup_df, prep_df(sp_df,sp_name)])
regoup_df = pd.concat([regoup_df, prep_df(nsp_df,nsp_name)])

regoup_df.head()

In [None]:
import hvplot.pandas

sex_pred_color_dict = {snp_name:"lime",
                       nsnp_name:"green",
                       sp_name:"orange",
                       nsp_name:"red"}

group_order = list(regoup_df["group"].drop_duplicates()) # to enforce name-color association

regoup_df.hvplot(
    x="new_id", 
    xlabel="",
    y="age_abs_diff",
    ylabel="absolute age difference", 
    kind='scatter', 
    by="group",
    color=[sex_pred_color_dict[name] for name in group_order],
    title="sex and age change impact on prediction",
    width=1100
)

In [None]:
# create smaller dataframes for having or not a prediction change
np_df = expl_check_df.loc[~expl_check_df["pred_change"]] #green
p_df = expl_check_df.loc[expl_check_df["pred_change"]] #red

regoup_df = prep_df(np_df,"no pred change")
regoup_df = pd.concat([regoup_df, prep_df(p_df,"pred change")])

In [None]:
pred_color_dict = {"no pred change":"green",
                   "pred change":"red"}

group_order = list(regoup_df["group"].drop_duplicates()) # to enforce name-color association

regoup_df.hvplot(
    x="new_id", 
    xlabel="",
    y="age_abs_diff",
    ylabel="absolute age difference",
    kind='scatter', 
    by="group",
    color=[pred_color_dict[name] for name in group_order],
    title="age change impact on prediction",
    width=1100
)

In [None]:
def ratio(val1,val2): return round(val1*100/(val1+val2),2)

bar_dict = {"pred changes":[],"no pred changes":[],"age":[]}

possible_age_diff_values = list(regoup_df["age_abs_diff"].drop_duplicates())
for age_diff in possible_age_diff_values:
    bar_dict["age"].append(age_diff)
    nb_age_p_changes = len(p_df[p_df['age_abs_diff']==age_diff])
    bar_dict["pred changes"].append(nb_age_p_changes)
    nb_age_np_changes = len(np_df[np_df['age_abs_diff']==age_diff])
    bar_dict["no pred changes"].append(nb_age_np_changes)
    print(f"Age diff of {age_diff}: {nb_age_p_changes} pred changes and {nb_age_np_changes} no pred changes: {ratio(nb_age_p_changes,nb_age_np_changes)}% ratio")

In [None]:
bar_df = pd.DataFrame(bar_dict)
bar_df["total age cases"] = bar_df["pred changes"] + bar_df["no pred changes"]
bar_df["pred changes norm"] = bar_df["pred changes"]/bar_df["total age cases"]
bar_df["no pred changes norm"] = bar_df["no pred changes"]/bar_df["total age cases"]
bar_df

In [None]:
bar_df.hvplot(
    x="age", 
    xlabel="ages",
    y=["pred changes norm","no pred changes norm"],
    ylabel="amount of cases (normalised)",
    kind='bar', 
    #by="ages",
    #color=[pred_color_dict[name] for name in group_order],
    title="normalised age change impact on prediction",
    width=1100
)

The amount of pred changes relative to the no pred changes seems to be indentical or to slightly increase the more age difference there is but this can be due to the fewer cases at larger differences

In [None]:
import matplotlib.pyplot as plt

x = [0, 0, 1, 1] # sex change (0:no,1:yes)
y = [0, 1, 0, 1] # pred chage (0:no,1:yes)
n = [nb_nsnp, nb_nsp, nb_snp, nb_sp] # dot text
s = n # dot size
c=[sex_pred_color_dict[name] for name in [nsnp_name,nsp_name,snp_name,sp_name]] # dot color

fig, ax = plt.subplots()

fig.set_figheight(5)
fig.set_figwidth(5)

ax.scatter(x, y,s=s,c=c)
ax.set(xlim=(-0.5, 1.5),ylim=(-0.5, 1.5))

ax.set_title("sex change impact on prediction")
ax.set_xlabel("sex change")
ax.set_xticks([0,1], labels=["no","yes"], minor=False)
ax.set_ylabel("pred change")
ax.set_yticks([0,1], labels=["no","yes"], minor=False)

for i, txt in enumerate(n):
    ax.annotate(txt, (x[i], y[i]))

Sex change (noise of the sex data) isn't clearly responsible for a change of prediction capability since only ~150 cases were measured in this case. \
In contrast, ~550 cases were observed were the predition was changed even without changes to the sex.

In [None]:
print(f"% of prediction change when no sex change: {ratio(nb_nsp,nb_sp)}%")
print(f"% of prediction change when sex change: {ratio(nb_nsnp,nb_snp)}%")

The % of prediction change when with or without sex change are similar

#### Looking at only the cases where the model is wrong but confident

In [None]:
expl_miss_but_conf_df.head()

In [None]:
nb_miss_but_conf = len(expl_miss_but_conf_df)
print("Average age change while 'miss but confident':",sum(expl_miss_but_conf_df["age_abs_diff"])/nb_miss_but_conf)
print(f"While 'miss but confident' age was changed {len(expl_miss_but_conf_df[expl_miss_but_conf_df['age_abs_diff'] != 0])} times on {nb_miss_but_conf}")
print(f"While 'miss but confident' sex was changed {len(expl_miss_but_conf_df[expl_miss_but_conf_df['sex_change']])} times on {nb_miss_but_conf}")
print(f"While 'miss but confident' both were changed {len(expl_miss_but_conf_df[expl_miss_but_conf_df['sex_change'] & expl_miss_but_conf_df['age_abs_diff'] != 0])} times on {nb_miss_but_conf}")

#  6 - Explainability and LLMs

### Configuration LM server
We assume that one will launch LM Studio and launch its server.

Model used is same as in practice: Llama-3.2.3b-instruct.

In [1]:
from openai import OpenAI

client = OpenAI(
    api_key="llama-3.2-3b-instruct", #not necessary as connected directly to localhost LM server
    base_url="http://localhost:1234/v1"
)

### Specification for the model

Here I specify to the system the context of the request, it tries to be an explainer for the results of LIME method and interprets it in human-readable text

In [2]:
explain_method="LIME"
context_system= ("You're an interpreter of a explainability method for a classifier."
                " The method used is " + explain_method + "."
                " You need to explain in a simple way anyone can understand what the values of the parameters from the explanation actually mean for the classifier.")

messages = [ 
    {"role": "system", "content": context_system}
]

def get_response(message, messages):
    messages.append({"role": "user", "content": message})
    completion = client.chat.completions.create(
        messages=messages,
        temperature=0.4,
        model="model"
    )
    response = completion.choices[0].message.content
    messages.append({"role": "assistant", "content": response})
    return response, messages

For the purpose of the example, we use an instance that was highly confident on a wrong prediction to learn why it mispredicted. We look at the first instance previously analyzed on [5. Explainability](#5---explainability)

In [None]:
example_id = miss_but_confident_detail[0]

explainers = TabularExplainer(
    explainers=['lime', 'mace'],                       # The explainers to apply
    mode="classification",                             # The task type
    data=transformer.invert(X_train_scaled_p),         # The data for initializing the explainers
    model=private_classifier,                          # The ML model to explain
    preprocess=lambda z: transformer.transform(z),     # Converts raw features into the model inputs
    params={
        "mace": {"ignored_features": ["Age", "Sex"]}# params which cannot change when creating counterfactuals
    }
)

test_instances = transformer.invert(X_test_scaled_p)[example_id:example_id+1]
local_explanations = explainers.explain(X=test_instances)

true_label, pred_label = Y_test_p.values[example_id], Y_pred_private_fair[example_id]
probs = private_classifier.predict_proba(X_test_scaled_p[example_id:example_id+1])[0]

We formulate the request here. One would give the pairs of features-importance scores but also the true label and prediction label to get an answer from the language through the server.

Unfortunately, it takes a few minutes to execute.

In [None]:
lime_info_dict = local_explanations["lime"].get_explanations()[0]
feat_score_pairs = list(zip(lime_info_dict['features'],lime_info_dict['scores']))

prefix = "Here are the features and their score with " + explain_method + " method:\n\n"
for i in range (len(feat_score_pairs)):
    prefix += "Name of feature: " + str(feat_score_pairs[i][0]) + ", Importance score: " + str(feat_score_pairs[i][1]) + "\n"

suffix = f"\nFor the example {example_id}, true label was {true_label} and prediction label was {pred_label}. Can you explain to me what features were the most influential to get a bad prediction ?"

message = prefix + suffix
print(message,"\n")
print("--------------------------------------------------------------------------------")
response, messages = get_response(message, messages)
print(response)

Here are the features and their score with LIME method:

Name of feature: capital-gain, Importance score: 0.5226965078276797
Name of feature: education_Preschool, Importance score: -0.36581589319378627
Name of feature: occupation_Priv-house-serv, Importance score: -0.3354421907663196
Name of feature: workclass_Without-pay, Importance score: -0.2924377615117225
Name of feature: occupation_Armed-Forces, Importance score: -0.2740265598030081
Name of feature: marital-status_Married-AF-spouse, Importance score: 0.25956166029686994
Name of feature: native-country_Hong, Importance score: 0.22241719269040844
Name of feature: native-country_Cambodia, Importance score: 0.21128383610602153
Name of feature: native-country_Columbia, Importance score: -0.20444894299948574
Name of feature: native-country_Honduras, Importance score: -0.06931146789191218

For the example 246, true label was True and prediction label was False. Can you explain to me what features were the most influential to get a bad p

# 7 - Free Exploration

In [None]:
print("Last execution of this notebook:",datetime.utcnow()+timedelta(hours=1))