# FairDa reimplementation on Adult dataset

In [1]:
from sklearn.metrics import accuracy_score, f1_score

In [2]:
!pip install fairlearn

Collecting fairlearn
  Downloading fairlearn-0.10.0-py3-none-any.whl (234 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m234.1/234.1 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting numpy>=1.24.4 (from fairlearn)
  Downloading numpy-1.26.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.2/18.2 MB[0m [31m61.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pandas>=2.0.3 (from fairlearn)
  Downloading pandas-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.0/13.0 MB[0m [31m82.4 MB/s[0m eta [36m0:00:00[0m
Collecting tzdata>=2022.7 (from pandas>=2.0.3->fairlearn)
  Downloading tzdata-2023.4-py2.py3-none-any.whl (346 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m346.6/346.6 kB[0m [31m34.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: 

# Importing and adjusting data


In [3]:
import pandas as pd

In [4]:
df = pd.read_csv("adult.csv", index_col=0, sep=",")

list(df.columns)

['age',
 'fnlwgt',
 'education-num',
 'capital-gain',
 'capital-loss',
 'hours-per-week',
 'birth_year',
 'workclass_ ?',
 'workclass_ Federal-gov',
 'workclass_ Local-gov',
 'workclass_ Never-worked',
 'workclass_ Private',
 'workclass_ Self-emp-inc',
 'workclass_ Self-emp-not-inc',
 'workclass_ State-gov',
 'workclass_ Without-pay',
 'education_ 10th',
 'education_ 11th',
 'education_ 12th',
 'education_ 1st-4th',
 'education_ 5th-6th',
 'education_ 7th-8th',
 'education_ 9th',
 'education_ Assoc-acdm',
 'education_ Assoc-voc',
 'education_ Bachelors',
 'education_ Doctorate',
 'education_ HS-grad',
 'education_ Masters',
 'education_ Preschool',
 'education_ Prof-school',
 'education_ Some-college',
 'marital-status_ Divorced',
 'marital-status_ Married-AF-spouse',
 'marital-status_ Married-civ-spouse',
 'marital-status_ Married-spouse-absent',
 'marital-status_ Never-married',
 'marital-status_ Separated',
 'marital-status_ Widowed',
 'occupation_ ?',
 'occupation_ Adm-clerical',
 

In [5]:
df.drop(["salary_ <=50K", "sex_ Female"], axis=1, inplace=True)

# Split the df in source and target domain, hiding sensitive attributes in df2

In [6]:
df1 = df.loc[df["native-country_ United-States"] == 1].copy()
df2 = df.loc[df["native-country_ United-States"] == 0].copy()
df1["d"] = 0
df2["d"] = 1

true_A2 = df2["sex_ Male"]
df2["sex_ Male"] = -1

len(df1), len(df2)

(29170, 3391)

# Defining architecture elements and training step


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the feature extractor
class FeatureExtractor(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(FeatureExtractor, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU()
        )

    def forward(self, x):
        return self.network(x)

# Define the classifier
class Classifier(nn.Module):
    def __init__(self, hidden_dim, output_dim):
        super(Classifier, self).__init__()
        self.network = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        return self.network(x)

# Define the adversary
class Adversary(nn.Module):
    def __init__(self, hidden_dim, sensitive_attr_dim):
        super(Adversary, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, sensitive_attr_dim)
        )

    def forward(self, x):
        return self.network(x)

# Define a function to update the models
def train_step(models, optimizers, criterion, x, y, s, alpha, mode):
    feature_extractor, classifier, adversary = models
    optimizer_fe, optimizer_c, optimizer_a = optimizers

    # Zero the parameter gradients
    optimizer_fe.zero_grad()
    optimizer_c.zero_grad()
    optimizer_a.zero_grad()


    # Forward pass to compute features
    features = feature_extractor(x)

    # Classifier loss and update
    pred_y = classifier(features.detach())

    loss_y = 0
    if mode == 1:
      if 0 in s:
          mask = s == 0
          loss_y = criterion(pred_y[mask], y[mask])
    else:
      loss_y = criterion(pred_y, y)
    if loss_y != 0:
      loss_y.backward(retain_graph=True)  # Retain graph for subsequent backward pass
      optimizer_c.step()



    # Adversary loss and update
    pred_s = adversary(features.detach())  # Detach to prevent gradients from flowing back to feature_extractor
    loss_s = criterion(pred_s, s)
    loss_s.backward()
    optimizer_a.step()

    # Clear gradients for feature extractor update
    optimizer_fe.zero_grad()
    # Combined loss for feature extractor
    # Note: We recalculate pred_s to allow gradients to flow from the adversary prediction
    pred_s = adversary(features)
    pred_y = classifier(features)

    loss_y = 0
    if mode == 1:
      if 0 in s:
          mask = s == 0
          loss_y = criterion(pred_y[mask], y[mask])
    else:
      loss_y = criterion(pred_y, y)

    loss_combined = loss_y - alpha * criterion(pred_s, s)

    loss_combined.backward()
    optimizer_fe.step()

    return loss_y.item(), loss_s.item()



In [8]:
def train_adversarial_model(epochs, dataloader, models, optimizers, criterion, hp_adversarial, mode):

  # Example training loop
  for epoch in range(epochs):
      running_loss_y = 0.0
      running_loss_s = 0.0
      for i, data in enumerate(dataloader):
          inputs, labels, sensitive_attributes = data

          # Convert labels and sensitive attributes to the correct shape and type if necessary
          labels = labels.view(-1, 1).float()  # Adjust for BCEWithLogitsLoss
          sensitive_attributes = sensitive_attributes.view(-1, 1).float()  # Adjust for BCEWithLogitsLoss

          # Training step
          loss_y, loss_s = train_step(
              models,
              optimizers,
              criterion, inputs, labels, sensitive_attributes, hp_adversarial,
              mode
          )

          # Update running loss
          running_loss_y += loss_y
          running_loss_s += loss_s

      # Logging
      print(f"Epoch {epoch + 1}, Loss Y: {running_loss_y / len(dataloader)}, Loss S: {running_loss_s / len(dataloader)}")


# Train the domain adaptation part to predict Â2

```
# This is formatted as code
```



In [9]:
from torch.utils.data import DataLoader, TensorDataset

df_mix = pd.concat([df1, df2])

# Assuming 'df' is your pandas DataFrame
X1 = df_mix.drop(columns=['salary_ >50K', 'sex_ Male', 'd']).values
Y1 = df_mix['sex_ Male'].values
A1 = df_mix['d'].values

# Convert to PyTorch tensors
X1_tensor = torch.tensor(X1, dtype=torch.float32)
Y1_tensor = torch.tensor(Y1[:, None], dtype=torch.float32)  # Add an extra dimension to match the expected shape
A1_tensor = torch.tensor(A1[:, None], dtype=torch.float32)  # Add an extra dimension

# Create a TensorDataset and DataLoader
dataset = TensorDataset(X1_tensor, Y1_tensor, A1_tensor)
dataloader1 = DataLoader(dataset, batch_size=32, shuffle=True)

In [10]:
# Hyperparameters and dimensions
input_dim = 107  # Example input dimension, adjust as needed
hidden_dim = 64  # Hidden layer size
output_dim = 1   # Binary classification
sensitive_attr_dim = 1  # Assuming binary sensitive attribute

# Instantiate models
h1 = FeatureExtractor(input_dim, hidden_dim)
s_a_predictor = Classifier(hidden_dim, output_dim)
domain_classifier = Adversary(hidden_dim, sensitive_attr_dim)

models1 = (h1, s_a_predictor, domain_classifier)

# Optimizers
optimizer_h1 = optim.RMSprop(h1.parameters(), lr= 0.0001)
optimizer_s_a_p= optim.RMSprop(s_a_predictor.parameters(), lr= 0.0001)
optimizer_d_c = optim.RMSprop(domain_classifier.parameters(), lr= 0.0001)

optimizers1 = (optimizer_h1, optimizer_s_a_p, optimizer_d_c)

# Loss function
criterion = nn.BCEWithLogitsLoss()

alpha = 0.01  # Weight for adversarial loss

In [11]:
epochs = 30

train_adversarial_model(epochs, dataloader1, models1, optimizers1, criterion, alpha, 1)

Epoch 1, Loss Y: 0.6245725642018796, Loss S: 0.34228424577514527
Epoch 2, Loss Y: 0.5743255516283639, Loss S: 0.34248154385341056
Epoch 3, Loss Y: 0.5313864816675486, Loss S: 0.344154826967386
Epoch 4, Loss Y: 0.49015928450642493, Loss S: 0.34505286411343833
Epoch 5, Loss Y: 0.45582054701792935, Loss S: 0.3446372425575212
Epoch 6, Loss Y: 0.4261260122968312, Loss S: 0.3450705972015272
Epoch 7, Loss Y: 0.4053308681121978, Loss S: 0.34530231958229557
Epoch 8, Loss Y: 0.3865554313118659, Loss S: 0.34590256881986126
Epoch 9, Loss Y: 0.3708979756606117, Loss S: 0.34411363822567204
Epoch 10, Loss Y: 0.36043610275962734, Loss S: 0.34356570769328265
Epoch 11, Loss Y: 0.34989884224938034, Loss S: 0.3439422819675596
Epoch 12, Loss Y: 0.3411113126075338, Loss S: 0.345488194409165
Epoch 13, Loss Y: 0.3346946685920764, Loss S: 0.34574201559622303
Epoch 14, Loss Y: 0.3286405994112225, Loss S: 0.34599252416585774
Epoch 15, Loss Y: 0.322851879057458, Loss S: 0.3481817289579418
Epoch 16, Loss Y: 0.3201

# Train the Fairness Part now that we can get Â2

In [12]:
# Assuming 'df' is your pandas DataFrame
X2 = df2.drop(columns=['salary_ >50K', 'sex_ Male', 'd']).values
Y2 = df2['salary_ >50K'].values

# Convert to PyTorch tensors
X2_tensor = torch.tensor(X2, dtype=torch.float32)
Y2_tensor = torch.tensor(Y2[:, None], dtype=torch.float32)  # Add an extra dimension to match the expected shape



### Predict Â2 with the adversarial domain adaptation network we trained

In [13]:
# compute the real A2 for later
true_A2_tensor = torch.tensor((true_A2.values)[:, None], dtype=torch.float32)  # Add an extra dimension

#compute Â2
A2_tensor = (torch.sigmoid(s_a_predictor(h1(X2_tensor))) > 0.5).int()
A2_tensor

tensor([[0],
        [0],
        [1],
        ...,
        [1],
        [1],
        [0]], dtype=torch.int32)

In [14]:
# Create a TensorDataset and DataLoader
dataset = TensorDataset(X2_tensor, Y2_tensor, A2_tensor)
dataloader2 = DataLoader(dataset, batch_size=32, shuffle=True)

In [15]:
# Hyperparameters and dimensions
input_dim = 107  # Example input dimension, adjust as needed
hidden_dim = 64  # Hidden layer size
output_dim = 1   # Binary classification
sensitive_attr_dim = 1  # Assuming binary sensitive attribute

# Instantiate models
h2 = FeatureExtractor(input_dim, hidden_dim)
label_predictor = Classifier(hidden_dim, output_dim)
bias_predictor = Adversary(hidden_dim, sensitive_attr_dim)

models2 = (h2, label_predictor, bias_predictor)

# Optimizers
optimizer_h2 = optim.RMSprop(h2.parameters(), lr= 0.0002)
optimizer_l_p= optim.RMSprop(label_predictor.parameters(), lr= 0.0002)
optimizer_b_p = optim.RMSprop(bias_predictor.parameters(), lr= 0.0002)

optimizers2 = (optimizer_h2, optimizer_l_p, optimizer_b_p)

# Loss function
criterion = nn.BCEWithLogitsLoss()

beta = 0.3  # Weight for adversarial loss

In [16]:
epochs = 300

train_adversarial_model(epochs, dataloader2, models2, optimizers2, criterion, beta, 2)

Epoch 1, Loss Y: 0.6831061490020662, Loss S: 0.741703858353057
Epoch 2, Loss Y: 0.5126694511130171, Loss S: 0.6927883934299901
Epoch 3, Loss Y: 0.4891817344809478, Loss S: 0.6943424397482062
Epoch 4, Loss Y: 0.4739761069979308, Loss S: 0.6881923546206277
Epoch 5, Loss Y: 0.4726821116681369, Loss S: 0.6943013639944904
Epoch 6, Loss Y: 0.45183098892558293, Loss S: 0.690676434984747
Epoch 7, Loss Y: 0.4511213215454569, Loss S: 0.6921497015458233
Epoch 8, Loss Y: 0.44603801490563266, Loss S: 0.692603516128828
Epoch 9, Loss Y: 0.42584619881971825, Loss S: 0.6934196352958679
Epoch 10, Loss Y: 0.42176645122609047, Loss S: 0.6895025305028232
Epoch 11, Loss Y: 0.415592186574666, Loss S: 0.6895635544129137
Epoch 12, Loss Y: 0.4059494501858387, Loss S: 0.6918244148200413
Epoch 13, Loss Y: 0.40203535451360467, Loss S: 0.6875761989152657
Epoch 14, Loss Y: 0.39008041049511927, Loss S: 0.6860610696504701
Epoch 15, Loss Y: 0.39126933773733535, Loss S: 0.6908730577747777
Epoch 16, Loss Y: 0.38638375291

# Evaluation


In [17]:
predictions = label_predictor(h2(X2_tensor))

predictions_binary = (predictions > 0.5).float()  # Convert to binary predictions
acc = accuracy_score(Y2_tensor, predictions_binary)
f1 = f1_score(Y2_tensor, predictions_binary)
acc, f1

(0.8475375995281628, 0.4009269988412515)

In [19]:
from fairlearn.metrics import demographic_parity_difference, equalized_odds_difference

# Compute DP and EO
dp = demographic_parity_difference(Y2_tensor, predictions_binary, sensitive_features=true_A2_tensor)
eo = equalized_odds_difference(Y2_tensor, predictions_binary, sensitive_features=true_A2_tensor)
dp,eo

(0.03784906963960751, 0.004133397519961446)

In [20]:
fairda_metrics = [acc, f1, dp, eo]

In [21]:
print(f"FairDa result on Adult dataset : \nacc: {acc}\nf1: {f1}\ndp: {dp}\neo: {eo}")

FairDa result on Adult dataset : 
acc: 0.8475375995281628
f1: 0.4009269988412515
dp: 0.03784906963960751
eo: 0.004133397519961446


# Train a Vanilla for benchmark

In [28]:
import torch
from torch.utils.data import DataLoader

input_dim = 107  # Example input dimension, adjust as needed
hidden_dim = 64  # Hidden layer size
output_dim = 1   # Binary classification

# Instantiate models
feature_extractor = FeatureExtractor(input_dim, hidden_dim)
classifier = Classifier(hidden_dim, output_dim)

# Define the loss function and optimizer
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.RMSprop(list(feature_extractor.parameters()) + list(classifier.parameters()), lr=0.0001)
# Define a learning rate scheduler
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Define the DataLoader for your dataset
train_loader = DataLoader(TensorDataset(X2_tensor, Y2_tensor,), batch_size=32, shuffle=True)

# Training loop
num_epochs = 100  # Define the number of epochs

clip_value = 1.0 # Maximum allowed norm for gradients

for epoch in range(num_epochs):
    for x, y in train_loader:  # Ignoring sensitive attributes for vanilla training
        optimizer.zero_grad()  # Clear previous gradients
        features = feature_extractor(x)
        predictions = classifier(features)
        loss = criterion(predictions, y.view(-1, 1).float())
        loss.backward()  # Compute gradients
        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(list(feature_extractor.parameters()) + list(classifier.parameters()), clip_value)

        optimizer.step()  # Update model weights

    print(f"Epoch {epoch+1}, Loss: {loss.item()}")

Epoch 1, Loss: 0.3684524893760681
Epoch 2, Loss: 0.5684723854064941
Epoch 3, Loss: 0.5175544023513794
Epoch 4, Loss: 0.4307335913181305
Epoch 5, Loss: 0.6406301259994507
Epoch 6, Loss: 0.5436066389083862
Epoch 7, Loss: 0.543087363243103
Epoch 8, Loss: 0.5228539705276489
Epoch 9, Loss: 0.4462698698043823
Epoch 10, Loss: 0.35516899824142456
Epoch 11, Loss: 0.32013535499572754
Epoch 12, Loss: 0.5220738053321838
Epoch 13, Loss: 0.6063972115516663
Epoch 14, Loss: 0.7262631058692932
Epoch 15, Loss: 0.4971524775028229
Epoch 16, Loss: 0.43266284465789795
Epoch 17, Loss: 0.44221264123916626
Epoch 18, Loss: 0.5493496060371399
Epoch 19, Loss: 0.3286898136138916
Epoch 20, Loss: 0.3830309510231018
Epoch 21, Loss: 0.576054036617279
Epoch 22, Loss: 0.40698304772377014
Epoch 23, Loss: 0.4009970426559448
Epoch 24, Loss: 0.5060322880744934
Epoch 25, Loss: 0.30005866289138794
Epoch 26, Loss: 0.3599678874015808
Epoch 27, Loss: 0.2312154471874237
Epoch 28, Loss: 0.4251812994480133
Epoch 29, Loss: 0.4052868

In [29]:
from sklearn.metrics import accuracy_score, f1_score

predictions = classifier(feature_extractor(X2_tensor))

predictions_binary = (predictions > 0.5).float()  # Convert to binary predictions
acc = accuracy_score(Y2_tensor, predictions_binary)
f1 = f1_score(Y2_tensor, predictions_binary)
acc, f1

(0.865526393394279, 0.5555555555555556)

In [30]:
from fairlearn.metrics import demographic_parity_difference, equalized_odds_difference

# Compute DP and EO
dp = demographic_parity_difference(Y2_tensor, predictions_binary, sensitive_features=true_A2_tensor)
eo = equalized_odds_difference(Y2_tensor, predictions_binary, sensitive_features=true_A2_tensor)
dp,eo

(0.11812581226529573, 0.28377682973390217)

In [31]:
vanilla_metrics = [acc, f1, dp, eo]

In [32]:
print(f"Vanilla result on Adult dataset : \nacc: {acc}\nf1: {f1}\ndp: {dp}\neo: {eo}")

Vanilla result on Adult dataset : 
acc: 0.865526393394279
f1: 0.5555555555555556
dp: 0.11812581226529573
eo: 0.28377682973390217


In [33]:
pd.DataFrame([fairda_metrics, vanilla_metrics], columns= ["accuracy", "f1_score", "demographic_parity", "equal_odds"], index = ["FairDA", "Vanilla"])

Unnamed: 0,accuracy,f1_score,demographic_parity,equal_odds
FairDA,0.847538,0.400927,0.037849,0.004133
Vanilla,0.865526,0.555556,0.118126,0.283777
