In [1]:
%load_ext autoreload
%autoreload 2

from pathlib import Path

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn.functional as F
import torch.optim as optim

import os, sys
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), "../../")))
from libs import data as dt, neuronshap as ns, sim
from cfgs.fedargs import *

from fairlearn.metrics import (
    demographic_parity_difference,
    demographic_parity_ratio,
    equalized_odds_difference,
    equalized_odds_ratio,
)
from libs.helpers.finance import bin_hours_per_week, bin_edu_level, bin_age_level, bin_marital_status_level
from libs.helpers.metrics import (
    conditional_demographic_parity_difference,
    conditional_demographic_parity_ratio,
)
from libs.helpers.plot import group_box_plots

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv('../../data/dutch/dutch.csv')
#df.head(100)

In [38]:
df['sex']=[1 if v == 'male' else 0 for v in df['sex']]

le = preprocessing.LabelEncoder()
for i in df.columns:
    if df[i].dtypes == 'object':
        df[i] = le.fit_transform(df[i])

'''
cts_features = ['household_size', 'edu_level', 'age', 'household_position']

ss = StandardScaler()

df[cts_features] = ss.fit_transform(df[cts_features])
'''

df.head()
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60420 entries, 0 to 60419
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype
---  ------                --------------  -----
 0   sex                   60420 non-null  int64
 1   age                   60420 non-null  int64
 2   household_position    60420 non-null  int64
 3   household_size        60420 non-null  int64
 4   prev_residence_place  60420 non-null  int64
 5   citizenship           60420 non-null  int64
 6   country_birth         60420 non-null  int64
 7   edu_level             60420 non-null  int64
 8   economic_status       60420 non-null  int64
 9   cur_eco_activity      60420 non-null  int64
 10  marital_status        60420 non-null  int64
 11  occupation            60420 non-null  int64
dtypes: int64(12)
memory usage: 5.5 MB


In [4]:
df["marital_status"].value_counts()

2    36655
1    19656
4     3566
3      543
Name: marital_status, dtype: int64

In [5]:
import copy

train, test_ = train_test_split(df, test_size=0.2, random_state=42)
test_ = test_.reset_index(drop=True)
train_oh, test_oh = copy.deepcopy(train), copy.deepcopy(test_)

cts_features = ['household_size', 'edu_level', 'age', 'household_position']
ss = StandardScaler()

train_oh[cts_features] = ss.fit_transform(train_oh[cts_features])
test_oh[cts_features] = ss.fit_transform(test_oh[cts_features])

In [6]:
test_.head(10)

Unnamed: 0,sex,age,household_position,household_size,prev_residence_place,citizenship,country_birth,edu_level,economic_status,cur_eco_activity,marital_status,occupation
0,0,7,1121,112,1,1,1,3,111,122,2,0
1,1,4,1110,114,1,1,1,2,111,131,1,0
2,0,7,1121,112,1,1,1,4,111,134,2,0
3,1,8,1132,125,1,1,1,1,111,134,4,0
4,1,8,1122,114,1,1,1,5,120,138,2,1
5,1,10,1121,112,1,1,1,3,111,122,2,1
6,0,7,1122,114,1,1,1,3,111,133,2,0
7,0,6,1110,114,1,1,1,5,111,131,1,1
8,0,11,1210,111,1,1,1,1,111,139,4,0
9,1,8,1110,113,1,1,1,1,111,122,1,0


In [7]:
#https://github.com/tailequy/fairness_dataset/blob/main/experiments/Fair-metrics.ipynb

In [8]:
m_dh_oh = test_oh.loc[test_oh["sex"] == 1]
m_dh_oh = m_dh_oh.head(100)
fm_dh_oh = test_oh.loc[test_oh["sex"] == 0]
fm_dh_oh = fm_dh_oh.head(100)

In [9]:
X_train = train_oh.drop(columns="occupation").values
Y_train = train_oh['occupation'].values
X_test = test_oh.drop(columns="occupation").values
Y_test = test_oh['occupation'].values
X_m = m_dh_oh.drop(columns="occupation").values
Y_m = m_dh_oh['occupation'].values
X_fm = fm_dh_oh.drop(columns="occupation").values
Y_fm = fm_dh_oh['occupation'].values

#creating torch dataset and loader using original dataset. 
#to use resampled dataset, replace ex. xtrain with xtrain_over etc.
train_data = torch.utils.data.TensorDataset(torch.tensor(X_train).float(), torch.tensor(Y_train).long())
test_data = torch.utils.data.TensorDataset(torch.tensor(X_test).float(), torch.tensor(Y_test).long())
m_data = torch.utils.data.TensorDataset(torch.tensor(X_m).float(), torch.tensor(Y_m).long())
fm_data = torch.utils.data.TensorDataset(torch.tensor(X_fm).float(), torch.tensor(Y_fm).long())

train_loader = torch.utils.data.DataLoader(train_data,batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=len(test_data))
m_loader = torch.utils.data.DataLoader(m_data, batch_size=1)
fm_loader = torch.utils.data.DataLoader(fm_data, batch_size=1)

In [10]:
class BasicNet(torch.nn.Module):
    
    def __init__(self, num_features, num_classes):
        super().__init__()
        self.num_features = num_features
        self.num_classes = num_classes
        self.layers = 0
        
        self.lin1 = torch.nn.Linear(self.num_features,  150)        
        self.lin2 = torch.nn.Linear(50, 50)        
        self.lin3 = torch.nn.Linear(50, 50)
        
        self.lin4 = torch.nn.Linear(150, 150) 
        
        self.lin5 = torch.nn.Linear(50, 50)        
        self.lin6 = torch.nn.Linear(50, 50)
        self.lin10 = torch.nn.Linear(150, self.num_classes)
        
        self.prelu = torch.nn.PReLU()
        self.dropout = torch.nn.Dropout(0.25)

    def forward(self, xin):
        self.layers = 0
        
        x = F.relu(self.lin1(xin))
        self.layers += 1
        
        #x = F.relu(self.lin2(x))
        #self.layers += 1
        for y in range(8):
            x = F.relu(self.lin4(x)) 
            self.layers += 1
           
        x = self.dropout(x)
        
        x = F.relu(self.lin10(x)) 
        self.layers += 1
        return x

In [11]:
def train(model, train_loader, optimizer, epoch):
    model.train()
    
    for inputs, target in train_loader:
      
        #inputs, target = inputs.to(device), target.to(device)
        
        optimizer.zero_grad()
        output = model(inputs)
        loss = loss_fn(output, target.long())
        # Backprop
        loss.backward()
        optimizer.step()
        ###

In [12]:
def test(model, test_loader):
    model.eval()
    
    test_loss = 0
    correct = 0
    test_size = 0
    
    with torch.no_grad():
      
        for inputs, target in test_loader:
            
            #inputs, target = inputs.to(device), target.to(device)
            
            output = model(inputs)
            test_size += len(inputs)
            test_loss += test_loss_fn(output, target.long()).item() 
            pred = output.max(1, keepdim=True)[1] 
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= test_size
    accuracy = correct / test_size
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, test_size,
        100. * accuracy))
    
    return test_loss, accuracy

In [14]:
model = BasicNet(11, 2)
test_accuracy = []
train_loss = []
nbr_epochs = 10
lr = 0.0005# 
weight_decay = 0

# Surrogate loss used for training
loss_fn = torch.nn.CrossEntropyLoss()
test_loss_fn = torch.nn.CrossEntropyLoss(reduction='sum')

optimizer = optim.Adam(model.parameters(), lr=lr,weight_decay=weight_decay)
#optimizer = optim.SGD(model.parameters(), lr=lr ,weight_decay=weight_decay)
#optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)

print('Training beginning...')
#start_time = time.time()

for epoch in range(1, nbr_epochs+1):
    print('Epoch ', epoch, ':')
    train(model, train_loader, optimizer, epoch)
    loss, acc = test(model, test_loader)
    
    # save results every epoch
    test_accuracy.append(acc)
    train_loss.append(loss)
    
#end_time = time.time()
#print('Training on ' + str(nbr_epochs) + ' epochs done in ', str(end_time-start_time),' seconds')

Training beginning...
Epoch  1 :

Test set: Average loss: 0.4272, Accuracy: 9843/12084 (81%)

Epoch  2 :

Test set: Average loss: 0.4473, Accuracy: 9775/12084 (81%)

Epoch  3 :

Test set: Average loss: 0.4231, Accuracy: 9876/12084 (82%)

Epoch  4 :

Test set: Average loss: 0.4185, Accuracy: 9879/12084 (82%)

Epoch  5 :

Test set: Average loss: 0.4156, Accuracy: 9878/12084 (82%)

Epoch  6 :

Test set: Average loss: 0.4288, Accuracy: 9850/12084 (82%)

Epoch  7 :

Test set: Average loss: 0.4142, Accuracy: 9899/12084 (82%)

Epoch  8 :

Test set: Average loss: 0.4276, Accuracy: 9869/12084 (82%)

Epoch  9 :

Test set: Average loss: 0.4525, Accuracy: 9776/12084 (81%)

Epoch  10 :

Test set: Average loss: 0.4256, Accuracy: 9830/12084 (81%)



In [15]:
with torch.no_grad():
    for inputs, target in test_loader:
        outputs = model(inputs)
        pred = outputs.max(1, keepdim=True)[1] 
        correct = pred.eq(target.view_as(pred)).sum().item()

        accuracy = correct / len(inputs)
        print('\nAccuracy: {}/{} ({:.0f}%)\n'.format(correct, len(inputs), 100. * accuracy))

Y_prob = F.softmax(outputs, dim=1)[:, 1]
Y_pred = outputs.max(1, keepdim=True)[1]

print(sum(Y_test), sum(Y_pred), sum(pred))


Accuracy: 9830/12084 (81%)

5749 tensor([6253]) tensor([6253])


<h1>Demographic Parity</h1>

<h2>Distribution of scores by sex</h2>

In [16]:
dpd = demographic_parity_difference(
    Y_test, Y_pred, sensitive_features=test_oh.sex,
)

dpr = demographic_parity_ratio(
    Y_test, Y_pred, sensitive_features=test_oh.sex,
)

print(f"Demographic parity difference: {dpd:.3f}")
print(f"Demographic parity ratio: {dpr:.3f}")

Demographic parity difference: 0.415
Demographic parity ratio: 0.429


In [17]:
test_edu_level_enum = test_oh.marital_status.map(bin_marital_status_level)

cdpd = conditional_demographic_parity_difference(
    Y_test, Y_pred, test_oh.sex, test_edu_level_enum,
)
cdpr = conditional_demographic_parity_ratio(
    Y_test, Y_pred, test_oh.sex, test_edu_level_enum,
)

print(f"Conditional demographic parity difference: {cdpd:.3f}")
print(f"Conditional demographic parity ratio: {cdpr:.3f}")

Conditional demographic parity difference: 0.429
Conditional demographic parity ratio: 0.422


<h1>Equalised Odds</h1>

<h2>Distribution of scores by sex</h2>

In [18]:
eod = equalized_odds_difference(
    Y_test, Y_pred, sensitive_features=test_oh.sex,
)
eor = equalized_odds_ratio(
    Y_test, Y_pred, sensitive_features=test_oh.sex,
)

print(f"Equalised odds difference: {eod:.3f}")
print(f"Equalised odds ratio: {eor:.3f}")

Equalised odds difference: 0.321
Equalised odds ratio: 0.242


<h1>Shapley based Neuron Pruning for Fairness</h1>

In [19]:
m_shapley_values = ns.calculate_shapley_values_fa(model, m_loader, 200)
print(m_shapley_values)
fm_shapley_values = ns.calculate_shapley_values_fa(model, fm_loader, 200)
print(fm_shapley_values)

[1.3166945e-01 2.1132304e-01 1.0634838e-01 ... 2.6115302e+02 3.7410034e+02
 0.0000000e+00]
[0.0000000e+00 2.1125238e-01 8.4636174e-02 ... 2.7463034e+02 3.6162741e+02
 0.0000000e+00]


In [32]:
diff_shap_values = m_shapley_values - fm_shapley_values
max_diff_shap_values_ind = np.argpartition(diff_shap_values, -90)[-90:]
diff_shap_values[max_diff_shap_values_ind]

array([  4.5969777,   4.6008453,   4.65987  ,   4.631687 ,   4.6606836,
         4.6752243,   4.713539 ,   4.759605 ,   4.794897 ,   4.815467 ,
         6.9915314,   5.67606  ,   5.8513794,   5.6466827,   5.784607 ,
         7.102893 ,   6.3799515,   5.892618 ,   5.058113 ,   5.4610977,
         7.452131 ,   5.9004517,   5.9830475,   6.3224106,   7.1398087,
         6.4203486,   5.5566444,   6.425995 ,   6.811697 ,   5.6460266,
         5.1214027,   4.956169 ,   5.622612 ,   5.456482 ,   7.0394897,
         5.4711304,   4.903763 ,   6.121045 ,   5.594913 ,   6.1698313,
         5.1408467,   5.0873475,   5.827774 ,   6.4503784,   4.8939457,
         5.054886 ,   5.4267373,   6.6896896,   6.2931075,   7.135277 ,
         5.1973953,   6.047806 ,   5.5499725,   6.9853897,   7.145598 ,
         6.176979 ,   4.9336243,   7.464119 ,   4.8598366,   5.058899 ,
         7.74572  ,   8.84137  ,  14.776009 ,   8.050171 ,  13.737697 ,
       333.34973  , 367.6952   ,  18.073124 ,  13.723068 ,  23.9

In [33]:
model_arr, model_slist = sim.get_net_arr(model)
model_arr[max_diff_shap_values_ind] = 0
updated_model = sim.get_arr_net(model, model_arr, model_slist)

In [34]:
with torch.no_grad():
    for inputs, target in test_loader:
        outputs = updated_model(inputs)
        pred = outputs.max(1, keepdim=True)[1] 
        correct = pred.eq(target.view_as(pred)).sum().item()

        accuracy = correct / len(inputs)
        print('\nAccuracy: {}/{} ({:.0f}%)\n'.format(correct, len(inputs), 100. * accuracy))
        

Y_prob = F.softmax(outputs, dim=1)[:, 1]
Y_pred = outputs.max(1, keepdim=True)[1]

print(sum(Y_test), sum(Y_pred), sum(pred))


Accuracy: 7695/12084 (64%)

5749 tensor([1562]) tensor([1562])


In [35]:
dpd = demographic_parity_difference(
    Y_test, Y_pred, sensitive_features=test_oh.sex,
)
dpr = demographic_parity_ratio(
    Y_test, Y_pred, sensitive_features=test_oh.sex,
)

print(f"Demographic parity difference: {dpd:.3f}")
print(f"Demographic parity ratio: {dpr:.3f}")

Demographic parity difference: 0.067
Demographic parity ratio: 0.587


In [36]:
test_marital_status_enum = test_oh.marital_status.map(bin_marital_status_level)

cdpd = conditional_demographic_parity_difference(
    Y_test, Y_pred, test_oh.sex, test_marital_status_enum,
)
cdpr = conditional_demographic_parity_ratio(
    Y_test, Y_pred, test_oh.sex, test_marital_status_enum,
)

print(f"Conditional demographic parity difference: {cdpd:.3f}")
print(f"Conditional demographic parity ratio: {cdpr:.3f}")

Conditional demographic parity difference: 0.054
Conditional demographic parity ratio: 0.714


In [37]:
eod = equalized_odds_difference(
    Y_test, Y_pred, sensitive_features=test_oh.sex,
)
eor = equalized_odds_ratio(
    Y_test, Y_pred, sensitive_features=test_oh.sex,
)

print(f"Equalised odds difference: {eod:.3f}")
print(f"Equalised odds ratio: {eor:.3f}")

Equalised odds difference: 0.002
Equalised odds ratio: 0.969
