In [1]:
from train_dnn import get_data
from data import adult, compas, bank, german, loans_default, census
from models.trainer import STDTrainer, SenSeiTrainer
from models.model import MLP
from seeker.random import RandomSelectPairSeeker, RandomSelectSeeker, RangeGenSeeker, DistributionGenSeeker
from seeker.gradiant_based import WhiteboxSeeker, BlackboxSeeker, FoolSeeker
from distances.normalized_mahalanobis_distances import ProtectedSEDistances
from distances.sensitive_subspace_distances import LogisticRegSensitiveSubspace
from distances.binary_distances import BinaryDistance
from inFairness.distances import SquaredEuclideanDistance
from utils import UnfairMetric, load_model
import torch
import random

torch.set_default_dtype(torch.float64)

%load_ext autoreload
%autoreload 2

In [2]:
def flip_rate(data_name, trainer_name, sensitive_vars):
    use_sensitive_attr = True
    rho=10.0
    note = f'_rho={rho}' if trainer_name == 'sensei' else ''
    device = 'cpu'

    data_choices = {
        'adult': adult,
        'german': german,
        'loans_default': loans_default,
        'census': census
    }
    data = data_choices[data_name]

    if data_name in ['adult', 'german']:
        path = 'no_norm'
    elif data_name in ['census', 'loans_default']:
        path = 'new'

    dataset, train_dl, test_dl = get_data(data, 0, sensitive_vars)
    dataset.use_sensitive_attr = use_sensitive_attr
    feature_dim = dataset.dim_feature()
    output_dim = 2

    data_gen = data.Generator(use_sensitive_attr, sensitive_vars, device)
    data_gen.to(device)
    model = MLP(input_size=feature_dim, output_size=output_dim, data_gen=data_gen, n_layers=4, norm=False)

    load_model(model, data_name, trainer_name, use_sensitive_attr=use_sensitive_attr, \
            sensitive_vars=sensitive_vars, id=0, note=note, path=path)
    model = model.to(device)

    generated = data_gen.gen_by_range(100000)
    generated_ = generated.clone()
    generated_[:, dataset.sensitive_idxs] = 1- generated_[:, dataset.sensitive_idxs]
    y = model.get_prediction(generated)
    y_ = model.get_prediction(generated_)
    return torch.sum(y != y_).to(torch.float64)

In [3]:
import pandas as pd
df = pd.DataFrame(columns=['dataset', 'trainer', 'sensitive attr', 'flip-rate'])
for d in ['adult', 'census', 'german', 'loans_default']:
    sensitive_vars = {
        'adult': [['sex_Male'], ['race_White']],
        'census': [['sex_Male'], ['race_White']],
        'german': [['sex']],
        'loans_default': [['SEX']]
    }[d]
    for s in sensitive_vars:
        for t in ['std', 'sensei']:
            print(d, t, s)
            fr = flip_rate(data_name=d, trainer_name=t, sensitive_vars=s).item()
            df = pd.concat([df, pd.DataFrame.from_records([{'dataset': d, 'trainer': t, 'sensitive_vars': s, 'flip_rate': fr}])])
df

adult std ['sex_Male']
adult sensei ['sex_Male']
adult std ['race_White']
adult sensei ['race_White']
census std ['sex_Male']
census sensei ['sex_Male']
census std ['race_White']
census sensei ['race_White']
german std ['sex']
german sensei ['sex']
loans_default std ['SEX']
loans_default sensei ['SEX']


Unnamed: 0,dataset,trainer,sensitive attr,flip-rate,sensitive_vars,flip_rate
0,adult,std,,,[sex_Male],85.0
0,adult,sensei,,,[sex_Male],31.0
0,adult,std,,,[race_White],116.0
0,adult,sensei,,,[race_White],1.0
0,census,std,,,[sex_Male],104.0
0,census,sensei,,,[sex_Male],118.0
0,census,std,,,[race_White],14.0
0,census,sensei,,,[race_White],0.0
0,german,std,,,[sex],40.0
0,german,sensei,,,[sex],0.0
