In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import warnings

warnings.filterwarnings('ignore')

In [3]:
from tqdm import tqdm
import os
import data_utils
import model_utils
from attack_utils import get_CSMIA_case_by_case_results, CSMIA_attack, LOMIA_attack
from data_utils import oneHotCatVars, filter_random_data_by_conf_score
from experiment_utils import MIAExperiment
from disparity_inference_utils import get_confidence_array, draw_confidence_array_scatter, get_indices_by_group_condition, get_corr_btn_sens_and_out_per_subgroup, get_slopes, get_angular_difference, calculate_stds, get_mutual_info_btn_sens_and_out_per_subgroup
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.neural_network._base import ACTIVATIONS
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics import roc_curve, auc, roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.decomposition import PCA
from sklearn.inspection import permutation_importance
from fairlearn.metrics import equalized_odds_difference, demographic_parity_difference
import matplotlib.pyplot as plt
import seaborn as sns
import tabulate
import pickle
# import utils
import copy

import matplotlib as mpl

# Setting the font family, size, and weight globally
mpl.rcParams['font.family'] = 'DejaVu Sans'
mpl.rcParams['font.size'] = 8
mpl.rcParams['font.weight'] = 'light'

In [4]:
i = -0.4
j = -0.1
experiment = MIAExperiment(sampling_condition_dict = 
    {
            'subgroup_col_name': 'SEX',
            'n': 25000,
            'correlation_by_subgroup_values': [i, j],
            # 'fixed_corr_in_test_data': True
    }, shortname = f"Corr_btn_sens_and_output_for_male_({i})_for_female_({j})", random_state = 0
)

  0%|          | 0/2 [00:00<?, ?it/s]

{0: {(0, 1): 8750, (0, 0): 3750, (1, 1): 3750, (1, 0): 8750}, 1: {}}


 50%|█████     | 1/2 [00:01<00:01,  1.02s/it]

{0: {(0, 1): 8750, (0, 0): 3750, (1, 1): 3750, (1, 0): 8750}, 1: {(0, 1): 6875, (0, 0): 5625, (1, 1): 5625, (1, 0): 6875}}


100%|██████████| 2/2 [00:02<00:00,  1.01s/it]


[12500, 12500, 12500, 12500]


In [5]:
save_model=True
print(f"Training classifier for experiment: {experiment}")
try:
    experiment.clf = model_utils.load_model(f'<PATH_TO_MODEL>/{experiment.ds.ds.filenameroot}_target_model.pkl')
    print(f"Loaded classifier for experiment from file: {experiment}")
except:
    base_model = model_utils.get_model(max_iter=500)
    experiment.clf = copy.deepcopy(base_model)
    experiment.clf.fit(experiment.X_train, experiment.y_tr_onehot)

    if save_model:
        model_utils.save_model(experiment.clf, f'<PATH_TO_MODEL>/{experiment.ds.ds.filenameroot}_target_model.pkl')

Training classifier for experiment: Census19_subgroup_col_name_SEX_n_25000_correlation_by_subgroup_values_[-0.4, -0.1]_rs0
Loaded classifier for experiment from file: Census19_subgroup_col_name_SEX_n_25000_correlation_by_subgroup_values_[-0.4, -0.1]_rs0


In [7]:
dataset = torch.utils.data.TensorDataset(torch.tensor(experiment.X_train.values).float(), torch.tensor(experiment.y_tr_onehot).float())
train_loader = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=True)

In [6]:
list(experiment.X_train.columns).index('SEX_0')

38

In [8]:
class PortedMLPClassifier(nn.Module):
    def __init__(self, n_in_features=37, n_out_features=2):
        super(PortedMLPClassifier, self).__init__()
        layers = [
            nn.Linear(in_features=n_in_features, out_features=32),
            nn.ReLU(),
            nn.Linear(in_features=32, out_features=16),
            nn.ReLU(),
            nn.Linear(in_features=16, out_features=8),
            nn.ReLU(),
            nn.Linear(in_features=8, out_features=n_out_features),
            nn.Softmax(dim=1)
        ]
        self.layers = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor):
        return self.layers(x)
    
    def predict_proba(self, x: torch.Tensor):
        return self.forward(x)
    
class MLPClassifierMutualInfoReg(nn.Module):
    def __init__(self, n_in_features=37, n_feat_dim=10, n_out_features=2):
        super(MLPClassifierMutualInfoReg, self).__init__()
        layers = [
            nn.Linear(in_features=n_in_features, out_features=32),
            nn.ReLU(),
            nn.Linear(in_features=32, out_features=16),
            nn.ReLU(),
            nn.Linear(in_features=16, out_features=8),
            nn.ReLU(),
            nn.Linear(in_features=8, out_features=n_feat_dim),
            # nn.Softmax(dim=1)
        ]
        self.layers = nn.Sequential(*layers)
        self.k = n_feat_dim//2
        self.st_layer = nn.Linear(in_features=n_feat_dim, out_features=self.k*2)
        self.classifier = nn.Linear(in_features=self.k, out_features=n_out_features)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x: torch.Tensor):
        x = self.layers(x)
        
        statis = self.st_layer(x)
        mu, std = statis[:, :self.k], statis[:, self.k:]
        std = torch.functional.F.softplus(std-5)
        eps = torch.FloatTensor(std.size()).normal_().to(x.device)
        x = mu + eps * std
        x = self.classifier(x)
        x = self.softmax(x)
        return x, mu, std
    
    def predict_proba(self, x: torch.Tensor):
        return self.forward(x)[0]

def train_mir_classifier(model, train_loader, beta=0.1, selective_reg=False):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch in tqdm(range(10)):
        for batch_idx, (data, target) in enumerate(train_loader):
            model.train()
            optimizer.zero_grad()
            data, target = data.to('mps'), target.to('mps')
            output, mu, std = model(data)
            info_loss = - 0.5 * (1 + 2 * (std+1e-7).log() - mu.pow(2) - std.pow(2)).sum(dim=1)
            if selective_reg:
                info_loss = info_loss * data[:, 38]
            info_loss = info_loss.mean()
            loss = nn.BCELoss()(output, target) + beta * info_loss
            loss.backward()
            optimizer.step()

# test on test set
def test_mir(model, X_test, y_te_onehot):
    x_te = X_test.values
    dataset = torch.utils.data.TensorDataset(torch.tensor(x_te).float(), torch.tensor(y_te_onehot).float())
    test_loader = torch.utils.data.DataLoader(dataset, batch_size=x_te.shape[0], shuffle=False)

    model.eval()
    y_pred = []
    y_true = []
    for batch_idx, (data, target) in enumerate(test_loader):
        data, target = data.to('mps'), target.to('mps')
        output, _, _ = model(data)
        y_pred.append(output.cpu().detach().numpy())
        y_true.append(target.cpu().detach().numpy())

    y_pred = np.concatenate(y_pred)
    y_true = np.concatenate(y_true)
    y_pred = np.argmax(y_pred, axis=1)
    y_true = np.argmax(y_true, axis=1)

    return accuracy_score(y_true, y_pred)

In [17]:
betas = [0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5]

model_zero_by_beta = {}

for beta in betas:
    try:
        model = MLPClassifierMutualInfoReg(n_in_features=experiment.X_train.shape[1], n_feat_dim=10, n_out_features=experiment.y_tr_onehot.shape[1]).to('mps')
        model.load_state_dict(torch.load(f"<PATH_TO_MODEL>/{experiment.ds.ds.filenameroot}_mutual_info_reg_{beta}.pt"))
    except:
        model = MLPClassifierMutualInfoReg(n_in_features=experiment.X_train.shape[1], n_feat_dim=10, n_out_features=experiment.y_tr_onehot.shape[1]).to('mps')
        train_mir_classifier(model, train_loader, beta=beta)
        torch.save(model.state_dict(), f"<PATH_TO_MODEL>/{experiment.ds.ds.filenameroot}_mutual_info_reg_{beta}.pt")
    
    model_zero_by_beta[beta] = model


100%|██████████| 10/10 [00:31<00:00,  3.11s/it]


In [18]:
betas = [0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5]

model_by_beta = {}

for beta in betas:
    try:
        model = MLPClassifierMutualInfoReg(n_in_features=experiment.X_train.shape[1], n_feat_dim=10, n_out_features=experiment.y_tr_onehot.shape[1]).to('mps')
        model.load_state_dict(torch.load(f"<PATH_TO_MODEL>/{experiment.ds.ds.filenameroot}_disp_aware_mutual_info_reg_{beta}.pt"))
    except:
        model = MLPClassifierMutualInfoReg(n_in_features=experiment.X_train.shape[1], n_feat_dim=10, n_out_features=experiment.y_tr_onehot.shape[1]).to('mps')
        train_mir_classifier(model, train_loader, beta=beta, selective_reg=True)
        torch.save(model.state_dict(), f"<PATH_TO_MODEL>/{experiment.ds.ds.filenameroot}_disp_aware_mutual_info_reg_{beta}.pt")
    
    model_by_beta[beta] = model


100%|██████████| 10/10 [00:29<00:00,  2.93s/it]


In [19]:
perf_dict = {}

subgroup_vals_tr = experiment.X_train[['SEX_0']].values.flatten()
num_of_subgroups = 2

for beta in model_zero_by_beta:
    model = model_zero_by_beta[beta]
    test_acc = test_mir(model, experiment.X_test, experiment.y_te_onehot)
    sens_pred_CSMIA, _ = CSMIA_attack(model, experiment.X_train, experiment.y_tr, experiment.ds.ds.meta)
    sens_pred_LOMIA = LOMIA_attack(experiment, model, experiment.X_train, experiment.y_tr, experiment.ds.ds.meta)
    correct_indices_CSMIA = (sens_pred_CSMIA == experiment.sens_val_ground_truth)
    correct_indices_LOMIA = (sens_pred_LOMIA == experiment.sens_val_ground_truth)
    perf_dict[('zero', test_acc)] = {
        'test_acc': test_acc,
        'ASRD_CSMIA': round(100 * np.ptp([correct_indices_CSMIA[subgroup_vals_tr==i].mean() for i in range(num_of_subgroups)]), 2),
        'ASRD_LOMIA': round(100 * np.ptp([correct_indices_LOMIA[subgroup_vals_tr==i].mean() for i in range(num_of_subgroups)]), 2),
        'beta': beta,
        'type': 'MIR'
    }

for beta in model_by_beta:
    model = model_by_beta[beta]
    test_acc = test_mir(model, experiment.X_test, experiment.y_te_onehot)
    sens_pred_CSMIA, _ = CSMIA_attack(model, experiment.X_train, experiment.y_tr, experiment.ds.ds.meta)
    sens_pred_LOMIA = LOMIA_attack(experiment, model, experiment.X_train, experiment.y_tr, experiment.ds.ds.meta)
    correct_indices_CSMIA = (sens_pred_CSMIA == experiment.sens_val_ground_truth)
    correct_indices_LOMIA = (sens_pred_LOMIA == experiment.sens_val_ground_truth)
    perf_dict[('zero', test_acc)] = {
        'test_acc': test_acc,
        'ASRD_CSMIA': round(100 * np.ptp([correct_indices_CSMIA[subgroup_vals_tr==i].mean() for i in range(num_of_subgroups)]), 2),
        'ASRD_LOMIA': round(100 * np.ptp([correct_indices_LOMIA[subgroup_vals_tr==i].mean() for i in range(num_of_subgroups)]), 2),
        'beta': beta,
        'type': 'DAMIR'
    }

In [20]:
perf_df = pd.DataFrame.from_dict(perf_dict, orient='index')

In [21]:
perf_df

Unnamed: 0,Unnamed: 1,test_acc,ASRD_CSMIA,ASRD_LOMIA,beta,type
zero,0.72594,0.72594,15.38,14.35,0.001,MIR
zero,0.73786,0.73786,15.97,15.16,0.01,MIR
zero,0.72486,0.72486,8.99,18.86,0.1,MIR
zero,0.63254,0.63254,3.98,17.55,0.2,MIR
zero,0.45858,0.45858,0.34,0.44,0.3,MIR
zero,0.51934,0.51934,0.24,0.47,0.4,MIR
zero,0.57708,0.57708,0.39,2.26,0.5,MIR
zero,0.69614,0.69614,13.34,14.76,0.001,DAMIR
zero,0.74882,0.74882,12.77,15.25,0.01,DAMIR
zero,0.73604,0.73604,6.34,14.31,0.1,DAMIR
