In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to the imported
# project modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import warnings
import numpy as np

# NumPy 2.0 removed the top-level ``np.RankWarning`` alias; the class is now
# exposed from ``numpy.exceptions``. Older releases only provide the top-level
# name, so fall back to it when the new import location is unavailable.
try:
    from numpy.exceptions import RankWarning
except ImportError:
    RankWarning = np.RankWarning

# Silence every warning for the rest of the notebook run, and register an
# explicit 'ignore' rule for the polynomial-fit rank warning as well.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore', RankWarning)

In [3]:
from tqdm import tqdm
import os
import data_utils
import model_utils
from attack_utils import get_CSMIA_case_by_case_results, CSMIA_attack, LOMIA_attack, get_LOMIA_results
from data_utils import oneHotCatVars, filter_random_data_by_conf_score
from experiment_utils import MIAExperiment
from disparity_inference_utils import get_confidence_array, draw_confidence_array_scatter, get_indices_by_group_condition, get_corr_btn_sens_and_out_per_subgroup, get_slopes, get_angular_difference, calculate_stds, get_mutual_info_btn_sens_and_out_per_subgroup
from targeted_inference import get_angular_difference_range_for_subgroup,single_attribute_based_targeted_imputation, nested_attribute_based_targeted_imputation, single_attribute_based_targeted_ai, nested_attribute_based_targeted_ai
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.neural_network._base import ACTIVATIONS
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics import roc_curve, auc, roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.decomposition import PCA
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt
import seaborn as sns
import tabulate
import pickle
# import utils
import copy
from scipy.stats import kendalltau, spearmanr

import matplotlib as mpl

# Global font configuration (family, size, weight) applied through a single
# rcParams update.
mpl.rcParams.update({
    'font.family': 'DejaVu Sans',
    'font.size': 8,
    'font.weight': 'light',
})

# Load Dataset

In [4]:
# Base experiment that every per-depth model below is deep-copied from.
# The sampling condition names the subgroup column ('ST') and a size n;
# how MIAExperiment interprets n is defined in experiment_utils.
experiment_root = MIAExperiment(
    sampling_condition_dict=dict(subgroup_col_name='ST', n=1000),
    random_state=0,
    shortname="Corr_btn_sens_and_output_for_ST_ranging_from_0_to_-0.5_random_state_0",
)

# Train/Load Models

In [5]:
# Hidden-layer widths of the target model, keyed by the number of hidden layers.
depth_dict = {2: [16, 8], 3: [32, 16, 8], 4: [64, 32, 16, 8]}

# One independent copy of the root experiment per depth, keyed by depth.
experiments = {}

# When True, freshly trained models are written next to the cached ones.
save_model = True

for depth in depth_dict:
    print(f"Training classifier for experiment: {experiment_root.shortname} with depth: {depth}")
    # The depth-3 model is stored without a suffix; every other depth gets "_depth_<d>".
    postfix = f"_depth_{depth}" if depth != 3 else ""
    experiment = copy.deepcopy(experiment_root)
    experiments[depth] = experiment
    # Build the cache path once so the load and save branches cannot drift apart.
    model_path = f'<PATH_TO_MODEL>/{experiment.ds.ds.filenameroot}_target_model{postfix}.pkl'
    try:
        experiment.clf = model_utils.load_model(model_path)
        print(f"Loaded classifier for experiment from file: {experiment}")
    except Exception as load_error:
        # Best-effort cache: any load failure falls back to training, but the
        # reason is reported, and KeyboardInterrupt/SystemExit are no longer
        # swallowed as they were by the bare ``except:``.
        print(f"Could not load {model_path} ({load_error!r}); training a new classifier")
        experiment.clf = model_utils.get_model(max_iter=500, hidden_layer_sizes=depth_dict[depth])
        experiment.clf.fit(experiment.X_train, experiment.y_tr_onehot)

        if save_model:
            model_utils.save_model(experiment.clf, model_path)



Training classifier for experiment: Corr_btn_sens_and_output_for_ST_ranging_from_0_to_-0.5_random_state_0 with depth: 2
Training classifier for experiment: Corr_btn_sens_and_output_for_ST_ranging_from_0_to_-0.5_random_state_0 with depth: 3
Loaded classifier for experiment from file: Census19_subgroup_col_name_ST_n_1000_rs0
Training classifier for experiment: Corr_btn_sens_and_output_for_ST_ranging_from_0_to_-0.5_random_state_0 with depth: 4


In [6]:
# Attach the targeting configuration to every per-depth experiment: the
# subgroup columns used for nested targeting, the kappa values for the single
# and nested reports, and the subgroup column for single-attribute targeting.
for experiment_key, experiment in experiments.items():
    experiment.nested_attrib_cols = ['ST', 'SCHL', 'RAC1P', 'SEX']
    experiment.single_kappas = [1, 0.75, 0.5, 0.375, 0.25, 0.1, 0.05]
    experiment.nested_kappas = [0.5, 0.375, 0.25, 0.1]
    experiment.subgroup_col_name = 'ST'

In [7]:
# Run both attribute-inference attacks against every trained model and cache
# the results on the experiment object for the report cells below.
for experiment_key in experiments:
    experiment = experiments[experiment_key]
    # Pair the training features with the training labels. The original call
    # passed experiment.y_te here, unlike every other call in this cell.
    # NOTE(review): confirm against get_confidence_array how the label argument is used.
    experiment.confidence_array = get_confidence_array(experiment, experiment.X_train, experiment.y_tr, experiment.clf)
    # CSMIA returns the per-record predictions plus an indexable collection of
    # record indices per attack case; only the entry for case 2 is kept.
    sens_pred, case_indices = CSMIA_attack(experiment.clf, experiment.X_train, experiment.y_tr, experiment.ds.ds.meta)
    case_2_indices = case_indices[2]
    experiment.case_2_indices = case_2_indices
    experiment.confidence_array_case_2 = experiment.confidence_array[case_2_indices, :]
    # NOTE(review): .loc is label-based while the two neighbouring selections are
    # positional; this presumably relies on X_train having a default integer index.
    experiment.X_case_2 = experiment.X_train.loc[case_2_indices].copy().reset_index(drop=True)
    experiment.y_case_2 = experiment.y_tr.ravel()[case_2_indices]
    experiment.sens_pred = sens_pred
    experiment.sens_pred_LOMIA = LOMIA_attack(experiment, experiment.clf, experiment.X_train, experiment.y_tr, experiment.ds.ds.meta)

# Single Attribute-based Targeted Attribute Inference

In [8]:
# Print, for each model depth, the table returned by the single-attribute
# targeted attack when it is fed the CSMIA predictions.
print('Performance of Single Attribute Based Targeted AI: CSMIA')
for experiment_key, experiment in experiments.items():
    print(f'\nDepth {experiment_key}\n')
    print(
        single_attribute_based_targeted_ai(
            experiment,
            experiment.sens_pred,
            subgroup_col_name=experiment.subgroup_col_name,
            kappas=experiment.single_kappas,
        )
    )

Performance of Single Attribute Based Targeted AI: CSMIA

Depth 2

       attack_accuracy
0.050            72.50
0.100            72.75
0.250            70.89
0.375            69.52
0.500            67.11
0.750            64.19
1.000            61.28

Depth 3

       attack_accuracy
0.050            71.57
0.100            69.59
0.250            67.27
0.375            65.87
0.500            64.55
0.750            62.14
1.000            60.24

Depth 4

       attack_accuracy
0.050            71.42
0.100            69.86
0.250            67.62
0.375            65.98
0.500            64.64
0.750            63.02
1.000            61.44


In [9]:
# Print, for each model depth, the table returned by the single-attribute
# targeted attack when it is fed the LOMIA predictions.
print('Performance of Single Attribute Based Targeted AI: LOMIA')
for experiment_key, experiment in experiments.items():
    print(f'\nDepth {experiment_key}\n')
    print(
        single_attribute_based_targeted_ai(
            experiment,
            experiment.sens_pred_LOMIA,
            subgroup_col_name=experiment.subgroup_col_name,
            kappas=experiment.single_kappas,
        )
    )

Performance of Single Attribute Based Targeted AI: LOMIA

Depth 2

       attack_accuracy
0.050            73.37
0.100            73.21
0.250            71.01
0.375            69.59
0.500            67.60
0.750            64.10
1.000            60.51

Depth 3

       attack_accuracy
0.050            73.84
0.100            72.96
0.250            70.39
0.375            69.04
0.500            67.86
0.750            65.95
1.000            62.70

Depth 4

       attack_accuracy
0.050            72.83
0.100            73.13
0.250            70.29
0.375            68.39
0.500            66.77
0.750            64.53
1.000            61.74


# Nested Attribute-based Targeted Attribute Inference

In [10]:
# Print, for each model depth, the table returned by the nested-attribute
# targeted attack when it is fed the CSMIA predictions.
print('Performance of Nested Attribute Based Targeted AI: CSMIA')
for experiment_key, experiment in experiments.items():
    print(f'\nDepth {experiment_key}\n')
    print(
        nested_attribute_based_targeted_ai(
            experiment,
            experiment.sens_pred,
            subgroup_cols=experiment.nested_attrib_cols,
            kappas=experiment.nested_kappas,
        )
    )

Performance of Nested Attribute Based Targeted AI: CSMIA

Depth 2

       Depth  attack_accuracy
1.000      0            61.28
0.500      1            67.48
0.375      2            67.93
0.250      3            67.23
0.100      4            70.48

Depth 3

       Depth  attack_accuracy
1.000      0            60.24
0.500      1            64.89
0.375      2            64.39
0.250      3            64.28
0.100      4            68.31

Depth 4

       Depth  attack_accuracy
1.000      0            61.44
0.500      1            64.91
0.375      2            64.75
0.250      3            65.20
0.100      4            67.67


In [11]:
# Print, for each model depth, the table returned by the nested-attribute
# targeted attack when it is fed the LOMIA predictions.
print('Performance of Nested Attribute Based Targeted AI: LOMIA')
for experiment_key, experiment in experiments.items():
    print(f'\nDepth {experiment_key}\n')
    print(
        nested_attribute_based_targeted_ai(
            experiment,
            experiment.sens_pred_LOMIA,
            subgroup_cols=experiment.nested_attrib_cols,
            kappas=experiment.nested_kappas,
        )
    )

Performance of Nested Attribute Based Targeted AI: LOMIA

Depth 2

       Depth  attack_accuracy
1.000      0            60.51
0.500      1            67.96
0.375      2            69.05
0.250      3            68.01
0.100      4            71.07

Depth 3

       Depth  attack_accuracy
1.000      0            62.70
0.500      1            68.06
0.375      2            67.98
0.250      3            67.95
0.100      4            71.24

Depth 4

       Depth  attack_accuracy
1.000      0            61.74
0.500      1            67.06
0.375      2            66.87
0.250      3            66.72
0.100      4            69.94
