In [1]:
import warnings

warnings.filterwarnings(action='ignore')

# COMPAS Tutorial Code

The process is exactly same with "Tutorial.ipynb", but only difference is the structure of models. 

In [21]:
from preprocessing import Dataset
import numpy as np

np.random.seed(224)

test_size = 0.25
n_target = 10

Compas = Dataset('compas')
target_indices = np.random.choice(Compas.data.index, n_target)
(x_tr,y_tr), (x_te,y_te), (x_ta,y_ta), tr_scaler = Compas.split_dataset(test_size, target_indices)

In [22]:
from model import SurrogateModels

model_names = ['nn_tanh_10_2','nn_relu_5_2', 'nn_relu_25_10', 'nn_identity',
               'rf_gini', 'rf_entropy', 'gb', 'ada', 'log_reg']

s_models = SurrogateModels(model_names)
s_models.train_all(x_tr, y_tr)
s_models.show_performance([(x_tr,y_tr), (x_te,y_te), (x_ta,y_ta)],
                         cnames=['train', 'test','target'])

train models..


100%|██████████| 9/9 [00:09<00:00,  1.07s/it]
9it [00:00, 26.72it/s]


Unnamed: 0,train acc,test acc,target acc
s-nn_tanh_10_2,0.7758,0.7589,0.8
s-nn_relu_5_2,0.7615,0.768,0.7
s-nn_relu_25_10,0.7768,0.7667,0.8
s-nn_identity,0.7526,0.7699,0.7
s-rf_gini,0.7589,0.7693,0.6
s-rf_entropy,0.7585,0.7673,0.6
s-gb,0.7596,0.7719,0.6
s-ada,0.7561,0.768,0.6
s-log_reg,0.7546,0.7712,0.7


In [23]:
from prob_decision_boundary import PDB

prob_dec = PDB(s_models.models)
x_all = np.concatenate([x_tr, x_te], axis=0)
prob_dec.fit_all(x_all)
sn_te_labels = prob_dec.predict(x_te)

100%|██████████| 9/9 [00:00<00:00, 36.38it/s]
100%|██████████| 9/9 [00:00<00:00, 77.12it/s]


In [24]:
sn_te_labels[sn_te_labels == -1] = 0
te_acc = sum(sn_te_labels==y_te)/len(y_te)
print('Test Acc: %s' % te_acc)

Test Acc: 0.7777057679844459


In [25]:
from gen_disinfos import GANcandidates

compas = Compas.data
column_cat = Compas.column_cat
column_int = Compas.column_int
columns_1hot = Compas.data_1hot.columns

gan_gen = GANcandidates()
gan_gen.fit(compas, column_cat, column_int)

In [26]:
_ = gan_gen.generate()
gan_cand_list = gan_gen.nearest_points(tr_scaler, target_indices, columns_1hot)

In [27]:
from IPython.display import display
import pandas as pd

display(pd.concat([g.iloc[[0]]for g in gan_cand_list],ignore_index=True))

Unnamed: 0,c-charge-degree,age-cat,race,sex,age,juv-fel-count,juv-misd-count,juv-other-count,priors-count,two-year-recid
0,M,25 - 45,African-American,Male,27,0,0,0,0,0
1,M,Less than 25,African-American,Male,22,0,0,0,0,1
2,M,25 - 45,African-American,Male,31,0,0,0,0,0
3,M,25 - 45,Hispanic,Male,44,0,0,0,1,1
4,F,25 - 45,Caucasian,Female,25,0,0,0,2,1
5,F,25 - 45,African-American,Male,26,0,0,0,2,0
6,M,25 - 45,Caucasian,Female,31,0,0,1,0,0
7,F,25 - 45,African-American,Male,44,0,0,0,16,1
8,F,Less than 25,African-American,Male,23,0,0,0,2,0
9,F,25 - 45,African-American,Male,28,0,0,0,4,1


In [28]:
from gen_disinfos import WMcandidates, agg_disinfo
from tqdm import tqdm
  
compas_1hot = Compas.data_1hot
compas_label = Compas.label

wm_gen = WMcandidates(compas_1hot, compas_label, target_indices)
wm_cand_list = wm_gen.watermarking(tr_scaler, compas.columns, column_cat, column_int)

100%|██████████| 10/10 [00:37<00:00,  3.74s/it]


In [29]:
display(pd.concat([w.iloc[[0]]for w in wm_cand_list],ignore_index=True))

Unnamed: 0,c-charge-degree,age-cat,race,sex,age,juv-fel-count,juv-misd-count,juv-other-count,priors-count,two-year-recid
0,M,25 - 45,African-American,Male,27,0,0,0,0,0
1,M,Less than 25,African-American,Male,22,0,0,0,0,1
2,M,25 - 45,African-American,Male,31,0,0,0,0,0
3,M,25 - 45,Hispanic,Male,43,0,0,0,1,1
4,F,25 - 45,Caucasian,Female,26,0,0,0,2,1
5,F,25 - 45,African-American,Male,26,0,0,0,2,0
6,M,25 - 45,Caucasian,Female,32,0,0,1,0,0
7,F,25 - 45,African-American,Male,43,0,0,0,18,1
8,F,Less than 25,African-American,Male,23,0,0,0,2,0
9,F,25 - 45,African-American,Male,28,0,0,0,4,1


In [30]:
from model import VictimModels

v_models = VictimModels()
v_models.train_all(x_tr, y_tr)
result_clean = v_models.show_performance([(x_tr,y_tr), (x_te,y_te), (x_ta,y_ta)],
                         cnames=['train', 'test','target'])

train models..


100%|██████████| 19/19 [00:48<00:00,  2.56s/it]
19it [00:02,  6.58it/s]


In [31]:
import pandas as pd

x_dis, y_dis = [], []
for ti in range(n_target):
    xt, yt = x_ta[ti], y_ta[ti]
    wm_cand = wm_cand_list[ti]
    gan_cand = gan_cand_list[ti]
    candidates = pd.concat((wm_cand, gan_cand))
    
    x_tmp, y_tmp = agg_disinfo(prob_dec, candidates, tr_scaler, x_tr, y_tr, xt, yt, 
                               columns_1hot, n_disinfo=200)
    x_dis.extend(x_tmp)
    y_dis.extend(y_tmp)

100%|██████████| 9/9 [00:00<00:00, 134.47it/s]
100%|██████████| 9/9 [00:00<00:00, 274.27it/s]
100%|██████████| 9/9 [00:00<00:00, 326.80it/s]
100%|██████████| 9/9 [00:00<00:00, 264.95it/s]
100%|██████████| 9/9 [00:00<00:00, 272.47it/s]
100%|██████████| 9/9 [00:00<00:00, 318.13it/s]
100%|██████████| 9/9 [00:00<00:00, 278.12it/s]
100%|██████████| 9/9 [00:00<00:00, 296.29it/s]
100%|██████████| 9/9 [00:00<00:00, 283.47it/s]
100%|██████████| 9/9 [00:00<00:00, 297.04it/s]
100%|██████████| 9/9 [00:00<00:00, 274.13it/s]
100%|██████████| 9/9 [00:00<00:00, 321.91it/s]
100%|██████████| 9/9 [00:00<00:00, 279.04it/s]
100%|██████████| 9/9 [00:00<00:00, 320.41it/s]
100%|██████████| 9/9 [00:00<00:00, 283.75it/s]
100%|██████████| 9/9 [00:00<00:00, 281.15it/s]
100%|██████████| 9/9 [00:00<00:00, 300.24it/s]


In [32]:
x_tr_dis = np.concatenate((x_tr, x_dis), axis=0)
y_tr_dis = np.concatenate((y_tr, y_dis), axis=0).astype(int)

In [33]:
v_models_dis = VictimModels()
v_models_dis.train_all(x_tr_dis, y_tr_dis)
result_dis = v_models_dis.show_performance([(x_tr,y_tr), (x_te,y_te), (x_ta,y_ta)],
                         cnames=['train', 'test','target'])

train models..


100%|██████████| 19/19 [01:09<00:00,  3.64s/it]
19it [00:03,  5.52it/s]


In [34]:
from utils import compare_result

compare_result(result_clean, result_dis)

Unnamed: 0,mean,std
train acc,-2.285789,1.266036
test acc,-2.297895,1.418386
target acc,-36.315789,16.401398


In [40]:
from sklearn.model_selection import train_test_split
from model import AttackModels, attack_input

np.random.seed(726)

vi = 1

victim_clean = v_models.models[vi]
x_mia, y_mia, x_mia_ta, y_mia_ta = attack_input(victim_clean, x_tr, y_tr, x_te, y_te, x_ta, y_ta)
x_mia_tr, x_mia_te, y_mia_tr, y_mia_te = train_test_split(x_mia, y_mia, test_size  = 0.25)

In [41]:
a_models = AttackModels()
a_models.train_all(x_mia_tr, y_mia_tr)
a_result_clean = a_models.show_performance([(x_mia_tr,y_mia_tr), (x_mia_te,y_mia_te), (x_mia_ta,y_mia_ta)],
                         cnames=['train attack', 'test attack','target attack'])

train models..


100%|██████████| 19/19 [00:12<00:00,  1.55it/s]
19it [00:02,  8.35it/s]


In [42]:
victim_dis = v_models_dis.models[vi]
x_mia, y_mia, x_mia_ta, y_mia_ta = attack_input(victim_dis, x_tr, y_tr, x_te, y_te, x_ta, y_ta)
x_mia_tr, x_mia_te, y_mia_tr, y_mia_te = train_test_split(x_mia, y_mia, test_size  = 0.25)

In [43]:
a_models_dis = AttackModels()
a_models_dis.train_all(x_mia_tr, y_mia_tr)
a_result_dis = a_models_dis.show_performance([(x_mia_tr,y_mia_tr), (x_mia_te,y_mia_te), (x_mia_ta,y_mia_ta)],
                         cnames=['train attack', 'test attack','target attack'])

train models..


100%|██████████| 19/19 [00:12<00:00,  1.51it/s]
19it [00:02,  8.29it/s]


In [44]:
idxs = a_result_clean['target attack acc'] >= 0.5
compare_result(a_result_clean.loc[idxs], a_result_dis.loc[idxs])

Unnamed: 0,mean,std
train attack acc,-0.153333,1.071952
test attack acc,2.806667,2.291935
target attack acc,-18.0,25.967012
