In [1]:
import warnings

warnings.filterwarnings(action='ignore')

# Redactor Tutorial Code

## AdultCensus Dataset

## 1. Load Dataset

* You can load dataset using **Dataset** class in **preprocessing**.
* After loading, the data can be splitted and normalized by the method **split_dataset()**.

In [2]:
from preprocessing import Dataset
import numpy as np

np.random.seed(224)

test_size = 0.25
n_target = 10

Adult = Dataset('adult')
target_indices = np.random.choice(Adult.data.index, n_target)
(x_tr,y_tr), (x_te,y_te), (x_ta,y_ta), tr_scaler = Adult.split_dataset(test_size, target_indices)

## 2. Train Surrogate Models

* You can load and train surrogate models through **SurrogateModels** class.
* There is **show_performance** method to check the performance of the trained models.
    * Except accuracy score,there are other choices, such as **f1 score**, **confidence**, **roc auc**, and etc.

In [3]:
from model import SurrogateModels
    
model_names = ['nn_tanh_10_2','nn_relu_5_2', 'rf_entropy', 'gb', 'ada', 'log_reg']

s_models = SurrogateModels(model_names)
s_models.train_all(x_tr, y_tr)
s_models.show_performance([(x_tr,y_tr), (x_te,y_te), (x_ta,y_ta)],
                         cnames=['train', 'test','target'])
#s_models.cross_validation(x_tr, y_tr, k=3)

train models..


100%|██████████| 6/6 [00:52<00:00,  8.83s/it]
6it [00:01,  3.10it/s]


Unnamed: 0,train acc,test acc,target acc
s-nn_tanh_10_2,0.864,0.8471,1.0
s-nn_relu_5_2,0.8577,0.8546,1.0
s-rf_entropy,0.8521,0.8541,0.9
s-gb,0.8626,0.8652,1.0
s-ada,0.8608,0.8629,1.0
s-log_reg,0.8482,0.8534,1.0


## 3. Probabilistic Decision Boundary

* You can construct Probabilistic Decision Boundary using **PDB** class.
    1. Input the surrogate models trained on the previous step.
    2. Train PDB using all x data (without label)

In [4]:
from prob_decision_boundary import PDB

prob_dec = PDB(s_models.models)
x_all = np.concatenate([x_tr, x_te], axis=0)
prob_dec.fit_all(x_all)

100%|██████████| 6/6 [00:01<00:00,  3.19it/s]
100%|██████████| 6/6 [00:00<00:00, 12.72it/s]


In [5]:
sn_te_labels[sn_te_labels == -1] = 0
te_acc = sum(sn_te_labels==y_te)/len(y_te)

# Candidate Generation
## 4. GAN-based

* In this part, you can generate candidates using CTGAN.
* You can input tabular data without one-hot encoding.
* After generating through the method **generate()**, you can insert the nearest ones in the list.

In [6]:
from gen_disinfos import GANcandidates

adult = Adult.data
column_cat = Adult.column_cat
column_int = Adult.column_int
columns_1hot = Adult.data_1hot.columns

gan_gen = GANcandidates()
gan_gen.fit(adult, column_cat, column_int)

In [7]:
_ = gan_gen.generate()
gan_cand_list = gan_gen.nearest_points(tr_scaler, target_indices, columns_1hot)

In [8]:
from IPython.display import display
import pandas as pd

display(pd.concat([g.iloc[[0]]for g in gan_cand_list],ignore_index=True))

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country
0,31,Private,194162,Bachelors,13,Never-married,Prof-specialty,Not-in-family,White,Male,0,0,39,United-States
1,33,Private,187087,HS-grad,8,Married-civ-spouse,Adm-clerical,Husband,White,Male,0,0,40,United-States
2,31,Private,222424,HS-grad,9,Never-married,Craft-repair,Not-in-family,White,Male,4,0,40,United-States
3,38,Local-gov,136242,Some-college,10,Married-civ-spouse,Exec-managerial,Husband,White,Male,2,0,39,United-States
4,44,Private,167444,Some-college,10,Never-married,Craft-repair,Not-in-family,White,Male,7,0,39,United-States
5,31,Private,185776,Bachelors,13,Never-married,Exec-managerial,Not-in-family,White,Female,12,0,59,United-States
6,39,Federal-gov,131020,Masters,15,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,54,United-States
7,25,Private,193237,HS-grad,8,Married-civ-spouse,Sales,Wife,White,Female,0,0,21,United-States
8,24,Private,199887,Some-college,10,Never-married,Sales,Own-child,White,Female,0,0,39,United-States
9,40,Local-gov,381079,Doctorate,14,Never-married,Prof-specialty,Not-in-family,Black,Female,0,0,39,United-States


## 5. WM-based
* **WMcandidates** class is for generating candidates using Watermarking technique.
* You can generate them using the method **watermarking()**.

In [9]:
from gen_disinfos import WMcandidates, agg_disinfo
from tqdm import tqdm
  
adult_1hot = Adult.data_1hot
adult_label = Adult.label

wm_gen = WMcandidates(adult_1hot, adult_label, target_indices)
wm_cand_list = wm_gen.watermarking(tr_scaler, adult.columns, column_cat, column_int)

100%|██████████| 10/10 [04:54<00:00, 29.42s/it]


In [10]:
display(pd.concat([w.iloc[[0]]for w in wm_cand_list],ignore_index=True))

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country
0,33,Private,222139,Bachelors,13,Never-married,Prof-specialty,Not-in-family,White,Male,0,0,40,United-States
1,32,Private,193663,HS-grad,9,Married-civ-spouse,Adm-clerical,Husband,White,Male,0,0,40,United-States
2,29,Private,241110,HS-grad,9,Never-married,Craft-repair,Not-in-family,White,Male,2510,0,40,United-States
3,38,Local-gov,115813,Some-college,10,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,40,United-States
4,32,Private,58981,Some-college,10,Never-married,Craft-repair,Not-in-family,White,Male,0,0,46,United-States
5,25,Private,191232,Bachelors,13,Never-married,Exec-managerial,Not-in-family,White,Female,0,0,60,United-States
6,53,Federal-gov,275064,Masters,14,Married-civ-spouse,Adm-clerical,Husband,White,Male,0,0,40,United-States
7,27,Private,276537,HS-grad,9,Married-civ-spouse,Sales,Wife,White,Female,0,0,21,United-States
8,25,Private,185504,Some-college,10,Never-married,Sales,Own-child,White,Female,0,0,40,United-States
9,67,Local-gov,196856,Doctorate,16,Never-married,Prof-specialty,Not-in-family,White,Female,7742,0,50,United-States


## 6. Aggregate All candidates

* In this part, you can collect all candidates you generated and pick only the points satisfying some conditions.
    1. The points should have a different label from the target.
    2. The points should be close to the target.
* **agg_disinfo()** function can filter the disinformations that satisfy the above conditions among all candidates.

In [11]:
x_dis, y_dis = [], []
for ti in range(n_target):
    xt, yt = x_ta[ti], y_ta[ti]
    wm_cand = wm_cand_list[ti]
    gan_cand = gan_cand_list[ti]
    candidates = pd.concat((wm_cand, gan_cand))
    
    x_tmp, y_tmp = agg_disinfo(prob_dec, candidates, tr_scaler, x_tr, y_tr, xt, yt, 
                               columns_1hot, n_disinfo=500)
    x_dis.extend(x_tmp)
    y_dis.extend(y_tmp)

100%|██████████| 6/6 [00:00<00:00, 244.65it/s]
100%|██████████| 6/6 [00:00<00:00, 258.09it/s]
100%|██████████| 6/6 [00:00<00:00, 203.63it/s]
100%|██████████| 6/6 [00:00<00:00, 169.90it/s]
100%|██████████| 6/6 [00:00<00:00, 263.27it/s]
100%|██████████| 6/6 [00:00<00:00, 189.23it/s]
100%|██████████| 6/6 [00:00<00:00, 148.29it/s]
100%|██████████| 6/6 [00:00<00:00, 239.57it/s]
100%|██████████| 6/6 [00:00<00:00, 242.32it/s]
100%|██████████| 6/6 [00:00<00:00, 180.17it/s]
100%|██████████| 6/6 [00:00<00:00, 148.30it/s]
100%|██████████| 6/6 [00:00<00:00, 168.43it/s]
100%|██████████| 6/6 [00:00<00:00, 158.88it/s]
100%|██████████| 6/6 [00:00<00:00, 251.14it/s]
100%|██████████| 6/6 [00:00<00:00, 284.87it/s]


## 7. Insert Disinformation

* Now,you can test the effect of the generated disinformations.
* In the class **VictimModels**, there are varous kinds of models that can be used as a victim model.

In [12]:
from model import VictimModels

v_models = VictimModels()
v_models.train_all(x_tr, y_tr)
result_clean = v_models.show_performance([(x_tr,y_tr), (x_te,y_te), (x_ta,y_ta)],
                         cnames=['train', 'test','target'])

train models..


100%|██████████| 19/19 [17:12<00:00, 54.34s/it] 
19it [02:48,  8.85s/it]


In [13]:
x_tr_dis = np.concatenate((x_tr, x_dis), axis=0)
y_tr_dis = np.concatenate((y_tr, y_dis), axis=0).astype(int)

In [14]:
v_models_dis = VictimModels()
v_models_dis.train_all(x_tr_dis, y_tr_dis)
result_dis = v_models_dis.show_performance([(x_tr,y_tr), (x_te,y_te), (x_ta,y_ta)],
                         cnames=['train', 'test','target'])

train models..


100%|██████████| 19/19 [25:36<00:00, 80.84s/it] 
19it [03:34, 11.29s/it]


* In addition, you can compare 2 results (before and after inserting) using **compare_result()** function.

In [15]:
from utils import compare_result

compare_result(result_clean, result_dis)

Unnamed: 0,mean,std
train acc,-1.670526,1.437832
test acc,-1.611579,1.559502
target acc,-31.052632,20.788548


## 8. Membership Inference Attack (MIA)

* At final step, you can test the victim models can be protected from MIA.
* Before training attack models, you have to construct the input data for attack model using **attack_input()** function.
* The attack input should consist of the loss and confidence value corresponding to each data points.

In [16]:
from sklearn.model_selection import train_test_split
from model import AttackModels, attack_input

np.random.seed(726)

vi = 1
victim_clean = v_models.models[vi]
x_mia, y_mia, x_mia_ta, y_mia_ta = attack_input(victim_clean, x_tr, y_tr, x_te, y_te, x_ta, y_ta)
x_mia_tr, x_mia_te, y_mia_tr, y_mia_te = train_test_split(x_mia, y_mia, test_size  = 0.25)

* Finally, you can train the various attack models, such as neural networks, decision tree, svm, and so on.
* After training, you can compare the results and check the effect of inserting disinformation.

In [17]:
model_names = ['nn_tanh_5_2','nn_relu_5_2', 'nn_identity', 'tree_gini', 'tree_entropy',
               'rf_gini', 'rf_entropy', 'ada', 'log_reg']

a_models = AttackModels(model_names)
a_models.train_all(x_mia_tr, y_mia_tr)
a_result_clean = a_models.show_performance([(x_mia_tr,y_mia_tr), (x_mia_te,y_mia_te), (x_mia_ta,y_mia_ta)],
                         cnames=['train attack', 'test attack','target attack'])
a_result_clean

train models..


100%|██████████| 9/9 [00:18<00:00,  2.08s/it]
9it [00:00, 10.96it/s]


Unnamed: 0,train attack acc,test attack acc,target attack acc
a-nn_tanh_5_2,0.515,0.5167,0.8
a-nn_relu_5_2,0.4993,0.502,0.8
a-nn_identity,0.5082,0.5026,0.8
a-tree_gini,0.5632,0.5303,0.6
a-tree_entropy,0.5637,0.5309,0.7
a-rf_gini,0.592,0.5268,0.7
a-rf_entropy,0.5884,0.528,0.6
a-ada,0.5338,0.5096,0.7
a-log_reg,0.5065,0.5027,0.7


In [18]:
victim_dis = v_models_dis.models[vi]
x_mia, y_mia, x_mia_ta, y_mia_ta = attack_input(victim_dis, x_tr, y_tr, x_te, y_te, x_ta, y_ta)
x_mia_tr, x_mia_te, y_mia_tr, y_mia_te = train_test_split(x_mia, y_mia, test_size  = 0.25)

In [19]:
a_models_dis = AttackModels(model_names)
a_models_dis.train_all(x_mia_tr, y_mia_tr)
a_result_dis = a_models_dis.show_performance([(x_mia_tr,y_mia_tr), (x_mia_te,y_mia_te), (x_mia_ta,y_mia_ta)],
                         cnames=['train attack', 'test attack','target attack'])
a_result_dis

train models..


100%|██████████| 9/9 [00:21<00:00,  2.37s/it]
9it [00:00, 10.78it/s]


Unnamed: 0,train attack acc,test attack acc,target attack acc
a-nn_tanh_5_2,0.5083,0.5089,0.7
a-nn_relu_5_2,0.5002,0.4994,0.2
a-nn_identity,0.5065,0.5061,0.7
a-tree_gini,0.5634,0.5006,0.6
a-tree_entropy,0.5616,0.5017,0.6
a-rf_gini,0.589,0.5128,0.6
a-rf_entropy,0.5913,0.5247,0.5
a-ada,0.5295,0.4987,0.6
a-log_reg,0.5065,0.5052,0.7


In [20]:
idxs = a_result_clean['target attack acc'] >= 0.5
compare_result(a_result_clean.loc[idxs], a_result_dis.loc[idxs])

Unnamed: 0,mean,std
train attack acc,-0.153333,0.291076
test attack acc,-1.016667,1.233025
target attack acc,-13.333333,18.027756
