In [1]:
import sys

In [2]:
#!{sys.executable} -m pip install ipynb

In [3]:
# !{sys.executable} -m pip install mlxtend

In [4]:
import numpy as np
from nilib import *

#import os
#import json
#import glob
#import pickle
import dill
import pandas as pd
#import matplotlib.pyplot as plt
#import seaborn as sns
import lightgbm
#import functools
import parallel_robust_forest
#from os import listdir
#from os.path import isfile, join
#from sklearn.svm import SVC
#from sklearn.model_selection import GridSearchCV
#from sklearn.metrics import roc_auc_score, f1_score, confusion_matrix, precision_score, recall_score
from nilib import *
from sklearn.ensemble import BaggingClassifier


from mlxtend.evaluate import mcnemar

# Load attacked datasets

## Load an attacked dataset with a specific budget

In [5]:
def load_attacked_dataset(DATASET_NAME, budget):
    DATASET_DIR="../data/{}".format(DATASET_NAME)
    ATK_DIR=DATASET_DIR + "/attacks"
    TRAINING_FILENAME_ATT=ATK_DIR + "/" + "train_B{}.atks.bz2"
    VALIDATION_FILENAME_ATT=ATK_DIR + "/" + "valid_B{}.atks.bz2"
    TEST_FILENAME_ATT=ATK_DIR + "/" + "test_B{}.atks.bz2"

    # load train/valid/test (attacked)
    train_att, valid_att, test_att = load_atk_train_valid_test(TRAINING_FILENAME_ATT.format(budget), 
                                                                  VALIDATION_FILENAME_ATT.format(budget), 
                                                                  TEST_FILENAME_ATT.format(budget))

    test_groups = test_att['instance_id'].value_counts().sort_index().values
    test_att = test_att.iloc[:, 1:]

    valid_groups = valid_att['instance_id'].value_counts().sort_index().values
    valid_att = valid_att.iloc[:, 1:]

    train_groups = train_att['instance_id'].value_counts().sort_index().values
    train_att = train_att.iloc[:, 1:]
    
    return train_att, train_groups, valid_att, valid_groups, test_att, test_groups

# Load and Eval Model

In [6]:
def load_model(model_file):
    model = None
    try:
        model = lightgbm.Booster(model_file=model_file)
    except:
        print("LightGBM loading exception")
        try:
            with open(model_file, 'rb') as mf:
                model = dill.load(mf)
                print(model)
                model.n_jobs = 16
        except Exception as e:
            print(e)
            print("Dill loading exception")
            pass
    
    return model

In [7]:
def binarize(preds):
    if np.min(preds)<-0.001:
        return np.where(preds>=0,  1.0, -1.0)
    else:
        return np.where(preds>=.5, 1.0, -1.0)
    
def model_predict(model,test_set):
    X = test_set.iloc[:,:-1].values

    if isinstance(model, BaggingClassifier):
        return model.predict_proba(X)[:,1]
    else:
        return model.predict(test_set.iloc[:,:-1])

def model_worst_predict(model, test_set, test_groups):
    labels = test_set.iloc[:,-1].values
    preds  = model_predict(model, test_set)
    
    offset = 0
    true_labels = []
    worst_predictions = []
    
    for g in test_groups:
        true_label = labels[offset]
        true_labels.append(true_label)
        predictions_att = preds[offset:offset+g]
        if true_label == 1:
            worst_predictions.append(np.min(predictions_att))
        else:
            worst_predictions.append(np.max(predictions_att))
    
        offset += g

    return np.array(true_labels), np.array(worst_predictions)

def eval_model_ua(model, test, test_groups):
    y_true, y_pred = model_worst_predict(model, test, test_groups)
    y_pred = binarize(y_pred)
    
    correct_preds = y_true == y_pred
    correct_preds = correct_preds.astype(int)
    
    return correct_preds

# McNemar Test

In [8]:
def mcnemar_test(robust_model, base_model,
                 test_set, test_set_groups):
    
    our_model = load_model(robust_model)
    ref_model = load_model(base_model)

    our_model_correct = eval_model_ua(our_model, test_set, test_set_groups)
    ref_model_correct = eval_model_ua(ref_model, test_set, test_set_groups)

    contingency_matrix = np.array( [ [np.sum(our_model_correct*ref_model_correct), 
                                        np.sum((1-our_model_correct)*ref_model_correct)],
                                     [np.sum((our_model_correct)*(1-ref_model_correct)),
                                        np.sum((1-our_model_correct)*(1-ref_model_correct))] 
                                   ])

    print(contingency_matrix)

    chi2, p = mcnemar(ary=contingency_matrix, corrected=True)

    print('chi-squared:', chi2)
    print('p-value:', p)

## WINE

In [11]:
DATASET_NAME="wine"

# Final Models
robust_models = ["../out/models/wine/par-robust_wine_L-sse_B20_T20_D8_I20.model",  # 20 Trees
                 "../out/models/wine/par-robust_wine_L-sse_B40_T100_D8_I20.model",
                 "../out/models/wine/par-robust_wine_L-sse_B60_T100_D8_I20.model",
                 "../out/models/wine/par-robust_wine_L-sse_B80_T100_D8_I20.model",
                 "../out/models/wine/par-robust_wine_L-sse_B100_T20_D8_I20.model", # 20 Trees
                 "../out/models/wine/par-robust_wine_L-sse_B120_T20_D8_I20.model", # MISSINGS
                ]

base_models =   [  "../out/models/wine/adv-boosting_wine_B20_T100_S0050_L256_R99.model",
                   "../out/models/wine/adv-boosting_wine_B40_T100_S0050_L256_R100.model",
                   "../out/models/wine/adv-boosting_wine_B60_T100_S0050_L256_R99.model",
                   "../out/models/wine/adv-boosting_wine_B80_T100_S0050_L256_R100.model",
                   "../out/models/wine/adv-boosting_wine_B100_T100_S0050_L256_R100.model",
                   "../out/models/wine/adv-boosting_wine_B120_T100_S0050_L256_R100.model"
             ]

budgets = [20,40,60,80,100,120]

for budget, robust_model, base_model in zip(budgets, robust_models, base_models):

    _,_,_,_, test_set, test_set_groups = load_attacked_dataset(DATASET_NAME, budget)
    assert len(test_set)==np.sum(test_set_groups), "Incorrect groups"
    
    mcnemar_test(robust_model, base_model, test_set, test_set_groups)

Pre-processing original files...
Loading: ../data/wine/attacks/train_B20.atks.bz2
Loading: ../data/wine/attacks/valid_B20.atks.bz2
Loading: ../data/wine/attacks/test_B20.atks.bz2
Train/Valid/Test sizes: (21361, 14) (2933, 14) (6176, 14)
Train/Valid/Test split: 0.70 0.10 0.20
   ... with instance ids
CatFX: []
Train/Valid/Test sizes: (18278, 14) (6016, 14) (6176, 14)
Train/Valid/Test split: 0.60 0.20 0.20
Saving processed files *.atks.bz2
LightGBM loading exception
BaggingClassifier(base_estimator=RobustDecisionTree(affine=None, attacker=None,
                                                    feature_blacklist={},
                                                    max_depth=8,
                                                    max_features=0.8,
                                                    max_samples=0.8,
                                                    min_instances_per_node=20,
                                                    replace_features=False,
                  



[[908  83]
 [ 74 235]]
chi-squared: 0.40764331210191085
p-value: 0.5231680971364009
Pre-processing original files...
Loading: ../data/wine/attacks/train_B40.atks.bz2
Loading: ../data/wine/attacks/valid_B40.atks.bz2
Loading: ../data/wine/attacks/test_B40.atks.bz2
Train/Valid/Test sizes: (72409, 14) (10145, 14) (20817, 14)
Train/Valid/Test split: 0.70 0.10 0.20
   ... with instance ids
CatFX: []
Train/Valid/Test sizes: (61947, 14) (20607, 14) (20817, 14)
Train/Valid/Test split: 0.60 0.20 0.20
Saving processed files *.atks.bz2
LightGBM loading exception




BaggingClassifier(base_estimator=RobustDecisionTree(affine=None, attacker=None,
                                                    feature_blacklist={},
                                                    max_depth=8,
                                                    max_features=0.8,
                                                    max_samples=0.8,
                                                    min_instances_per_node=20,
                                                    replace_features=False,
                                                    replace_samples=False,
                                                    seed=0,
                                                    split_optimizer=None,
                                                    tree_id=0),
                  bootstrap=False, bootstrap_features=False, max_features=1.0,
                  max_samples=1.0, n_estimators=100, n_jobs=None,
                  oob_score=False, random_state=None, verbose=0,
    



BaggingClassifier(base_estimator=RobustDecisionTree(affine=None, attacker=None,
                                                    feature_blacklist={},
                                                    max_depth=8,
                                                    max_features=0.8,
                                                    max_samples=0.8,
                                                    min_instances_per_node=20,
                                                    replace_features=False,
                                                    replace_samples=False,
                                                    seed=0,
                                                    split_optimizer=None,
                                                    tree_id=0),
                  bootstrap=False, bootstrap_features=False, max_features=1.0,
                  max_samples=1.0, n_estimators=100, n_jobs=None,
                  oob_score=False, random_state=None, verbose=0,
    



BaggingClassifier(base_estimator=RobustDecisionTree(affine=None, attacker=None,
                                                    feature_blacklist={},
                                                    max_depth=8,
                                                    max_features=0.8,
                                                    max_samples=0.8,
                                                    min_instances_per_node=20,
                                                    replace_features=False,
                                                    replace_samples=False,
                                                    seed=0,
                                                    split_optimizer=None,
                                                    tree_id=0),
                  bootstrap=False, bootstrap_features=False, max_features=1.0,
                  max_samples=1.0, n_estimators=100, n_jobs=None,
                  oob_score=False, random_state=None, verbose=0,
    



[[865  47]
 [104 284]]
chi-squared: 20.7682119205298
p-value: 5.183626513798672e-06
Pre-processing original files...
Loading: ../data/wine/attacks/train_B120.atks.bz2
Loading: ../data/wine/attacks/valid_B120.atks.bz2
Loading: ../data/wine/attacks/test_B120.atks.bz2
Train/Valid/Test sizes: (2722930, 14) (362407, 14) (805753, 14)
Train/Valid/Test split: 0.70 0.09 0.21
   ... with instance ids
CatFX: []
Train/Valid/Test sizes: (2330524, 14) (754813, 14) (805753, 14)
Train/Valid/Test split: 0.60 0.19 0.21
Saving processed files *.atks.bz2
LightGBM loading exception
[Errno 2] No such file or directory: '../out/models/wine/par-robust_wine_L-sse_B120_T20_D8_I20.model'
Dill loading exception


AttributeError: 'NoneType' object has no attribute 'predict'

## Credit

In [None]:
DATASET_NAME="credit"

# Final Models
robust_models = [#"../out/models/credit/par-robust_credit_B10_T100_D8_I20.model",
                 #"../out/models/credit/par-robust_credit_B30_T100_D8_I20.model",
                 "../out/models/credit/par-robust_credit_B40_T100_D8_I20.model",
                 "../out/models/credit/par-robust_credit_B60_T100_D8_I20.model"
                 #"../out/models/credit/par-robust_credit_L-sse_B60_T100_D8_I20.model"
                ]

base_models = [#"../out/models/credit/adv-boosting_credit_B10_T100_S0050_L256_R56.model",
               #"../out/models/credit/adv-boosting_credit_B30_T100_S0050_L256_R40.model",
               "../out/models/credit/adv-boosting_credit_B40_T100_S0050_L256_R56.model",              
               "../out/models/credit/adv-boosting_credit_B60_T100_S0050_L256_R50.model"
              ]

budgets = [40,60]#[10,30,40,60]

for budget, robust_model, base_model in zip(budgets, robust_models, base_models):

    _,_,_,_, test_set, test_set_groups = load_attacked_dataset(DATASET_NAME, budget)
    assert len(test_set)==np.sum(test_set_groups), "Incorrect groups"
    
    mcnemar_test(robust_model, base_model, test_set, test_set_groups)

Pre-processing original files...
Loading: ../data/credit/attacks/train_B40.atks.bz2
Loading: ../data/credit/attacks/valid_B40.atks.bz2
Loading: ../data/credit/attacks/test_B40.atks.bz2
Train/Valid/Test sizes: (1650064, 25) (509022, 25) (506132, 25)
Train/Valid/Test split: 0.62 0.19 0.19
   ... with instance ids
CatFX: []
Train/Valid/Test sizes: (1650064, 25) (509022, 25) (506132, 25)
Train/Valid/Test split: 0.62 0.19 0.19
Saving processed files *.atks.bz2
LightGBM loading exception
BaggingClassifier(base_estimator=RobustDecisionTree(affine=None, attacker=None,
                                                    feature_blacklist={},
                                                    max_depth=8,
                                                    max_features=0.8,
                                                    max_samples=0.8,
                                                    min_instances_per_node=20,
                                                    replace_features=False,




## CENSUS 120

In [None]:
DATASET_NAME="census"
budget = 120
robust_model = "../out/models/census/par-robust_census_B120_T100_D8_I20.model"
base_model   = "../out/models/census/adv-boosting_census_B120_T100_S0050_L256_R94.model"

_,_,_,_, test_set, test_set_groups = load_attacked_dataset(DATASET_NAME, budget)
assert len(test_set)==np.sum(test_set_groups), "Incorrect groups"

In [None]:
mcnemar_test(robust_model, base_model, test_set, test_set_groups)

## CENSUS 90

In [None]:
DATASET_NAME  = "census"
budget        = 90
robust_model  = "../out/models/census/par-robust_census_B90_T100_D8_I20.model"
base_model    = "../out/models/census/adv-boosting_census_B90_T100_S0050_L256_R98.model"

_,_,_,_, test_set, test_set_groups = load_attacked_dataset(DATASET_NAME, budget)
assert len(test_set)==np.sum(test_set_groups), "Incorrect groups"

In [None]:
mcnemar_test(robust_model, base_model, test_set, test_set_groups)

## CENSUS 60

In [None]:
DATASET_NAME  = "census"
budget        = 60
robust_model  = "../out/models/census/par-robust_census_B60_T100_D8_I20.model"
base_model    = "../out/models/census/icml2019_census_B60_T100_D8_I20.model"

_,_,_,_, test_set, test_set_groups = load_attacked_dataset(DATASET_NAME, budget)
assert len(test_set)==np.sum(test_set_groups), "Incorrect groups"

In [None]:
mcnemar_test(robust_model, base_model, test_set, test_set_groups)

## CENSUS 30

In [None]:
DATASET_NAME  = "census"
budget        = 30
robust_model  = "../out/models/census/par-robust_census_B30_T100_D8_I20.model"
base_model    = "../out/models/census/adv-boosting_census_B30_T100_S0050_L256_R100.model"

_,_,_,_, test_set, test_set_groups = load_attacked_dataset(DATASET_NAME, budget)
assert len(test_set)==np.sum(test_set_groups), "Incorrect groups"

In [None]:
mcnemar_test(robust_model, base_model, test_set, test_set_groups)