# Evaluating Models

This notebook contains the code used for evaluating the following learning models:

-  **Standard GBDT** (_baseline 1_)
-  **Adversarial Boosting** (_baseline 2_)
-  **Non-Interferent GBDT** (our proposal)

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import json
import glob
import pickle
import dill
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm
import functools
import parallel_robust_forest
from os import listdir
from os.path import isfile, join
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score, f1_score, confusion_matrix, precision_score, recall_score
from nilib import *
from sklearn.ensemble import BaggingClassifier

# Standard evaluation metric

The following function is the one used for evaluating the quality of the learned model (either _standard_, _adversarial-boosting_, or _non-interferent_). This is the standard <code>avg_log_loss</code>.

In [3]:
def logistic(x):
    return 1.0/(1.0 + np.exp(-x))

In [4]:
def logit(p):
    return np.log(p/(1-p))

In [5]:
def binarize(preds):
    if np.min(preds)<-0.001:
        return np.where(preds>=0,  1.0, -1.0)
    else:
        return np.where(preds>=.5, 1.0, -1.0)

# <code>avg_log_loss</code>

In [6]:
# self-defined eval metric
# f(preds: array, train_data: Dataset) -> name: string, value: array, is_higher_better: bool
def avg_log_loss(preds, train_data):
    
    labels = train_data.get_label()
    losses = np.log(1.0 + np.exp(-preds*labels))
    avg_loss = np.mean(losses)
    
    return 'avg_binary_log_loss', avg_loss, False

In [7]:
def eval_log_loss(y_true, y_pred):
    losses = np.log(1.0 + np.exp(-y_pred*y_true))
    avg_loss = np.mean(losses)
    return avg_loss

# Custom evaluation metric

Similarly to what we have done for <code>fobj</code>, <code>feval</code> can be computed from a weighted combination of two evaluation metrics:

-  <code>avg_log_loss</code> (standard, defined above);
-  <code>avg_log_loss_uma</code> (custom, defined below).

# <code>avg_log_loss_uma</code>

This is the binary log loss yet modified to operate on groups of perturbed instances.

In [None]:
# Our custom metric

def binary_log_loss(pred, true_label):

    return np.log(1.0 + np.exp(-pred * true_label))

# self-defined eval metric
# f(preds: array, train_data: Dataset) -> name: string, value: array, is_higher_better: bool
def avg_log_loss_uma(preds, train_data):
    labels = train_data.get_label()
    attack_lens = train_data.get_group()
    
    offset = 0
    max_logloss = []
    avg_max_logloss = 0.0
    
    if attack_lens is not None:
    
        for atk in attack_lens:
            losses = [binary_log_loss(h,t) for h,t in zip(preds[offset:offset+atk], labels[offset:offset+atk])]
            max_logloss.append(max(losses))
            
            offset += atk
        
        avg_max_logloss = np.mean(max_logloss)  
        
    return 'avg_binary_log_loss_under_max_attack', avg_max_logloss, False

In [None]:
def eval_log_loss_uma(preds, test, test_groups=None, svm=False):
    
    lgbm_test = lightgbm.Dataset(data=test.iloc[:,:-1].values, 
                                 label=test.iloc[:,-1].values,
                                 group=test_groups,
                                 free_raw_data=False)
    
    return avg_log_loss_uma(preds,lgbm_test)[1]


# <code>eval_binary_err_rate</code>

In [None]:
def eval_binary_err_rate(y_true, y_pred):
    errs = np.sum(binarize(y_pred) != y_true)
    return errs/len(y_true)


# <code>eval_roc_auc</code>

In [None]:
def eval_roc_auc(y_true, y_pred):
    return roc_auc_score(y_true=y_true, y_score=y_pred)


# <code>eval_specificity</code>

In [None]:
def eval_specificity(y_true, y_pred):
    y_pred = binarize(y_pred)
    tn, fp, fn, tp = confusion_matrix(y_true=y_true, y_pred=y_pred).ravel()

    return tn/(tn + fp)

# <code>eval_precision</code>

In [None]:
def eval_precision(y_true, y_pred):
    y_pred = binarize(y_pred)
    return precision_score(y_true=y_true, y_pred=y_pred, average='weighted')


# <code>eval_recall</code>

In [None]:
def eval_recall(y_true, y_pred):
    y_pred = binarize(y_pred)
    return recall_score(y_true=y_true, y_pred=y_pred, average='weighted')


# <code>eval_f1</code>

In [None]:
def eval_f1_micro(y_true, y_pred):
    y_pred = binarize(y_pred)
    return f1_score(y_true=y_true, y_pred=y_pred, average='micro')
def eval_f1_macro(y_true, y_pred):
    y_pred = binarize(y_pred)
    return f1_score(y_true=y_true, y_pred=y_pred, average='macro')

# Evaluate each model w.r.t. _all_ evaluation metrics

In [None]:
import sklearn

def model_predict(model,test_set):
    X = test_set.iloc[:,:-1].values

    if isinstance(model, sklearn.ensemble.BaggingClassifier):
        print ("BaggingClassifier")
#         print ( np.min( model.predict_proba(X)[:,0] ), np.max( model.predict_proba(X)[:,0] ) )
#         print ( np.min( model.predict_proba(X)[:,1] ), np.max( model.predict_proba(X)[:,1] ) )
        return model.predict_proba(X)[:,1]
        # return model.predict(X)
    else:
        print ("LightGBM")
#        print (np.unique( model.predict(X) ) )
#         lgbm_X = lightgbm.Dataset(data=test_set.iloc[:,:-1], 
#                                   label=test_set.iloc[:,-1])

        return model.predict(test_set.iloc[:,:-1])

def model_worst_predict(model, test_set, test_groups):
    labels = test_set.iloc[:,-1].values
    preds  = model_predict(model, test_set)
    
    offset = 0
    true_labels = []
    worst_predictions = []
    
    for g in test_groups:
        true_label = labels[offset]
        true_labels.append(true_label)
        predictions_att = preds[offset:offset+g]
        if true_label == 1:
            worst_predictions.append(np.min(predictions_att))
        else:
            worst_predictions.append(np.max(predictions_att))
    
        offset += g

    return np.array(true_labels), np.array(worst_predictions)

In [None]:
def eval_learned_models(eval_metrics, model, model_type, test, test_groups=None, budget=0):
    # output dataframe
    header = ['Model'] + ['Budget'] + [m.__name__.replace('eval_','').replace('_',' ').strip().title() 
                                       for m in eval_metrics]
    df = pd.DataFrame(columns=header)
    first_row = [model_type] + [budget] + [None for m in eval_metrics]
    df.loc[0] = first_row
    
    # predictions for plan and atk datasets
    if test_groups is None: # NOT ATKed
        y_true = test.iloc[:,-1].values
        y_pred = model_predict(model, test)
    else:
        y_true, y_pred = model_worst_predict(model, test, test_groups)
        
    for eval_metric in eval_metrics:
        res = eval_metric(y_true=y_true, y_pred=y_pred)
        print("{} learning - {} = {:.5f}"
                  .format(model_type, eval_metric.__name__, res))
        column_metric = eval_metric.__name__
        df[column_metric.replace('eval_','').replace('_',' ').strip().title()] = res

    print("******************************************************************************************************")
    
    return df

# Load attacked datasets

## Load an attacked dataset with a specific budget

In [None]:
def load_attacked_dataset(budget):
    # load train/valid/test (attacked)
    train_att, valid_att, test_att = load_atk_train_valid_test(TRAINING_FILENAME_ATT.format(budget), 
                                                                  VALIDATION_FILENAME_ATT.format(budget), 
                                                                  TEST_FILENAME_ATT.format(budget))

    test_groups = test_att['instance_id'].value_counts().sort_index().values
    test_att = test_att.iloc[:, 1:]

    valid_groups = valid_att['instance_id'].value_counts().sort_index().values
    valid_att = valid_att.iloc[:, 1:]

    train_groups = train_att['instance_id'].value_counts().sort_index().values
    train_att = train_att.iloc[:, 1:]
    
    return train_att, train_groups, valid_att, valid_groups, test_att, test_groups

## Load _all_ the attacked datasets given a list of budgets

In [None]:
def load_attacked_datasets(budgets):
    att_datasets = {}
    for b in budgets:
        att_datasets[b] = load_attacked_dataset(b)
    
    return att_datasets

# Evaluate all models w.r.t. standard metrics (i.e., attack-free)

In [None]:
def extract_model_name(model_filename):
    model_fileroot = model_filename.split('/')[-1].split('.')[0]
    model_name = model_fileroot.split('_')[0].title() 
    lossfun = model_fileroot.split ('_L-')[-1].split('_')[0]
    training_budget = ''
    budget = model_fileroot.split ('_B')[-1].split('_')[0]
    try: 
        int(budget)
        training_budget = ' [train budget={}]'.format(budget)
    except:
        pass
    
    return model_name + lossfun + training_budget

In [None]:
def load_model(model_file):
    model = None
    try:
        model = lightgbm.Booster(model_file=model_file)
    except:
        print("LightGBM loading exception")
        try:
            with open(model_file, 'rb') as mf:
                model = dill.load(mf)
                print(model)
                model.n_jobs = 16
        except Exception as e:
            print(e)
            print("Dill loading exception")
            pass
    
    return model

In [None]:
def eval_all_models(eval_metrics, models_dir, test, model_filenames=None):
    
    if model_filenames is None:
        model_csv = sorted(glob.glob(models_dir + "/*.csv"))
        model_filenames = []

        for m in model_csv:
            model_df = pd.read_csv(m)
            # print(model_df)
            model_filenames.append(model_df.sort_values(by='metric')['filename'].iloc[0])
    
    print ("### Evaluating Models:", model_filenames)
    
    df = pd.concat([eval_learned_models(eval_metrics, 
                                        load_model(mf), 
                                        extract_model_name(mf), 
                                        test) for mf in model_filenames],
                   axis=0,
                   sort=False
                  )
    
    df.reset_index(inplace=True, drop=True)
    
    return df

In [None]:
def eval_all_models_under_attack_budget(eval_metrics, models_dir, test, test_groups, budget, model_filenames=None):
    
    #model_filenames = sorted(glob.glob(models_dir + "/*.model"))
    if model_filenames is None:
        model_csv = sorted(glob.glob(models_dir + "/*.csv"))
        model_filenames = []

        for m in model_csv:
            model_df = pd.read_csv(m)
            model_filenames.append(model_df.sort_values(by='metric')['filename'].iloc[0])
    
    print ("### Evaluating Models:", model_filenames)

    df = pd.concat([eval_learned_models(eval_metrics, 
                                        load_model(mf), 
                                        extract_model_name(mf), 
                                        test,
                                        test_groups, 
                                        budget=budget
                                       ) for mf in model_filenames],
                   axis=0,
                   sort=False
                  )
    
    df.reset_index(inplace=True, drop=True)
    
    return df

In [None]:
def eval_all_models_under_attack(eval_metrics, models_dir, att_tests, budgets, model_filenames=None):
    
    eval_att_dfs = []

    for b in budgets:
        eval_att_dfs.append(
            eval_all_models_under_attack_budget(eval_metrics, models_dir, att_tests[b][4], att_tests[b][5], 
                                                b, model_filenames))
        
        
    eval_att_df = functools.reduce(lambda left,right: pd.merge(left,right,on=['Model', 'Budget']), eval_att_dfs)
    eval_att_df = pd.concat(eval_att_dfs, axis=0, sort=False)
    eval_att_df.reset_index(inplace=True, drop=True)
    
    return eval_att_df

# Evaluation metrics

In [None]:
EVAL_METRICS = [ #eval_log_loss, 
                eval_binary_err_rate,
                #eval_specificity,
                #eval_precision,
                #eval_recall,
                eval_f1_micro,
                eval_f1_macro,
                eval_roc_auc
               ]


# CENSUS Dataset

In [None]:
DATASET_NAME="census"
TRAINING_BUDGETS= [30,60] #[30,60,90,120]

DATASET_DIR="../data/{}".format(DATASET_NAME)
ATK_DIR=DATASET_DIR + "/attacks"
MODELS_DIR="../out/models/{}".format(DATASET_NAME)
OUTPUT_FILENAME="../out/results/{}".format(DATASET_NAME)

TRAINING_FILENAME=DATASET_DIR + "/" + "train.csv.bz2"
TRAINING_FILENAME_ATT=ATK_DIR + "/" + "train_B{}.atks.bz2"

VALIDATION_FILENAME=DATASET_DIR + "/" + "valid.csv.bz2"
VALIDATION_FILENAME_ATT=ATK_DIR + "/" + "valid_B{}.atks.bz2"

TEST_FILENAME=DATASET_DIR + "/" + "test.csv.bz2"
TEST_FILENAME_ATT=ATK_DIR + "/" + "test_B{}.atks.bz2"

In [None]:
# Final Models
adv_models = ["../out/models/census/adv-boosting_census_B30_T100_S0050_L256_R100.model",
              "../out/models/census/adv-boosting_census_B60_T100_S0050_L256_R96.model",
              "../out/models/census/adv-boosting_census_B90_T100_S0050_L256_R98.model",
              "../out/models/census/adv-boosting_census_B120_T100_S0050_L256_R94.model"
             ]

gdbt_models = ["../out/models/census/std-gbdt_census_T100_S0050_L24_R100.model",
               "../out/models/census/std-gbdt_census_T100_S0050_L256_R100.model"]

red_models = ["../out/models/census/red-gbdt_census_T100_S0050_L24_R95.model",
             "../out/models/census/red-gbdt_census_T100_S0050_L256_R93.model"]

rf_models = ["../out/models/census/rf-gbdt_census_T100_S0050_L256_R92.model"]



robust_models = [#"../out/models/census/par-robust_census_B30_T100_D8_I20.model",
                 #"../out/models/census/par-robust_census_B60_T100_D8_I20.model",
                 #"../out/models/census/par-robust_census_B90_T100_D8_I20.model",
                 #"../out/models/census/par-robust_census_B120_T100_D8_I20.model",
                 
                 "../out/models/census/par-robust_census_L-sse_B30_T100_D8_I20.model"
                ]

icdml2019_models = ["../out/models/census/icml2019_census_B60_T100_D8_I20.model",
                   "../out/models/census/icml2019_census_B90_T100_D8_I20.model",
                   "../out/models/census/icml2019_census_B120_T100_D8_I20.model"]

test_models =  robust_models
#test_models = icdml2019_models


# REDUCED are not working any more??!?
#test_models = red_models

In [None]:
# Without attacks
TRAIN, VALID, TEST = load_atk_train_valid_test(TRAINING_FILENAME, VALIDATION_FILENAME, TEST_FILENAME)

eval_std_df = eval_all_models(EVAL_METRICS, MODELS_DIR, TEST, test_models)
eval_std_df

In [None]:
1 - eval_std_df.loc[0,'Binary Err Rate']

In [None]:
# %%capture tests

# With attacks
att_datasets = load_attacked_datasets(TRAINING_BUDGETS)

eval_att_df = eval_all_models_under_attack(EVAL_METRICS, MODELS_DIR, att_datasets, TRAINING_BUDGETS,
                                           test_models)

overall_df = pd.concat([eval_std_df, eval_att_df], 
                       axis=0, 
                       sort=False)
overall_df.reset_index(inplace=True, drop=True)
overall_df.to_csv(OUTPUT_FILENAME + ".csv", sep=",", index=False)

overall_df

In [None]:
#overall_df

# WINE Dataset

In [None]:
DATASET_NAME="wine"
TRAINING_BUDGETS= [40] #,30,40,50,60] 

DATASET_DIR="../data/{}".format(DATASET_NAME)
ATK_DIR=DATASET_DIR + "/attacks"
MODELS_DIR="../out/models/{}".format(DATASET_NAME)
OUTPUT_FILENAME="../out/results/{}".format(DATASET_NAME)

TRAINING_FILENAME=DATASET_DIR + "/" + "train.csv.bz2"
TRAINING_FILENAME_ATT=ATK_DIR + "/" + "train_B{}.atks.bz2"

VALIDATION_FILENAME=DATASET_DIR + "/" + "valid.csv.bz2"
VALIDATION_FILENAME_ATT=ATK_DIR + "/" + "valid_B{}.atks.bz2"

TEST_FILENAME=DATASET_DIR + "/" + "test.csv.bz2"
TEST_FILENAME_ATT=ATK_DIR + "/" + "test_B{}.atks.bz2"

In [None]:
!ls -lt ../out/models/wine/*.model

In [None]:
# Final Models
adv_models = [  "../out/models/wine/adv-boosting_wine_B20_T100_S0050_L256_R99.model",
                "../out/models/wine/adv-boosting_wine_B40_T100_S0050_L256_R100.model",
                "../out/models/wine/adv-boosting_wine_B60_T100_S0050_L256_R99.model",
                "../out/models/wine/adv-boosting_wine_B80_T100_S0050_L256_R100.model",
                "../out/models/wine/adv-boosting_wine_B100_T100_S0050_L256_R100.model",
                "../out/models/wine/adv-boosting_wine_B120_T100_S0050_L256_R100.model"
             ]

gdbt_models = ["../out/models/wine/std-gbdt_wine_T100_S0050_L256_R100.model" ]

#red_models = ["../out/models/wine/red-gbdt_wine_T100_S0050_L256_R100.model"]

rf_models = ["../out/models/wine/rf-gbdt_wine_T100_S0050_L256_R68.model"]

robust_models = [   #"../out/models/wine/par-robust_wine_B20_T100_D8_I20.model",
                    #"../out/models/wine/par-robust_wine_B40_T100_D8_I20.model",
                    #"../out/models/wine/par-robust_wine_B60_T100_D8_I20.model",
                    #"../out/models/wine/par-robust_wine_B80_T100_D8_I20.model",
                    #"../out/models/wine/par-robust_wine_B100_T100_D8_I20.model",
                    #"../out/models/wine/par-robust_wine_B120_T100_D8_I20.model",
                    #"../out/models/wine/par-robust_wine_L-logloss_B40_T100_D8_I20.model",
                    "../out/models/wine/par-robust_wine_L-sse_B40_T100_D8_I20.model"
                 
                ]


icml_models = ["../out/models/wine/icml2019_wine_B20_T100_D8_I20.model",
               "../out/models/wine/icml2019_wine_B40_T100_D8_I20.model",
               "../out/models/wine/icml2019_wine_B60_T100_D8_I20.model",
               "../out/models/wine/icml2019_wine_B80_T100_D8_I20.model",
               "../out/models/wine/icml2019_wine_B100_T100_D8_I20.model",
               "../out/models/wine/icml2019_wine_B120_T100_D8_I20.model"
              ]

test_models = robust_models

#test_models = icml_models

In [None]:
# Without attacks
TRAIN, VALID, TEST = load_atk_train_valid_test(TRAINING_FILENAME, VALIDATION_FILENAME, TEST_FILENAME)

eval_std_df = eval_all_models(EVAL_METRICS, MODELS_DIR, TEST, test_models)
eval_std_df

In [None]:
# With attacks
att_datasets = load_attacked_datasets(TRAINING_BUDGETS)

eval_att_df = eval_all_models_under_attack(EVAL_METRICS, MODELS_DIR, att_datasets, TRAINING_BUDGETS,
                                           test_models)

overall_df = pd.concat([eval_std_df, eval_att_df], 
                       axis=0, 
                       sort=False)
overall_df.reset_index(inplace=True, drop=True)
overall_df.to_csv(OUTPUT_FILENAME + ".csv", sep=",", index=False)

overall_df

# SPAM Dataset

In [None]:
DATASET_NAME="spam"
TRAINING_BUDGETS= [30, 60] 

DATASET_DIR="../data/{}".format(DATASET_NAME)
ATK_DIR=DATASET_DIR + "/attacks"
MODELS_DIR="../out/models/{}".format(DATASET_NAME)
OUTPUT_FILENAME="../out/results/{}".format(DATASET_NAME)

TRAINING_FILENAME=DATASET_DIR + "/" + "train.csv.bz2"
TRAINING_FILENAME_ATT=ATK_DIR + "/" + "train_B{}.atks.bz2"

VALIDATION_FILENAME=DATASET_DIR + "/" + "valid.csv.bz2"
VALIDATION_FILENAME_ATT=ATK_DIR + "/" + "valid_B{}.atks.bz2"

TEST_FILENAME=DATASET_DIR + "/" + "test.csv.bz2"
TEST_FILENAME_ATT=ATK_DIR + "/" + "test_B{}.atks.bz2"

In [None]:
# Final Models
#adv_models = [
#              "../out/models/spam/adv-boosting_spam_B30_T100_S0050_L256_R100.model",
              
#             "../out/models/spam/adv-boosting_spam_B60_T100_S0050_L256_R85.model"
#             ]
#
#gdbt_models = [
#               "../out/models/spam/std-gbdt_spam_T100_S0050_L256_R100.model"]

#red_models = [
#             "../out/models/spam/red-gbdt_spam_T100_S0050_L256_R100.model"]

#rf_models = [
#             "../out/models/spam/rf-gbdt_spam_T100_S0050_L256_R98.model"]

robust_models = [
                "../out/models/spam/par-robust_spam_B30_T100_D8_I20.model"]

test_models = robust_models



In [None]:
# Without attacks
TRAIN, VALID, TEST = load_atk_train_valid_test(TRAINING_FILENAME, VALIDATION_FILENAME, TEST_FILENAME)

eval_std_df = eval_all_models(EVAL_METRICS, MODELS_DIR, TEST, test_models)
eval_std_df

In [None]:
# With attacks
att_datasets = load_attacked_datasets(TRAINING_BUDGETS)

eval_att_df = eval_all_models_under_attack(EVAL_METRICS, MODELS_DIR, att_datasets, TRAINING_BUDGETS,
                                           test_models)

overall_df = pd.concat([eval_std_df, eval_att_df], 
                       axis=0, 
                       sort=False)
overall_df.reset_index(inplace=True, drop=True)
overall_df.to_csv(OUTPUT_FILENAME + ".csv", sep=",", index=False)

overall_df

# CREDIT Dataset

In [None]:
DATASET_NAME="credit"
TRAINING_BUDGETS= [10,30,40,60] 

DATASET_DIR="../data/{}".format(DATASET_NAME)
ATK_DIR=DATASET_DIR + "/attacks"
MODELS_DIR="../out/models/{}".format(DATASET_NAME)
OUTPUT_FILENAME="../out/results/{}".format(DATASET_NAME)

TRAINING_FILENAME=DATASET_DIR + "/" + "train.csv.bz2"
TRAINING_FILENAME_ATT=ATK_DIR + "/" + "train_B{}.atks.bz2"

VALIDATION_FILENAME=DATASET_DIR + "/" + "valid.csv.bz2"
VALIDATION_FILENAME_ATT=ATK_DIR + "/" + "valid_B{}.atks.bz2"

TEST_FILENAME=DATASET_DIR + "/" + "test.csv.bz2"
TEST_FILENAME_ATT=ATK_DIR + "/" + "test_B{}.atks.bz2"

In [None]:
# Final Models
adv_models = ["../out/models/credit/adv-boosting_credit_B10_T100_S0050_L256_R56.model",
              "../out/models/credit/adv-boosting_credit_B30_T100_S0050_L256_R40.model",
              "../out/models/credit/adv-boosting_credit_B40_T100_S0050_L256_R56.model",              
              "../out/models/credit/adv-boosting_credit_B60_T100_S0050_L256_R50.model"
             ]

gdbt_models = ["../out/models/credit/std-gbdt_credit_T100_S0050_L256_R81.model"]

#red_models = ["../out/models/credit/red-gbdt_credit_T100_S0050_L256_R39.model"]

rf_models = ["../out/models/credit/rf-gbdt_credit_T100_S0050_L256_R37.model"]

robust_models = ["../out/models/credit/par-robust_credit_B10_T100_D8_I20.model",
                 "../out/models/credit/par-robust_credit_B30_T100_D8_I20.model",
                 "../out/models/credit/par-robust_credit_B40_T100_D8_I20.model",
                 "../out/models/credit/par-robust_credit_B60_T100_D8_I20.model"
                ]

icml2019_models = ["../out/models/credit/icml2019_credit_B30_T100_D8_I20.model",
                   "../out/models/credit/icml2019_credit_B60_T100_D8_I20.model"]
icml2019_models = ["../out/models/credit/icml2019_credit_B10_T100_D8_I20.model",
                   "../out/models/credit/icml2019_credit_B40_T100_D8_I20.model"]

test_models =  adv_models +gdbt_models + rf_models + robust_models
test_models = icml2019_models

In [None]:
# Without attacks
TRAIN, VALID, TEST = load_atk_train_valid_test(TRAINING_FILENAME, VALIDATION_FILENAME, TEST_FILENAME)

eval_std_df = eval_all_models(EVAL_METRICS, MODELS_DIR, TEST, test_models)
eval_std_df

In [None]:
# With attacks
att_datasets = load_attacked_datasets(TRAINING_BUDGETS)

eval_att_df = eval_all_models_under_attack(EVAL_METRICS, MODELS_DIR, att_datasets, TRAINING_BUDGETS,
                                           test_models)

overall_df = pd.concat([eval_std_df, eval_att_df], 
                       axis=0, 
                       sort=False)
overall_df.reset_index(inplace=True, drop=True)
overall_df.to_csv(OUTPUT_FILENAME + ".csv", sep=",", index=False)

overall_df

In [None]:
overall_df

# WEBSITES Dataset

In [None]:
DATASET_NAME="websites"
TRAINING_BUDGETS= [10,30] 

DATASET_DIR="../data/{}".format(DATASET_NAME)
ATK_DIR=DATASET_DIR + "/attacks"
MODELS_DIR="../out/models/{}".format(DATASET_NAME)
OUTPUT_FILENAME="../out/results/{}".format(DATASET_NAME)

TRAINING_FILENAME=DATASET_DIR + "/" + "train.csv.bz2"
TRAINING_FILENAME_ATT=ATK_DIR + "/" + "train_B{}.atks.bz2"

VALIDATION_FILENAME=DATASET_DIR + "/" + "valid.csv.bz2"
VALIDATION_FILENAME_ATT=ATK_DIR + "/" + "valid_B{}.atks.bz2"

TEST_FILENAME=DATASET_DIR + "/" + "test.csv.bz2"
TEST_FILENAME_ATT=ATK_DIR + "/" + "test_B{}.atks.bz2"

In [None]:
# Final Models
adv_models = ["../out/models/websites/adv-boosting_websites_B10_T100_S0050_L256_R8.model",
              "../out/models/websites/adv-boosting_websites_B30_T100_S0050_L256_R7.model"]

gdbt_models = ["../out/models/websites/std-gbdt_websites_T100_S0050_L256_R6.model"]

red_models = ["../out/models/websites/red-gbdt_websites_T100_S0050_L256_R39.model"]

rf_models = ["../out/models/websites/rf-gbdt_websites_T100_S0050_L256_R1.model"]

robust_models = ["../out/models/websites/par-robust_websites_B0_T100_D8_I20.model",
                "../out/models/websites/par-robust_websites_B60_T100_D8_I20.model",
                "../out/models/websites/par-robust_websites_B30_T100_D8_I20.model"]

test_models = adv_models + gdbt_models + rf_models

In [None]:
# Without attacks
TRAIN, VALID, TEST = load_atk_train_valid_test(TRAINING_FILENAME, VALIDATION_FILENAME, TEST_FILENAME)

eval_std_df = eval_all_models(EVAL_METRICS, MODELS_DIR, TEST, test_models)
eval_std_df

In [None]:
# With attacks
att_datasets = load_attacked_datasets(TRAINING_BUDGETS)

eval_att_df = eval_all_models_under_attack(EVAL_METRICS, MODELS_DIR, att_datasets, TRAINING_BUDGETS,
                                           test_models)

overall_df = pd.concat([eval_std_df, eval_att_df], 
                       axis=0, 
                       sort=False)
overall_df.reset_index(inplace=True, drop=True)
overall_df.to_csv(OUTPUT_FILENAME + ".csv", sep=",", index=False)

overall_df

# Prune Robust models

In [None]:
to_be_pruned_models = ["../out/models/census/robust_census_B0_T100_D8_I20_20.tmp"]

for m in to_be_pruned_models:
    prune_trained_model(m, 20)


# Prune LGBM models

In [None]:
def prune_lgbm(in_file, out_file, n):
    model = lightgbm.Booster(model_file=in_file)
    model.save_model(out_file, num_iteration=n)
    print ("saved.")
    
prune_lgbm("../out/models/wine/std-gbdt_wine_T200_S0050_L24_R199.model",
           "../out/models/wine/std-gbdt_wine_T200_S0050_L24_R199.T10.model",
           10)
# prune_lgbm("../out/models/census/adv-boosting_census_B60_T200_S0050_L24_R200.model",
#            "../out/models/census/adv-boosting_census_B60_T200_S0050_L24_R200.T20.model",
#            20)

## Debug

In [8]:
with open("../out/models/wine/par-robust_wine_L-sse_B40_T10_D8_I20.model", 'rb') as f:
    model = dill.load(f)

In [9]:
import sys
def pretty_print(node, out=sys.stdout, tabs=''):

    leaf_txt = "{}Prediction: {}; Score: {:.5f};N. instances: {}; Loss: {:.5f};Gain:{:.5f} ".format(tabs,
                                                                                        node.get_node_prediction()[
                                                                                            0],
                                                                                        node.get_node_prediction()[
                                                                                            1],
                                                                                        node.values,
                                                                                        node.loss_value,
                                                                                        node.gain_value,
                                                                                                     
                                                                                                                           
                                                                                        )
    internal_node_txt = "{}Feature ID: {}; Threshold: {}; N. instances: {}; Loss: {:.5f}; Gain:{:.5f};N.constraints:{:3d}".format(tabs,
                                                                                   node.best_split_feature_id,
                                                                                   node.best_split_feature_value,
                                                                                   node.values,
                                                                                   node.loss_value,
                                                                                   node.gain_value,
                                                                                   node.constraints                                        
                                                                                   )

    if node.is_leaf():  # base case
        out.write(leaf_txt + "\n")
    else: 
        
        # recursive case
        out.write(internal_node_txt + "\n")
        #node.left.pretty_print(out, tabs + "\t")
        node.right.pretty_print(out, tabs + "\t")

pretty_print(model.estimators_[8].root)


Feature ID: 10; Threshold: 11.5; N. instances: 3118; Loss: 656.58532; Gain:63.89082;N.constraints:  0
	Feature ID: 5; Threshold: 8.0; N. instances: 594;Loss:35.98276; gain: 1.32364,N.constraints:  0
		Feature ID: 4; Threshold: 0.032; N. instances: 72;Loss:7.01935; gain: 4.25842,N.constraints:  0


TypeError: unsupported format string passed to NoneType.__format__

In [None]:
#pretty_print(model.estimators_[1].root )

# Feature importance check

In [None]:
def print_fx_imp(model, colnames):
    fx_uses = model.feature_importance(importance_type='split')
    fx_gain = model.feature_importance(importance_type='gain')

    for i,f in enumerate(np.argsort(fx_gain)[::-1]):
        print ("{:2d} {:20s} {:.3f} {:4d}".format(i, colnames[f], fx_gain[f], fx_uses[f]))

print("-- GDBT --")    
#gbdt = lightgbm.Booster(model_file="../out/models/census/std-gbdt_census_T100_S0050_L24_R100.model")
gbdt = lightgbm.Booster(model_file="../out/models/credit/std-gbdt_credit_T100_S0050_L256_R37.model")
print(gbdt.num_trees())
print_fx_imp(gbdt, TRAIN.columns)

# print(" -- Reduced GDBT --")    
# #redf = lightgbm.Booster(model_file="../out/models/census/red-gbdt_census_T100_S0050_L24_R98.model")
# redf = lightgbm.Booster(model_file="../out/models/credit/red-gbdt_credit_T100_S0050_L256_R37.model")
# print(redf.num_trees())
# print_fx_imp(redf, TRAIN.drop(columns=["workclass", 
#                                        "marital_status", 
#                                        "occupation", 
#                                        "education_num", 
#                                        "hours_per_week", 
#                                        "capital_gain"
#                                       ]).columns) 


# print("-- Adv. Boosting --")    
# advb = lightgbm.Booster(model_file="../out/models/census/adv-boosting_census_B30_T100_S0050_L24_R100.model")
# print(advb.num_trees())
# print_fx_imp(advb, TRAIN.columns)


In [None]:
TRAIN[['PAY_0', 'BILL_AMT1', 'PAY_AMT3', 'PAY_AMT2', 'PAY_AMT6', 'PAY_2', 'LIMIT_BAL', 'AGE']].describe()

In [None]:
bb = 40
eval_learned_models(lightgbm.Booster(model_file="../out/models/wine2/red-gbdt_wine2_T500_S0050_L24_R281.model"), 
                                        extract_model_name("../out/models/wine2/red-gbdt_wine2_T500_S0050_L24_R281.model"), 
                                        att_datasets[bb][4].drop(columns=["alcohol", "residual_sugar", "volatile_acidity"]), 
                                        att_datasets[bb][5], 
                                        budget=bb
                                       ) 

In [None]:
!cat ../out/models/census/par-robust_census_B0_T100_D8_I20.model | tail

In [None]:
!git pull

In [None]:
!git commit -am "calza"

In [None]:
!git pull

In [None]:
!git push

# Financial Distress

In [None]:
DATASET_NAME="financial"
TRAINING_BUDGETS= [10] 

DATASET_DIR="../data/{}".format(DATASET_NAME)
ATK_DIR=DATASET_DIR + "/attacks"
MODELS_DIR="../out/models/{}".format(DATASET_NAME)
OUTPUT_FILENAME="../out/results/{}".format(DATASET_NAME)

TRAINING_FILENAME=DATASET_DIR + "/" + "train.csv.bz2"
TRAINING_FILENAME_ATT=ATK_DIR + "/" + "train_B{}.atks.bz2"

VALIDATION_FILENAME=DATASET_DIR + "/" + "valid.csv.bz2"
VALIDATION_FILENAME_ATT=ATK_DIR + "/" + "valid_B{}.atks.bz2"

TEST_FILENAME=DATASET_DIR + "/" + "test.csv.bz2"
TEST_FILENAME_ATT=ATK_DIR + "/" + "test_B{}.atks.bz2"

In [None]:
# Final Models
adv_models = ["../out/models/financial/adv-boosting_financial_B10_T100_S0050_L256_R51.model"
              ]

gdbt_models = ["../out/models/financial/std-gbdt_financial_T100_S0050_L256_R54.model"]

red_models = ["../out/models/financial/red-gbdt_financial_T100_S0050_L256_R49.model"]

rf_models = ["../out/models/financial/rf-gbdt_financial_T100_S0050_L256_R12.model"]

robust_models = ["../out/models/websites/par-robust_websites_B0_T100_D8_I20.model",
                "../out/models/websites/par-robust_websites_B60_T100_D8_I20.model",
                "../out/models/websites/par-robust_websites_B30_T100_D8_I20.model"]

test_models = adv_models + gdbt_models + rf_models + red_models

In [None]:
# Without attacks
TRAIN, VALID, TEST = load_atk_train_valid_test(TRAINING_FILENAME, VALIDATION_FILENAME, TEST_FILENAME)

eval_std_df = eval_all_models(EVAL_METRICS, MODELS_DIR, TEST, test_models)
eval_std_df

In [None]:
# With attacks
att_datasets = load_attacked_datasets(TRAINING_BUDGETS)

eval_att_df = eval_all_models_under_attack(EVAL_METRICS, MODELS_DIR, att_datasets, TRAINING_BUDGETS,
                                           test_models)

overall_df = pd.concat([eval_std_df, eval_att_df], 
                       axis=0, 
                       sort=False)
overall_df.reset_index(inplace=True, drop=True)
overall_df.to_csv(OUTPUT_FILENAME + ".csv", sep=",", index=False)

overall_df

# malware

In [None]:
DATASET_NAME="malware"
TRAINING_BUDGETS= [20,40] 

DATASET_DIR="../data/{}".format(DATASET_NAME)
ATK_DIR=DATASET_DIR + "/attacks"
MODELS_DIR="../out/models/{}".format(DATASET_NAME)
OUTPUT_FILENAME="../out/results/{}".format(DATASET_NAME)

TRAINING_FILENAME=DATASET_DIR + "/" + "train.csv.bz2"
TRAINING_FILENAME_ATT=ATK_DIR + "/" + "train_B{}.atks.bz2"

VALIDATION_FILENAME=DATASET_DIR + "/" + "valid.csv.bz2"
VALIDATION_FILENAME_ATT=ATK_DIR + "/" + "valid_B{}.atks.bz2"

TEST_FILENAME=DATASET_DIR + "/" + "test.csv.bz2"
TEST_FILENAME_ATT=ATK_DIR + "/" + "test_B{}.atks.bz2"

In [None]:
# Final Models

Ab_models = ["../out/models/malware/adv-boosting_malware_B20_T100_S0050_L256_R69.model",
             "../out/models/malware/adv-boosting_malware_B40_T100_S0050_L256_R97.model"
             
            ]
rf_models = ["../out/models/malware/rf-gbdt_malware_T100_S0050_L256_R48.model"]

std_mod =["../out/models/malware/std-gbdt_malware_T100_S0050_L256_R100.model"]


test_models = Ab_models + rf_models

In [None]:
# Without attacks
TRAIN, VALID, TEST = load_atk_train_valid_test(TRAINING_FILENAME, VALIDATION_FILENAME, TEST_FILENAME)

eval_std_df = eval_all_models(EVAL_METRICS, MODELS_DIR, TEST, test_models)
eval_std_df

In [None]:
# With attacks
att_datasets = load_attacked_datasets(TRAINING_BUDGETS)

eval_att_df = eval_all_models_under_attack(EVAL_METRICS, MODELS_DIR, att_datasets, TRAINING_BUDGETS,
                                           test_models)

overall_df = pd.concat([eval_std_df, eval_att_df], 
                       axis=0, 
                       sort=False)
overall_df.reset_index(inplace=True, drop=True)
overall_df.to_csv(OUTPUT_FILENAME + ".csv", sep=",", index=False)

overall_df