# Documentation

 - http://lightgbm.readthedocs.io/en/latest/
 - http://lightgbm.readthedocs.io/en/latest/Python-Intro.html
 - https://github.com/Microsoft/LightGBM/tree/master/examples/python-guide

In [9]:
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm
import functools
from os import listdir
from os.path import isfile, join
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score, f1_score
# Adding the following line, allows Jupyter Notebook to visualize plots
# produced by matplotlib directly below the code cell which generated those.
%matplotlib inline

# Seed NumPy's global random number generator so that repeated runs draw the same values.
np.random.seed(73)

# Load 6 different datasets:
-  Training set (original)
-  Training set (_attacked_)
-  Validation set (original)
-  Validation set (_attacked_)
-  Test set (original)
-  Test set (_attacked_)

In [10]:
# Directories holding the input datasets and the serialized models.
DATASETS_PATH = "../data/census"
MODELS_PATH = "../out/models"

# Attacker profile that selects which perturbed files are read (the alternative is "weak").
ATTACKER = "strong"

# File names: "<split>_ori" is the original split, "<split>_<attacker>_att" its perturbed version.
TRAINING_SET = "train_ori.csv.bz2"
TRAINING_SET_ATT = "train_{}_att.csv.bz2".format(ATTACKER)
VALIDATION_SET = "valid_ori.csv.bz2"
VALIDATION_SET_ATT = "valid_{}_att.csv.bz2".format(ATTACKER)
TEST_SET = "test_ori.csv.bz2"
TEST_SET_ATT = "test_{}_att.csv.bz2".format(ATTACKER)

In [11]:
def load_dataset(path, dataset_filename, sep=","):
    """Read the delimited file ``<path>/<dataset_filename>`` into a DataFrame.

    Args:
        path: Directory containing the file (string, joined with "/").
        dataset_filename: Name of the file inside ``path``.
        sep: Field separator forwarded to ``pandas.read_csv``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pandas.read_csv``.
    """
    full_path = "/".join((path, dataset_filename))
    return pd.read_csv(full_path, sep=sep)

In [12]:
def infer_categorical_features(dataset):
    """Return the names of the columns whose dtype is ``object``.

    Args:
        dataset: A ``pandas.DataFrame``.

    Returns:
        List of column names, in the order they appear in ``dataset``.
    """
    return [name for name in dataset.columns if dataset[name].dtype == 'object']
            
def label_encode(dataset, categorical_features):
    """Return a copy of ``dataset`` with the categorical columns replaced by integer codes.

    Each selected column is converted to the pandas ``category`` dtype and then
    replaced by its category codes. The input frame is left untouched.

    Args:
        dataset: A ``pandas.DataFrame``.
        categorical_features: Container of column names to encode; names that
            are not columns of ``dataset`` are ignored.

    Returns:
        A new ``pandas.DataFrame`` with the same columns as ``dataset``.
    """
    encoded = dataset.copy()
    targets = [name for name in encoded.columns if name in categorical_features]
    for name in targets:
        encoded[name] = encoded[name].astype('category').cat.codes
    return encoded

In [13]:
def one_hot_encode(dataset, label):
    """One-hot encode ``dataset`` and move the ``label`` column to the last position.

    Args:
        dataset: A ``pandas.DataFrame``.
        label: Name of the label column; it must still be present after
            ``pandas.get_dummies`` has been applied.

    Returns:
        The encoded ``pandas.DataFrame`` with ``label`` as its last column.

    Raises:
        ValueError: If ``label`` is not a column of the encoded frame.
    """
    encoded = pd.get_dummies(dataset)

    # Same columns, with the label moved from wherever it is to the end.
    ordered = encoded.columns.tolist()
    ordered.remove(label)
    ordered.append(label)

    encoded = encoded.loc[:, ordered]
    encoded.columns = ordered
    return encoded

In [14]:
# Read each split twice: the original file first, then its attacked counterpart.
TRAIN, TRAIN_ATT = (
    load_dataset(DATASETS_PATH, filename)
    for filename in (TRAINING_SET, TRAINING_SET_ATT)
)

VALID, VALID_ATT = (
    load_dataset(DATASETS_PATH, filename)
    for filename in (VALIDATION_SET, VALIDATION_SET_ATT)
)

TEST, TEST_ATT = (
    load_dataset(DATASETS_PATH, filename)
    for filename in (TEST_SET, TEST_SET_ATT)
)

# Compute group lengths

In [15]:
# For every attacked dataset: how many rows share each `instance_id`, listed by increasing id.
# NOTE(review): presumably used as LightGBM group lengths, which would require the rows of
# each instance to be contiguous and ordered by `instance_id` — confirm against the data files.
TRAIN_ATT_OFFSETS, VALID_ATT_OFFSETS, TEST_ATT_OFFSETS = (
    attacked['instance_id'].value_counts().sort_index().values
    for attacked in (TRAIN_ATT, VALID_ATT, TEST_ATT)
)

# Infer _categorical_ features

In [16]:
# Categorical columns are detected on the original training set only.
CATEGORICAL_FEATURES = infer_categorical_features(TRAIN)
print("List of categorical features: [{}]".format(", ".join(CATEGORICAL_FEATURES)))

List of categorical features: [workclass, education, marital_status, occupation, relationship, race, sex, native_country]


# Transform _categorical_ features to _numeric_ (label encoding)

In [17]:
# Replace every categorical column by integer codes, in all six datasets.
# The attacked datasets drop their first column (`.iloc[:,1:]`) before being encoded.
# NOTE(review): each dataset is encoded on its own, so the integer assigned to a category
# depends on the values present in that dataset; if a split lacks some category, its codes
# may not line up with the other splits — confirm all splits contain the same categories.
TRAIN = label_encode(TRAIN, set(CATEGORICAL_FEATURES))
TRAIN_ATT = label_encode(TRAIN_ATT.iloc[:,1:], set(CATEGORICAL_FEATURES))

VALID = label_encode(VALID, set(CATEGORICAL_FEATURES))
VALID_ATT = label_encode(VALID_ATT.iloc[:,1:], set(CATEGORICAL_FEATURES))

TEST = label_encode(TEST, set(CATEGORICAL_FEATURES))
TEST_ATT = label_encode(TEST_ATT.iloc[:,1:], set(CATEGORICAL_FEATURES))

# Transform _categorical_ features to _numeric_ (one-hot encoding)

In [18]:
# TRAIN = one_hot_encode(TRAIN, "income_greater_than_50k")
# TRAIN_ATT = one_hot_encode(TRAIN_ATT.iloc[:,1:], "income_greater_than_50k")

# VALID = one_hot_encode(VALID, "income_greater_than_50k")
# VALID_ATT = one_hot_encode(VALID_ATT.iloc[:,1:], "income_greater_than_50k")

# TEST = one_hot_encode(TEST, "income_greater_than_50k")
# TEST_ATT = one_hot_encode(TEST_ATT.iloc[:,1:], "income_greater_than_50k")

# common_columns = TRAIN.columns & VALID.columns & TEST.columns

# TRAIN = TRAIN[common_columns]
# TRAIN_ATT = TRAIN_ATT[common_columns]
# VALID = VALID[common_columns]
# VALID_ATT = VALID_ATT[common_columns]
# TEST = TEST[common_columns]
# TEST_ATT = TEST_ATT[common_columns]

In [19]:
# Show the (rows, columns) shape of every dataset, one per line.
print(
    *(dataset.shape for dataset in (TRAIN, TRAIN_ATT, VALID, VALID_ATT, TEST, TEST_ATT)),
    sep="\n"
)

(27145, 15)
(1424376, 15)
(3017, 15)
(155826, 15)
(15060, 15)
(795276, 15)


In [20]:
# Stack the original training and validation sets row-wise into a single frame.
# NOTE(review): the row index of each input is kept as is, so index labels may repeat
# in the result — use positional access (`.iloc`) on it, or confirm this is intended.
TRAIN_VALID = pd.concat([TRAIN, VALID], axis=0)

In [21]:
# Sanity check: the row count should be the sum of the training and validation row counts.
print(TRAIN_VALID.shape)

(30162, 15)


# Hyperparameters used for _standard_ and _baseline_ learning

-  Training is done by optimizing (i.e., minimizing) standard **(binary) log loss** (<code>fobj=optimize_log_loss</code>)
-  Evaluation is measured using standard **(binary) log loss** (<code>feval=avg_log_loss</code>)

In [22]:
# Please, refer to https://slundberg.github.io/shap/notebooks/Census%20income%20classification%20with%20LightGBM.html for any further detail
# or
# https://medium.com/@pushkarmandot/https-medium-com-pushkarmandot-what-is-lightgbm-how-to-implement-it-how-to-fine-tune-the-parameters-60347819b7fc
STD_PARAMS = dict(
    max_bin=511,
    learning_rate=0.05,
    boosting_type="gbdt",  # alternative: "rf"
    objective="regression_l2",  # alternative: "binary"
    # No built-in metric: we use our own implementation of binary log loss instead of the
    # default one (i.e., "binary_logloss"), which may be in fact cross-entropy.
    metric=["None"],
    num_leaves=15,
    verbose=1,
    min_data_in_leaf=20,
    bagging_freq=1,
    bagging_fraction=0.8,
    feature_fraction=0.8,
    boost_from_average=True,
)

# Hyperparameters used for _non-interferent_ learning

-  Training is done by optimizing (i.e., minimizing) one of our custom _objective functions_:
    -  **(binary) log loss under max attack** (<code>fobj=optimize_log_loss_uma</code>);
    -  **weighted sum of (binary) log loss and (binary) log loss under max attack** (<code>fobj=optimize_non_interferent_log_loss</code>);
    
-  Evaluation is measured using one of our custom _evaluation functions_: 
    -  **(binary) log loss under max attack** (<code>feval=avg_log_loss_uma</code>);
    -  **weighted sum of (binary) log loss and (binary) log loss under max attack** (<code>feval=avg_non_interferent_log_loss</code>).

In [23]:
# Please, refer to https://slundberg.github.io/shap/notebooks/Census%20income%20classification%20with%20LightGBM.html for any further detail
# or
# https://medium.com/@pushkarmandot/https-medium-com-pushkarmandot-what-is-lightgbm-how-to-implement-it-how-to-fine-tune-the-parameters-60347819b7fc
NON_INTERFERENT_PARAMS = dict(
    max_bin=511,
    learning_rate=0.05,
    boosting_type="gbdt",  # alternative: "rf"
    objective="regression_l2",
    # No built-in metric: we will specify our own custom objective and evaluation functions.
    metric=["None"],
    num_leaves=15,
    verbose=1,
    min_data_in_leaf=20,
    bagging_freq=1,
    bagging_fraction=0.8,
    feature_fraction=0.8,
    boost_from_average=True,
)

# Common Hyperparameters

In [24]:
# Grid of boosting-round budgets to try: from MIN to MAX (inclusive) in steps of STEP.
MIN_BOOST_ROUNDS = 200
MAX_BOOST_ROUNDS = 200
STEP_BOOST_ROUNDS = 50
BOOST_ROUNDS = list(range(MIN_BOOST_ROUNDS, MAX_BOOST_ROUNDS + 1, STEP_BOOST_ROUNDS))

# alpha weight for standard learning (i.e., the loss coincides with the binary log loss)
STD_ALPHA = 0.0
# alpha weight for non-interferent learning (i.e., the loss coincides with the binary log loss under max attack)
NON_INTERFERENT_ALPHA = 1.0

# Standard objective function

The following function, called <code>optimize_log_loss</code>, is the one that should be optimized (i.e., minimized) for learning _standard_ and _baseline_ approaches. More specifically, this is the standard binary log loss which is used to train any _standard_ or _baseline_ model.

# $L$ = <code>optimize_log_loss</code>

$$
L = \frac{1}{|\mathcal{D}|} \cdot \sum_{(\mathbf{x},y) \in \mathcal{D}}\ell(h(\mathbf{x}), y)
$$

where:

$$
\ell(h(\mathbf{x}), y) = \log\left(1+e^{-y\,h(\mathbf{x})}\right)
$$

In [25]:
# self-defined objective function
# f(preds: array, train_data: Dataset) -> grad: array, hess: array

# To be used with a regression task
def optimize_log_loss(preds, train_data):
    """Gradient and hessian of the average binary log loss ``log(1 + exp(-y * h))``.

    The derivatives are taken with respect to the raw scores ``preds`` and are
    scaled by ``1 / len(preds)`` so that the *average* loss is optimized.

    Args:
        preds: Array of raw model scores ``h``.
        train_data: Object exposing ``get_label()`` (e.g. a LightGBM Dataset).
            The loss formula assumes labels ``y`` encoded as -1 / +1.

    Returns:
        Tuple ``(grads, hess)`` of arrays with one entry per prediction.
    """
    labels = train_data.get_label()
    margins = preds * labels

    # sigmoid(-m) = 1 / (1 + e^m) and sigmoid(m) = 1 / (1 + e^-m).
    # Going through logaddexp keeps both finite for large |m|; evaluating
    # e^m directly overflows to inf and turns the hessian into inf/inf = NaN.
    sigmoid_neg = np.exp(-np.logaddexp(0.0, margins))
    sigmoid_pos = np.exp(-np.logaddexp(0.0, -margins))

    # d/dh   log(1 + e^(-y h)) = -y * sigmoid(-y h)
    grads = -labels * sigmoid_neg
    # d2/dh2 log(1 + e^(-y h)) = y^2 * sigmoid(y h) * sigmoid(-y h)
    hess = labels**2 * sigmoid_pos * sigmoid_neg

    # this is to optimize average logloss
    norm = 1.0/len(preds)
    grads = grads * norm
    hess = hess * norm

    return grads, hess

# Custom objective function

In addition to the standard binary log loss used to train a model, we introduce our custom <code>optimize_non_interferent_log_loss</code>, which is computed as the weighted combination of two objective functions, as follows:

-  $L$ = <code>optimize_log_loss</code> (standard, already seen above);
-  $L^A$ = <code>optimize_log_loss_uma</code> (custom, defined below).

# $L^A$ = <code>optimize_log_loss_uma</code>

This function is used to train a **full** _non-interferent_ model; in other words, full non-interferent models are learned by optimizing (i.e., minimizing) the function which measures the binary log loss **under the maximal attack** possible.

$$
L^A = \frac{1}{|\mathcal{D}|} \cdot \sum_{(\mathbf{x},y) \in \mathcal{D}} \log  \left( \sum_{\mathbf{x}' \in \mathit{MaxAtk}({\mathbf{x}},{A})} e^{\ell(h(\mathbf{x}'), y)} \right).
$$

where still:

$$
\ell(h(\mathbf{x}), y) = \log\left(1+e^{-y\,h(\mathbf{x})}\right)
$$

In [26]:
# self-defined objective function
# f(preds: array, train_data: Dataset) -> grad: array, hess: array

def optimize_log_loss_uma(preds, train_data):
    """Gradient and hessian of the binary log loss under maximal attack.

    For every group of consecutive rows the loss is
    ``log(sum_j (1 + exp(-y_j * h_j)))``; the result is averaged over groups.
    With ``x_j = exp(-y_j h_j) / sum_k (1 + exp(-y_k h_k))`` the returned values
    are ``grad_j = -y_j * x_j / n_groups`` and ``hess_j = x_j (1 - x_j) / n_groups``.

    Args:
        preds: Array of raw model scores.
        train_data: Object exposing ``get_label()`` and ``get_group()``
            (e.g. a LightGBM Dataset). ``get_group()`` gives the length of
            each group of consecutive rows, or ``None``.

    Returns:
        Tuple ``(grads, hess)`` of float64 arrays shaped like the labels.
        Both are all zeros when no group information is available.
    """
    labels = train_data.get_label()
    attack_lens = train_data.get_group()

    grads = np.zeros_like(labels, dtype=np.float64)
    hess = np.zeros_like(grads)

    if attack_lens is None:
        return grads, hess

    norm = 1.0 / float(len(attack_lens))

    offset = 0
    for atk in attack_lens:
        group = slice(offset, offset + atk)
        group_labels = labels[group]
        neg_margins = -preds[group] * group_labels

        # Scale numerator and denominator of x_j by exp(-shift), with
        # shift = max(0, max_j(-y_j h_j)). The ratio is unchanged, but no
        # exponent is positive any more, so exp() cannot overflow and the
        # former 0 * inf = NaN case disappears. `initial` also covers an
        # empty group.
        shift = np.max(neg_margins, initial=0.0)
        scaled = np.exp(neg_margins - shift)
        denominator = neg_margins.size * np.exp(-shift) + np.sum(scaled)

        x_grad = scaled / denominator

        grads[group] = norm * x_grad * (-group_labels)
        hess[group] = norm * x_grad * (1.0 - x_grad)

        offset += atk

    return grads, hess

# <code>optimize_non_interferent_log_loss</code>

$$
\alpha\cdot L^A + (1-\alpha)\cdot L
$$

$$
\alpha \cdot \underbrace{\Bigg[\frac{1}{|\mathcal{D}|} \cdot \sum_{(\mathbf{x},y) \in \mathcal{D}} \log  \left( \sum_{\mathbf{x}' \in \mathit{MaxAtk}({\mathbf{x}},{A})} e^{\ell(h(\mathbf{x}'), y)} \right)\Bigg]}_{L^A} + (1-\alpha) \cdot \underbrace{\Bigg[\frac{1}{|\mathcal{D}|} \cdot \sum_{(\mathbf{x},y) \in \mathcal{D}} \ell(h(\mathbf{x}), y)\Bigg]}_{L}
$$

In [27]:
# self-defined objective function
# f(preds: array, train_data: Dataset) -> grad: array, hess: array

def optimize_non_interferent_log_loss(preds, train_data, alpha=1.0):
    """Blend the under-max-attack and the plain log-loss derivatives.

    Args:
        preds: Array of raw model scores.
        train_data: Dataset handed to both underlying objective functions.
        alpha: Weight of the under-max-attack term; the plain term is
            weighted by ``1 - alpha``.

    Returns:
        Tuple ``(grads, hess)``, each computed as
        ``alpha * uma + (1 - alpha) * plain``.
    """
    # (grads, hess) of the binary logloss under maximal attack
    under_attack = optimize_log_loss_uma(preds, train_data)
    # (grads, hess) of the plain binary logloss
    plain = optimize_log_loss(preds, train_data)

    # same convex combination applied to the gradients and to the hessians
    grads, hess = (
        alpha*attacked_term + (1.0-alpha)*plain_term
        for attacked_term, plain_term in zip(under_attack, plain)
    )
    return grads, hess

## Using one objective function for both _standard_ and _non-interferent_ learning

The advantage of the <code>optimize_non_interferent_log_loss</code> function defined above is that we can wrap it so that we can use it as the only objective function (<code>fobj</code>) passed in to LightGBM. 

In other words, if we call <code>fobj=optimize_non_interferent_log_loss</code> with <code>alpha=0.0</code>, this will end up optimizing (i.e., minimizing) the standard objective function (i.e., the standard binary log loss, defined by the function <code>optimize_log_loss</code> above).

Conversely, calling <code>fobj=optimize_non_interferent_log_loss</code> with <code>alpha=1.0</code> turns into optimizing (i.e., minimizing) the full non-interferent objective function (i.e., the custom binary log loss under max attack, defined by the function <code>optimize_log_loss_uma</code> above).

Anything that sits in between (i.e., <code>0 < alpha < 1</code>) optimizes an objective function that trades off between the standard and the full non-interferent term.

# Standard evaluation metric

The following function is the one used for evaluating the quality of the learned model (either _standard_, _baseline_, or _non-interferent_). This is the standard <code>avg_log_loss</code>.

In [28]:
def logistic(x):
    """Logistic (sigmoid) function ``1 / (1 + exp(-x))``, for scalars or arrays."""
    denominator = 1.0 + np.exp(-x)
    return 1.0/denominator

In [29]:
def logit(p):
    """Log-odds ``log(p / (1 - p))`` of a probability, i.e. the inverse of the logistic function."""
    odds = p/(1-p)
    return np.log(odds)

# <code>avg_log_loss</code>

In [30]:
# self-defined eval metric
# f(preds: array, train_data: Dataset) -> name: string, value: array, is_higher_better: bool
def avg_log_loss(preds, train_data):
    """Average binary log loss ``log(1 + exp(-y * h))`` over all predictions.

    Args:
        preds: Array of raw model scores ``h``.
        train_data: Object exposing ``get_label()`` (e.g. a LightGBM Dataset).
            The loss formula assumes labels ``y`` encoded as -1 / +1.

    Returns:
        Tuple ``(metric_name, average_loss, is_higher_better)`` where
        ``is_higher_better`` is always ``False``.
    """
    labels = train_data.get_label()

    # logaddexp(0, z) == log(1 + e^z) without overflowing for large z;
    # the direct formula returns inf as soon as e^z exceeds the float range.
    losses = np.logaddexp(0.0, -preds*labels)
    avg_loss = np.mean(losses)

    return 'avg_binary_log_loss', avg_loss, False

In [31]:
def eval_log_loss(model, boost_round, test, test_groups=None):
    """Average binary log loss of ``model`` on ``test``.

    Args:
        model: Object with ``predict(X, num_iteration=...)``; when
            ``boost_round`` is negative it must offer ``predict_proba(X)``
            instead (used for non-tree-based models like SVM).
        boost_round: Number of boosting iterations used for prediction, or a
            negative value for models without trees.
        test: DataFrame whose last column is the label and whose other
            columns are the features.
        test_groups: Unused; accepted so that all metrics share one signature.

    Returns:
        The value computed by ``avg_log_loss``.
    """
    features = test.iloc[:,:-1].values
    lgbm_test = lightgbm.Dataset(data=features,
                                 label=test.iloc[:,-1].values,
                                 free_raw_data=False)

    if boost_round < 0:
        # the logit function (inverse of the logistic function) maps the probabilities output
        # by sklearn's predict_proba from [0,1] to a real number in [-inf, +inf]
        scores = logit(model.predict_proba(features)[:,1])
    else:
        scores = model.predict(features, num_iteration=boost_round)

    return avg_log_loss(scores, lgbm_test)[1]

# Custom evaluation metric

Similarly to what we have done for <code>fobj</code>, <code>feval</code> can be computed from a weighted combination of two evaluation metrics:

-  <code>avg_log_loss</code> (standard, defined above);
-  <code>avg_log_loss_uma</code> (custom, defined below).

# <code>avg_log_loss_uma</code>

This is the binary log loss, modified to operate on groups of perturbed instances: for each group, only the largest loss among its instances is kept.

In [32]:
# Our custom metric

def binary_log_loss(pred, true_label):
    """Binary log loss ``log(1 + exp(-pred * true_label))``, for scalars or arrays.

    Computed with ``np.logaddexp`` so that large arguments give the correct
    finite loss instead of overflowing to ``inf``.
    """
    return np.logaddexp(0.0, -pred * true_label)

# self-defined eval metric
# f(preds: array, train_data: Dataset) -> name: string, value: array, is_higher_better: bool
def avg_log_loss_uma(preds, train_data):
    """Average, over groups, of the largest binary log loss inside each group.

    Args:
        preds: Array of raw model scores.
        train_data: Object exposing ``get_label()`` and ``get_group()``
            (e.g. a LightGBM Dataset). ``get_group()`` gives the length of
            each group of consecutive rows, or ``None``.

    Returns:
        Tuple ``(metric_name, value, is_higher_better)``. ``value`` is ``0.0``
        when no group information is available; ``is_higher_better`` is
        always ``False``.
    """
    metric_name = 'avg_binary_log_loss_under_max_attack'

    attack_lens = train_data.get_group()
    if attack_lens is None:
        return metric_name, 0.0, False

    preds = np.asarray(preds)
    labels = np.asarray(train_data.get_label())

    offset = 0
    max_logloss = []
    for atk in attack_lens:
        # one vectorised call per group instead of one Python call per row
        group_losses = binary_log_loss(preds[offset:offset+atk], labels[offset:offset+atk])
        max_logloss.append(np.max(group_losses))
        offset += atk

    return metric_name, np.mean(max_logloss), False

In [33]:
def eval_log_loss_uma(model, boost_round, test, test_groups=None):
    """Binary log loss under maximal attack of ``model`` on ``test``.

    Args:
        model: Object with ``predict(X, num_iteration=...)``; when
            ``boost_round`` is negative it must offer ``predict_proba(X)``
            instead (used for non-tree-based models like SVM).
        boost_round: Number of boosting iterations used for prediction, or a
            negative value for models without trees.
        test: DataFrame whose last column is the label and whose other
            columns are the features.
        test_groups: Group lengths attached to the LightGBM dataset.

    Returns:
        The value computed by ``avg_log_loss_uma``.
    """
    features = test.iloc[:,:-1].values
    lgbm_test = lightgbm.Dataset(data=features,
                                 label=test.iloc[:,-1].values,
                                 group=test_groups,
                                 free_raw_data=False)

    if boost_round < 0:
        # the logit function (inverse of the logistic function) maps the probabilities output
        # by sklearn's predict_proba from [0,1] to a real number in [-inf, +inf]
        scores = logit(model.predict_proba(features)[:,1])
    else:
        scores = model.predict(features, num_iteration=boost_round)

    return avg_log_loss_uma(scores, lgbm_test)[1]

# <code>feval=avg_non_interferent_log_loss</code>

Used for measuring the validity of any model (either _standard_, _baseline_, or _non-interferent_). More precisely, <code>avg_non_interferent_log_loss</code> is the weighted sum of the binary log loss and the binary log loss under maximal attack.

In [34]:
# LightGBM calls feval(preds, train_data); to use another alpha, wrap this function, e.g.
# feval=lambda preds, train_data: avg_non_interferent_log_loss(preds, train_data, alpha=0.5)

def avg_non_interferent_log_loss(preds, train_data, alpha=1.0):
    """Weighted sum of the log loss under maximal attack and the plain log loss.

    Args:
        preds: Array of raw model scores.
        train_data: Dataset handed to both underlying metrics.
        alpha: Weight of the under-max-attack loss; the plain loss is
            weighted by ``1 - alpha``.

    Returns:
        Tuple ``(metric_name, weighted_loss, is_higher_better)`` where the
        name embeds ``alpha`` and ``is_higher_better`` is always ``False``.
    """
    # binary logloss under maximal attack
    loss_uma = avg_log_loss_uma(preds, train_data)[1]
    # binary logloss (plain)
    loss_plain = avg_log_loss(preds, train_data)[1]

    metric_name = 'avg_non_interferent_log_loss [alpha={}]'.format(alpha)
    return metric_name, alpha*loss_uma + (1.0-alpha)*loss_plain, False


def eval_non_interferent_log_loss(model, boost_round, test, test_groups=None, alpha=1.0):
    """Weighted (attack / plain) log loss of ``model`` on ``test``.

    Args:
        model: Object with ``predict(X, num_iteration=...)``; when
            ``boost_round`` is negative it must offer ``predict_proba(X)``
            instead (used for non-tree-based models like SVM).
        boost_round: Number of boosting iterations used for prediction, or a
            negative value for models without trees.
        test: DataFrame whose last column is the label and whose other
            columns are the features.
        test_groups: Group lengths attached to the LightGBM dataset.
        alpha: Weight forwarded to ``avg_non_interferent_log_loss``.

    Returns:
        The value computed by ``avg_non_interferent_log_loss``.
    """
    features = test.iloc[:,:-1].values
    lgbm_test = lightgbm.Dataset(data=features,
                                 label=test.iloc[:,-1].values,
                                 group=test_groups,
                                 free_raw_data=False)

    if boost_round < 0:
        # the logit function (inverse of the logistic function) maps the probabilities output
        # by sklearn's predict_proba from [0,1] to a real number in [-inf, +inf]
        scores = logit(model.predict_proba(features)[:,1])
    else:
        scores = model.predict(features, num_iteration=boost_round)

    return avg_non_interferent_log_loss(scores, lgbm_test, alpha=alpha)[1]

# Additional validity measures

In addition to the evaluation metrics defined above (used for training), we also consider the following **4** measures of validity to compare the performance of each learned model:

-  <code>eval_binary_err_rate</code>: This is the traditional binary error rate (1-accuracy);
-  <code>eval_binary_err_rate_uma</code>: This is the binary error rate modified to operate on groups of perturbed instances under maximal attack.
-  <code>eval_roc_auc</code>: This is the classical ROC AUC score;
-  <code>eval_roc_auc_uma</code>: This is the ROC AUC score modified to operate on groups of perturbed instances under maximal attack.

Again, note that those are **not** metrics used at training time (i.e., they do not define any <code>feval</code>), rather they are used to assess the (offline) quality of each learned model.

# <code>eval_binary_err_rate</code>

In [35]:
def eval_binary_err_rate(model, boost_round, test_set, test_groups=None):
    """Fraction of rows whose thresholded prediction differs from the label.

    Scores strictly greater than 0 are predicted as 1, all others as -1.

    Args:
        model: Object with ``predict(X, num_iteration=...)``; when
            ``boost_round`` is negative it must offer ``predict_proba(X)``
            instead (used for non-tree-based models like SVM).
        boost_round: Number of boosting iterations used for prediction, or a
            negative value for models without trees.
        test_set: DataFrame whose last column is the label and whose other
            columns are the features.
        test_groups: Unused; accepted so that all metrics share one signature.

    Returns:
        Number of mismatches divided by the number of predictions.
    """
    X = test_set.iloc[:,:-1].values
    labels = test_set.iloc[:,-1].values

    if boost_round < 0:
        # the logit function (inverse of the logistic function) maps the probabilities output
        # by sklearn's predict_proba from [0,1] to a real number in [-inf, +inf]
        scores = logit(model.predict_proba(X)[:,1])
    else:
        scores = model.predict(X, num_iteration=boost_round)

    predicted = [1 if score > 0 else -1 for score in scores]
    mistakes = sum(1 for guess, truth in zip(predicted, labels) if guess != truth)

    return mistakes/len(predicted)

# <code>eval_binary_err_rate_uma</code>

In [36]:
def eval_binary_err_rate_uma(model, boost_round, test_set, test_groups=None):
    """Fraction of groups containing at least one misclassified row.

    Scores strictly greater than 0 are predicted as 1, all others as -1. The
    label of the first row of a group is taken as the label of the group.

    Args:
        model: Object with ``predict(X, num_iteration=...)``; when
            ``boost_round`` is negative it must offer ``predict_proba(X)``
            instead (used for non-tree-based models like SVM).
        boost_round: Number of boosting iterations used for prediction, or a
            negative value for models without trees.
        test_set: DataFrame whose last column is the label and whose other
            columns are the features.
        test_groups: Lengths of the groups of consecutive rows; required
            despite the ``None`` default.

    Returns:
        Number of groups with an error divided by the number of groups.
    """
    X = test_set.iloc[:,:-1].values
    labels = test_set.iloc[:,-1].values

    if boost_round < 0:
        # the logit function (inverse of the logistic function) maps the probabilities output
        # by sklearn's predict_proba from [0,1] to a real number in [-inf, +inf]
        scores = logit(model.predict_proba(X)[:,1])
    else:
        scores = model.predict(X, num_iteration=boost_round)

    predicted = [1 if score > 0 else -1 for score in scores]

    failed_groups = 0
    start = 0
    for size in test_groups:
        attacked = predicted[start:start+size]
        true_label = labels[start]
        # a group counts as an error as soon as one of its rows is misclassified
        if any(guess != true_label for guess in attacked):
            failed_groups += 1
        start += size

    return failed_groups/len(test_groups)

# <code>eval_roc_auc</code>

In [37]:
def eval_roc_auc(model, boost_round, test_set, test_groups=None):
    """ROC AUC of the model scores against the labels of ``test_set``.

    Args:
        model: Object with ``predict(X, num_iteration=...)``; when
            ``boost_round`` is negative it must offer ``predict_proba(X)``
            instead (used for non-tree-based models like SVM).
        boost_round: Number of boosting iterations used for prediction, or a
            negative value for models without trees.
        test_set: DataFrame whose last column is the label and whose other
            columns are the features.
        test_groups: Unused; accepted so that all metrics share one signature.

    Returns:
        The value returned by ``roc_auc_score(labels, scores)``.
    """
    X = test_set.iloc[:,:-1].values
    labels = test_set.iloc[:,-1].values

    if boost_round < 0:
        # the logit function (inverse of the logistic function) maps the probabilities output
        # by sklearn's predict_proba from [0,1] to a real number in [-inf, +inf]
        scores = logit(model.predict_proba(X)[:,1])
    else:
        scores = model.predict(X, num_iteration=boost_round)

    return roc_auc_score(labels, scores)

# <code>eval_roc_auc_uma</code>

In [38]:
def eval_roc_auc_uma(model, boost_round, test_set, test_groups=None):
    """ROC AUC computed on the worst-case score of every group.

    Each group contributes one (label, score) pair: the label of its first
    row and the score that hurts the ranking most, i.e. the lowest score for
    a positive group and the highest score for a negative group.

    The previous selection, ``argmax(|score - label|)``, compared unbounded
    raw scores with the label value. For a positive group scoring
    ``[3.0, 0.5]`` it picked ``3.0``, the most confident *correct* score.

    Args:
        model: Object with ``predict(X, num_iteration=...)``; when
            ``boost_round`` is negative it must offer ``predict_proba(X)``
            instead (used for non-tree-based models like SVM).
        boost_round: Number of boosting iterations used for prediction, or a
            negative value for models without trees.
        test_set: DataFrame whose last column is the label and whose other
            columns are the features. Labels greater than 0 are treated as
            the positive class.
        test_groups: Lengths of the groups of consecutive rows; required
            despite the ``None`` default.

    Returns:
        The value returned by ``roc_auc_score`` on the per-group pairs.
    """
    X = test_set.iloc[:,:-1].values
    labels = test_set.iloc[:,-1].values

    if boost_round < 0: # no trees have been generated (used for evaluating other non-tree-based models like SVM)
        # use the logit function (i.e., the inverse of the logistic function) to map probabilities output
        # by sklearn's predict_proba in the range [0,1] to a real number in the range [-inf, +inf]
        predictions = logit(model.predict_proba(X)[:,1])
    else:
        predictions = model.predict(X, num_iteration=boost_round)

    offset = 0
    true_labels = []
    worst_predictions = []

    for g in test_groups:
        true_label = labels[offset]
        predictions_att = predictions[offset:offset+g]

        # Worst case for the ranking: a positive pushed as low as possible,
        # a negative pushed as high as possible.
        if true_label > 0:
            worst_prediction = np.min(predictions_att)
        else:
            worst_prediction = np.max(predictions_att)

        true_labels.append(true_label)
        worst_predictions.append(worst_prediction)

        offset += g

    return roc_auc_score(true_labels, worst_predictions)

# <code>eval_f1</code>

In [39]:
def eval_f1(model, boost_round, test_set, test_groups=None):
    """F1 score of the thresholded predictions against the labels.

    Scores strictly greater than 0 are predicted as 1, all others as -1.

    Args:
        model: Object with ``predict(X, num_iteration=...)``; when
            ``boost_round`` is negative it must offer ``predict_proba(X)``
            instead (used for non-tree-based models like SVM).
        boost_round: Number of boosting iterations used for prediction, or a
            negative value for models without trees.
        test_set: DataFrame whose last column is the label and whose other
            columns are the features.
        test_groups: Unused; accepted so that all metrics share one signature.

    Returns:
        The value returned by ``f1_score(labels, predictions)``.
    """
    X = test_set.iloc[:,:-1].values
    labels = test_set.iloc[:,-1].values

    if boost_round < 0:
        # the logit function (inverse of the logistic function) maps the probabilities output
        # by sklearn's predict_proba from [0,1] to a real number in [-inf, +inf]
        scores = logit(model.predict_proba(X)[:,1])
    else:
        scores = model.predict(X, num_iteration=boost_round)

    predicted = [1 if score > 0 else -1 for score in scores]

    return f1_score(labels, predicted)

# <code>eval_f1_uma</code>

In [40]:
def eval_f1_uma(model, boost_round, test_set, test_groups=None):
    """F1 score computed on one worst-case prediction per group.

    Scores strictly greater than 0 are predicted as 1, all others as -1. Each
    group contributes the label of its first row and the thresholded
    prediction lying farthest from that label.

    Args:
        model: Object with ``predict(X, num_iteration=...)``; when
            ``boost_round`` is negative it must offer ``predict_proba(X)``
            instead (used for non-tree-based models like SVM).
        boost_round: Number of boosting iterations used for prediction, or a
            negative value for models without trees.
        test_set: DataFrame whose last column is the label and whose other
            columns are the features.
        test_groups: Lengths of the groups of consecutive rows; required
            despite the ``None`` default.

    Returns:
        The value returned by ``f1_score`` on the per-group pairs.
    """
    X = test_set.iloc[:,:-1].values
    labels = test_set.iloc[:,-1].values

    if boost_round < 0:
        # the logit function (inverse of the logistic function) maps the probabilities output
        # by sklearn's predict_proba from [0,1] to a real number in [-inf, +inf]
        scores = logit(model.predict_proba(X)[:,1])
    else:
        scores = model.predict(X, num_iteration=boost_round)

    predictions = [1 if score > 0 else -1 for score in scores]

    true_labels = []
    worst_predictions = []
    start = 0

    for size in test_groups:
        reference = labels[start]
        attacked = predictions[start:start+size]
        farthest = np.argmax(np.abs(attacked - reference))

        true_labels.append(reference)
        worst_predictions.append(attacked[farthest])

        start += size

    return f1_score(true_labels, worst_predictions)

# Save/Load model

In [41]:
def save_model(model_filename, model):
    """Pickle ``model`` into the file ``model_filename``, overwriting it if present."""
    with open(model_filename, 'wb') as sink:
        pickle.dump(model, sink)

In [42]:
def load_model(model_filename):
    """Load and return the object pickled in ``model_filename``.

    NOTE(review): unpickling can execute arbitrary code; only load files
    that come from a trusted source.
    """
    with open(model_filename, 'rb') as source:
        model = pickle.load(source)
    return model

# Evaluation metrics

In [43]:
# Metrics computed on an original (non-attacked) dataset.
# Each entry is called as metric(model, boost_round, test, test_groups=...).
EVAL_METRICS = [eval_log_loss, 
                eval_binary_err_rate, 
                eval_roc_auc
               ]

# Counterparts of the metrics above that work on groups of attacked instances.
EVAL_METRICS_UNDER_MAX_ATTACK = [eval_log_loss_uma,
                                 eval_binary_err_rate_uma, 
                                 eval_roc_auc_uma
                                ]

# Evaluate each model w.r.t. _all_ evaluation metrics

In [44]:
def eval_learned_model(model, boost_round, eval_metric, test, test_groups=None):
    """Apply a single evaluation metric to a model and return its value.

    Args:
        model: The model to evaluate.
        boost_round: Number of boosting rounds forwarded to the metric.
        eval_metric: Callable invoked as
            ``eval_metric(model, boost_round, test, test_groups=test_groups)``.
        test: Test dataset forwarded to the metric.
        test_groups: Optional group lengths forwarded to the metric.

    Returns:
        Whatever ``eval_metric`` returns.
    """
    score = eval_metric(model, boost_round, test, test_groups=test_groups)
    return score

In [45]:
def eval_learned_models(model, model_type, boost_round, test, test_groups=None):
    """Evaluate ``model`` with every applicable metric and collect the results.

    The attack-aware metrics are used when ``test_groups`` is given, the
    standard ones otherwise. Each result is printed as it is computed.

    Args:
        model: The model to evaluate.
        model_type: Label written in the ``Model`` column and in the log lines.
        boost_round: Number of boosting rounds forwarded to every metric.
        test: Test dataset forwarded to every metric.
        test_groups: Optional group lengths; their presence selects the
            attack-aware metrics.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns ``Model``,
        ``N. of Trees`` and one column per metric.
    """
    under_attack = test_groups is not None
    eval_metrics = EVAL_METRICS_UNDER_MAX_ATTACK if under_attack else EVAL_METRICS
    d_test = "D_test_att" if under_attack else "D_test"

    # column title of a metric: function name without the "eval_" prefix, in title case
    titles = [metric.__name__.replace('eval_','').replace('_',' ').title() for metric in eval_metrics]

    df = pd.DataFrame(columns=['Model','N. of Trees'] + titles)
    df.loc[0] = [model_type, boost_round] + [None] * len(eval_metrics)

    for eval_metric, title in zip(eval_metrics, titles):
        res = eval_learned_model(model, boost_round, eval_metric, test, test_groups=test_groups)
        print("{} learning - {} on {} [boost rounds={}] = {:.5f}"
                  .format(model_type, eval_metric.__name__, d_test, boost_round, res))
        df[title] = res
    print("******************************************************************************************************")

    return df

# TRAINING

In [46]:
# custom non-interferent objective function: returns (grads, hess) and takes an `alpha` weight
LOSS_OBJ_FUNC = optimize_non_interferent_log_loss
# custom non-interferent evaluation function: returns (name, value, is_higher_better) and takes an `alpha` weight
LOSS_EVAL_FUNC = avg_non_interferent_log_loss

In [47]:
def random_select_instances(groups, p_attacked_inst, n_attacks_per_inst, seed=73):
    """Pick row positions: the first row of every group plus a sample of the others.

    The first row of each group is always selected. When
    ``n_attacks_per_inst`` is positive, the remaining rows of a group are
    handled as follows:

    - groups of at most ``n_attacks_per_inst`` rows are kept entirely;
    - larger groups are, with probability ``p_attacked_inst``, extended with
      ``n_attacks_per_inst`` of their remaining rows drawn without replacement.

    Note that this reseeds NumPy's global random number generator.

    Args:
        groups: Lengths of the groups of consecutive rows.
        p_attacked_inst: Probability that a large group gets sampled rows.
        n_attacks_per_inst: Number of extra rows drawn per sampled group;
            ``0`` (or less) keeps only the first row of every group.
        seed: Seed given to ``np.random.seed``.

    Returns:
        List of selected row positions, in increasing order.
    """
    np.random.seed(seed)

    selected_instances = []
    start = 0
    for size in groups:
        selected_instances.append(start)
        if n_attacks_per_inst > 0:
            if size <= n_attacks_per_inst:
                # small group: keep every remaining row
                selected_instances.extend(range(start + 1, start + size))
            elif np.random.random_sample() <= p_attacked_inst: # the instance is going to be attacked
                picked = np.random.choice(size - 1, n_attacks_per_inst, replace=False)
                selected_instances.extend(sorted(picked + start + 1))
        start += size

    return selected_instances

In [48]:
def learn(train, 
          valid, 
          params, 
          fobj,
          feval,
          num_boost_round,
          train_group,
          valid_group,
          p_attacked_inst, 
          n_attacks_per_inst
         ):
    """Train one LightGBM model and return it with its evaluation history.

    Args:
        train: Training DataFrame; the last column is the label, the others
            are the features.
        valid: Validation DataFrame with the same layout.
        params: LightGBM parameter dictionary.
        fobj: Custom objective function passed to ``lightgbm.train``.
        feval: Custom evaluation function passed to ``lightgbm.train``.
        num_boost_round: Maximum number of boosting rounds.
        train_group: Group lengths of ``train``, or ``None``.
        valid_group: Group lengths of ``valid``, or ``None``.
        p_attacked_inst: Forwarded to ``random_select_instances``.
        n_attacks_per_inst: When positive (and ``train_group`` is given), the
            training rows are subsampled with ``random_select_instances`` and
            no group information is attached to the training dataset.

    Returns:
        Tuple ``(lgbm_model, lgbm_info)`` where ``lgbm_info`` is the dictionary
        filled by LightGBM through ``evals_result``.
    """
    lgbm_info = {}

    # Group lengths are only attached when the training set is used as is:
    # after subsampling they no longer describe the selected rows.
    train_dataset_group = None
    if train_group is not None:
        if n_attacks_per_inst > 0:
            selected_instances = random_select_instances(train_group, p_attacked_inst, n_attacks_per_inst)
            # The selected values are row positions, so index positionally;
            # label-based `.loc` is only equivalent on a default 0..n-1 index.
            train = train.iloc[selected_instances]
        else:
            train_dataset_group = train_group

    lgbm_train = lightgbm.Dataset(data=train.iloc[:,:-1].values, 
                                  label=train.iloc[:,-1].values,
                                  group=train_dataset_group
                                 )

    lgbm_valid = lightgbm.Dataset(data=valid.iloc[:,:-1].values, 
                                  label=valid.iloc[:,-1].values, 
                                  group=valid_group,
                                  reference=lgbm_train, 
                                  free_raw_data=False
                                 )

    # NOTE(review): `fobj`, `evals_result`, `early_stopping_rounds` and `verbose_eval`
    # appear to have been removed from `lightgbm.train` in LightGBM 4.0 — confirm the
    # pinned LightGBM version before upgrading.
    lgbm_model = lightgbm.train(params=params, 
                                train_set=lgbm_train, 
                                num_boost_round=num_boost_round, 
                                valid_sets = [lgbm_valid],
                                valid_names  = ["validation"], 
                                evals_result = lgbm_info,
                                fobj = fobj,
                                feval = feval,
                                early_stopping_rounds=50,
                                verbose_eval=20
                               )

    return lgbm_model, lgbm_info

In [49]:
def learning_runs(train, 
                  valid, 
                  params,
                  fobj,
                  feval,
                  num_boost_round,
                  train_group=None,
                  valid_group=None,
                  is_partial=False,
                  p_attacked_inst=1.0,
                  n_attacks_per_inst=0,
                  run_type="Standard"
                 ):
    """Train one model per boosting budget and collect models and logs.

    Every value in `num_boost_round` triggers one call to `learn`; all the
    other arguments are forwarded to it unchanged.

    Parameters
    ----------
    train, valid : training / validation data, forwarded to `learn`.
    params : parameters forwarded to `learn`.
    fobj : custom objective; either a plain function or a
        `functools.partial` wrapping one.
    feval : custom evaluation metric, forwarded to `learn`.
    num_boost_round : iterable of boosting-round budgets, one run each.
    train_group, valid_group : optional groups forwarded to `learn`. A
        non-None `train_group` also switches the label printed in the
        progress banner from "D_train" to "D_train_att".
    is_partial : kept for backward compatibility. When True the objective
        name is read from `fobj.func`. Objects without `__name__` (such as
        `functools.partial`) are unwrapped even if the flag is left False.
    p_attacked_inst, n_attacks_per_inst : forwarded to `learn` and echoed
        in the banner.
    run_type : free-text label stored under the 'type' key and printed.

    Returns
    -------
    dict
        {'type': run_type,
         'run': {rounds: {'model': ..., 'results': ...}, ...}}
    """
    # functools.partial objects expose the wrapped callable as `.func` and
    # carry no `__name__`, so relying on the caller-supplied flag alone used
    # to raise AttributeError whenever flag and argument disagreed.
    named_target = fobj
    if is_partial or not hasattr(fobj, "__name__"):
        named_target = getattr(fobj, "func", fobj)
    fobj_name = getattr(named_target, "__name__", repr(named_target))

    dataset_name = "D_train" if train_group is None else "D_train_att"

    # Named `runs` rather than `learning_runs` to avoid shadowing this function.
    runs = {'type': run_type, 'run': {}}

    for br in num_boost_round:
        print("***** {} learning - Optimizing `{}` on {} [boost rounds={}; p_attacked_inst={:.2f}; n_attacks_per_inst={}] *****"
              .format(run_type, fobj_name, dataset_name, br, p_attacked_inst, n_attacks_per_inst))
        model, res = learn(train, 
                           valid, 
                           params,
                           fobj,
                           feval,
                           br,
                           train_group,
                           valid_group,
                           p_attacked_inst, 
                           n_attacks_per_inst
                          )
        runs['run'][br] = {'model': model, 'results': res}

    return runs

# 1. _Standard_ Learning: Models are trained on the original dataset $\mathcal{D}_{train}$ using _standard_ binary log loss

-  This model is trained on the original training set by minimizing standard **binary log loss** (i.e., <code>fobj=optimize_non_interferent_log_loss</code> with <code>alpha=0.0</code>)

-  Its performance is assessed by means of <code>feval=avg_non_interferent_log_loss</code>, still with <code>alpha=0.0</code>, which results in the standard <code>avg_log_loss</code> (i.e., the metric optimized during training) both on training and validation set.

-  The model leading to the lowest **binary log loss** on the validation set is the one returned.

In [50]:
%%time
# Standard run: no train_group/valid_group is passed, so `learning_runs`
# labels the dataset "D_train". The objective and the evaluation metric are
# both bound to STD_ALPHA through functools.partial, hence is_partial=True.
std_runs = learning_runs(TRAIN, 
                          VALID,
                          STD_PARAMS, 
                          functools.partial(LOSS_OBJ_FUNC, alpha=STD_ALPHA),
                          functools.partial(LOSS_EVAL_FUNC, alpha=STD_ALPHA),
                          BOOST_ROUNDS,
                          is_partial=True,
                          run_type="Standard"
                        )

***** Standard learning - Optimizing `optimize_non_interferent_log_loss` on D_train [boost rounds=200; p_attacked_inst=1.00; n_attacks_per_inst=0] *****
Training until validation scores don't improve for 50 rounds.
[20]	validation's avg_non_interferent_log_loss [alpha=0.0]: 0.419318
[40]	validation's avg_non_interferent_log_loss [alpha=0.0]: 0.346774
[60]	validation's avg_non_interferent_log_loss [alpha=0.0]: 0.324264
[80]	validation's avg_non_interferent_log_loss [alpha=0.0]: 0.316688
[100]	validation's avg_non_interferent_log_loss [alpha=0.0]: 0.31375
[120]	validation's avg_non_interferent_log_loss [alpha=0.0]: 0.314236
[140]	validation's avg_non_interferent_log_loss [alpha=0.0]: 0.316042
Early stopping, best iteration is:
[104]	validation's avg_non_interferent_log_loss [alpha=0.0]: 0.313131
CPU times: user 25.4 s, sys: 30.9 ms, total: 25.5 s
Wall time: 842 ms


In [51]:
STD_MODEL_FILENAME = MODELS_PATH+"/std_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+".pkl"

# Persist _standard_ model

In [52]:
# Only the run keyed by MAX_BOOST_ROUNDS is persisted, so MAX_BOOST_ROUNDS
# has to be one of the values in BOOST_ROUNDS (otherwise this lookup raises
# KeyError).
save_model(STD_MODEL_FILENAME, std_runs['run'][MAX_BOOST_ROUNDS]['model'])

# 2. _Baseline_: Learning models trained on the attacked $\mathcal{D}_{train\_att}$ using _standard_ binary log loss

-  This model is trained on the attacked training set by minimizing standard **binary log loss** (i.e., <code>fobj=optimize_non_interferent_log_loss</code> with <code>alpha=0.0</code>)

-  Its performance is assessed by means of <code>feval=avg_non_interferent_log_loss</code>, still with <code>alpha=0.0</code>, which results in the standard <code>avg_log_loss</code> (i.e., the metric optimized during training) both on training and validation set.

-  The model leading to the lowest **binary log loss** on the validation set is the one returned.

In [None]:
# Baseline grid: every (p, n) pair is forwarded to learning_runs as
# (p_attacked_inst, n_attacks_per_inst) by the baseline training loop.
P_ATTACKED_INSTANCE = [0.25, 0.5, 0.75, 1.0]
N_ATTACKS_PER_INSTANCE = [1, 1000]

# Train and Persist _baseline_ model

In [None]:
%%time
# One baseline model per (pa, na) combination. Only the training groups are
# passed here: valid_group keeps its default (None).
for pa in P_ATTACKED_INSTANCE:
    for na in N_ATTACKS_PER_INSTANCE:
        baseline_runs = learning_runs(TRAIN_ATT, 
                                       VALID_ATT, 
                                       STD_PARAMS,
                                       functools.partial(LOSS_OBJ_FUNC, alpha=STD_ALPHA),
                                       functools.partial(LOSS_EVAL_FUNC, alpha=STD_ALPHA),
                                       BOOST_ROUNDS,
                                       train_group=TRAIN_ATT_OFFSETS,
                                       is_partial=True,
                                       p_attacked_inst=pa, 
                                       n_attacks_per_inst=na,
                                       run_type="Baseline"
                                    )

        # `baseline_runs` is overwritten at every iteration; what survives is
        # the model saved below. The filename encodes pa as an integer
        # percentage (int(pa*100)) and na verbatim.
        BASELINE_MODEL_FILENAME = MODELS_PATH+"/baseline_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+"_p-{}_n-{}.pkl".format(int(pa*100), na)
        save_model(BASELINE_MODEL_FILENAME, baseline_runs['run'][MAX_BOOST_ROUNDS]['model'])

# 3. _Full-Non-Interferent_: Learn full _non-interferent_ models trained on the attacked $\mathcal{D}_{train\_att}$ using _only_ our custom objective function (binary log loss under max attack)

-  This model is trained on the attacked training set by minimizing our custom objective function, i.e., **binary log loss under max attack** (i.e., <code>fobj=optimize_non_interferent_log_loss</code> with <code>alpha=1.0</code>).

-  Its performance is assessed by means of <code>feval=avg_non_interferent_log_loss</code>, still with <code>alpha=1.0</code>, which results in the custom <code>avg_log_loss_uma</code>, both on training and validation set.

-  The model leading to the lowest <code>avg_log_loss_uma</code> on the validation set is the one returned.

In [None]:
%%time
# Full non-interferent run: trained on the attacked data with both the
# training and the validation groups supplied, and with objective and metric
# bound to NON_INTERFERENT_ALPHA. p_attacked_inst / n_attacks_per_inst keep
# their defaults (1.0 and 0).
full_non_interferent_runs = learning_runs(TRAIN_ATT, 
                                     VALID_ATT,
                                     NON_INTERFERENT_PARAMS,
                                     functools.partial(LOSS_OBJ_FUNC, alpha=NON_INTERFERENT_ALPHA),
                                     functools.partial(LOSS_EVAL_FUNC, alpha=NON_INTERFERENT_ALPHA),
                                     BOOST_ROUNDS,
                                     train_group=TRAIN_ATT_OFFSETS,
                                     valid_group=VALID_ATT_OFFSETS,
                                     is_partial=True,
                                     run_type="Full-Non-Interferent"
                                    )

In [None]:
FULL_NON_INTERFERENT_MODEL_FILENAME = MODELS_PATH+"/full-non-interferent_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+".pkl"

# Persist _full-non-interferent_ model

In [None]:
save_model(FULL_NON_INTERFERENT_MODEL_FILENAME, full_non_interferent_runs['run'][MAX_BOOST_ROUNDS]['model'])

# 4. _Non-Interferent_ weighted: Learn _non-interferent_ models trained on the attacked $\mathcal{D}_{train\_att}$ using custom cost function (weighted sum of log loss and log loss under max attack)

-  This model is trained on the attacked training set by minimizing our custom objective function, i.e., the weighted sum of the standard **binary log loss** and **binary log loss under max attack** (i.e., <code>fobj=optimize_non_interferent_log_loss</code> with <code>0 < alpha < 1</code>).

-  Its performance is assessed by means of <code>feval=avg_non_interferent_log_loss</code>, still with the same value of <code>alpha</code> used during training, which results in the custom <code>avg_non_interferent_log_loss</code>, both on training and validation set.

-  The model leading to the lowest <code>avg_non_interferent_log_loss</code> on the validation set is the one returned.

In [None]:
# Mixing weights passed as `alpha` to the objective and the metric. Each value
# also ends up in the saved filename as int(alpha*100) (e.g. 0.25 -> "alpha-25").
# NOTE: int() truncates, so a value such as 0.29 would be written as 28
# because 0.29*100 evaluates to 28.999999999999996.
ALPHAS = [0.10, 0.25, 0.50, 0.75, 0.90]

# Train and Persist _non-interferent_ weighted model

In [None]:
%%time
# One weighted model per alpha; the same alpha is bound to both the objective
# and the evaluation metric.
for alpha in ALPHAS:
    weighted_non_interferent_runs = learning_runs(TRAIN_ATT, 
                                              VALID_ATT, 
                                              NON_INTERFERENT_PARAMS,
                                              functools.partial(LOSS_OBJ_FUNC, alpha=alpha),
                                              functools.partial(LOSS_EVAL_FUNC, alpha=alpha),
                                              BOOST_ROUNDS,
                                              train_group=TRAIN_ATT_OFFSETS,
                                              valid_group=VALID_ATT_OFFSETS,
                                              is_partial=True,
                                              run_type="Weighted-Non-Interferent"
                                             )
    # The runs dict is overwritten on the next iteration, so the model for the
    # MAX_BOOST_ROUNDS budget is saved right away, with alpha encoded as an
    # integer percentage in the filename.
    WEIGHTED_NON_INTERFERENT_MODEL_FILENAME = MODELS_PATH+"/weighted-non-interferent_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+"_alpha-{}.pkl".format(int(alpha*100))
    save_model(WEIGHTED_NON_INTERFERENT_MODEL_FILENAME, weighted_non_interferent_runs['run'][MAX_BOOST_ROUNDS]['model'])

# 5. SVM: Learn SVM classifier trained on the original $\mathcal{D}_{\text{train}}$

In [None]:
# Parameter grid for the SVM grid search. Only C is tuned; the gamma grid is
# commented out, so gamma is not part of the search.
SVM_HYPERPARAMS = {
    'C': [0.001, 0.01, 0.1, 1, 10],
    #'gamma': [0.001, 0.01, 0.1, 1]
}

In [None]:
def train_svm(X, y, hyperparams, nfolds=5, scoring='neg_log_loss'):
    """Grid-search an RBF-kernel SVC with cross-validation.

    X, y        -- training features and labels, passed straight to `fit`
    hyperparams -- parameter grid handed to GridSearchCV
    nfolds      -- value used for GridSearchCV's `cv` argument
    scoring     -- value used for GridSearchCV's `scoring` argument

    Returns the fitted GridSearchCV object (not just the best estimator).
    """
    # probability=True makes the SVC expose probability estimates, which a
    # log-loss based scorer needs.
    base_classifier = SVC(kernel='rbf', probability=True)
    search_settings = {
        'cv': nfolds,
        'scoring': scoring,
        'n_jobs': -1,
        'verbose': 4,
    }
    searcher = GridSearchCV(base_classifier, hyperparams, **search_settings)
    searcher.fit(X, y)
    return searcher

In [None]:
# Split TRAIN_VALID into a feature matrix (every column but the last) and a
# label vector (the last column), both as NumPy arrays.
X = TRAIN_VALID.iloc[:,:-1].values
y = TRAIN_VALID.iloc[:,-1].values

In [None]:
# Map the negative class label from -1 to 0.
# `y` comes from `.values` on TRAIN_VALID, so it may share memory with the
# DataFrame (and is read-only under pandas copy-on-write). Building a new
# array with np.where avoids writing through that buffer: TRAIN_VALID is left
# untouched and the cell cannot fail on a read-only array.
y = np.where(y == -1, 0, y)

In [None]:
svm_grid = train_svm(X, y, SVM_HYPERPARAMS) 

# Persist SVM model

In [None]:
best_C = svm_grid.best_params_['C']

In [None]:
# The selected C is embedded in the filename with its decimal point stripped
# (e.g. 0.1 -> "C-01", 10 -> "C-10").
SVM_MODEL_FILENAME = MODELS_PATH+"/svm_"+ATTACKER+"_C-{}.pkl".format(str(best_C).replace('.',''))

In [None]:
save_model(SVM_MODEL_FILENAME, svm_grid)

# EVALUATION

In [None]:
MIN_EVAL_TREES = 10
MAX_EVAL_TREES = 200
STEP_EVAL_TREES = 10
# Tree counts to evaluate: MIN, MIN+STEP, ... plus MAX itself (which range()
# excludes), sorted ascending. The leading 0 is the entry standing for the
# "best_iteration" learned on the validation set.
EVAL_TREES = [0] + sorted(
    set(range(MIN_EVAL_TREES, MAX_EVAL_TREES, STEP_EVAL_TREES)) | {MAX_EVAL_TREES}
)

In [None]:
def eval_runs(model, model_type, test, eval_trees=EVAL_TREES, test_groups=None):
    """Evaluate `model` once per entry of `eval_trees` and stack the results.

    Each entry of `eval_trees` is passed to `eval_learned_models` together
    with `model`, `model_type`, `test` and `test_groups`. The frames it
    returns are concatenated row-wise and the index is renumbered from 0.

    Note: the default for `eval_trees` is bound when this definition is
    executed, i.e. to the EVAL_TREES list existing at that moment.
    """
    per_setting_frames = [
        eval_learned_models(model, model_type, n_trees, test, test_groups=test_groups)
        for n_trees in eval_trees
    ]
    stacked = pd.concat(per_setting_frames, axis=0)
    return stacked.reset_index(drop=True)

# Retrieve all model files

In [None]:
def get_model_filenames():
    """Return the sorted names of the regular files found in MODELS_PATH.

    Sub-directories and the '.gitignore' placeholder are left out. Names are
    returned without the MODELS_PATH prefix.
    """
    model_files = []
    for entry in listdir(MODELS_PATH):
        if entry == '.gitignore':
            continue
        if isfile(join(MODELS_PATH, entry)):
            model_files.append(entry)
    model_files.sort()
    return model_files

In [None]:
all_model_filenames = get_model_filenames()
# Show one filename per line.
print("\n".join(all_model_filenames))

# Load _standard_ models

In [None]:
STD_MODEL_FILENAME = MODELS_PATH+"/std_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+".pkl"

In [None]:
std_model = load_model(STD_MODEL_FILENAME)

# Evaluate _standard_ models on $D_{test}$

In [None]:
std_df = eval_runs(std_model, "Standard", TEST)

In [None]:
std_df.head()

# Evaluate _standard_ models on $D_{test\_att}$

In [None]:
std_att_df = eval_runs(std_model, "Standard", TEST_ATT, test_groups=TEST_ATT_OFFSETS)

# Merge both _standard_ evaluations

In [None]:
# Pair the clean-test and attacked-test results on the (model, tree count)
# key; pd.merge performs an inner join by default.
overall_std_df = pd.merge(std_df, std_att_df, on=['Model', 'N. of Trees'])

In [None]:
overall_std_df.head()

# Load _baseline_ models

In [None]:
# Only the p=100% baselines (n=1 and n=1000) are loaded; the other variants
# are kept commented out.
BASELINE_MODEL_100_1_FILENAME = MODELS_PATH+"/baseline_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+"_p-100_n-1.pkl"
#BASELINE_MODEL_100_4_FILENAME = MODELS_PATH+"/baseline_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+"_p-100_n-4.pkl"
BASELINE_MODEL_100_MAX_FILENAME = MODELS_PATH+"/baseline_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+"_p-100_n-1000.pkl"
#BASELINE_MODEL_50_1_FILENAME = MODELS_PATH+"/baseline_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+"_p-50_n-1.pkl"
#BASELINE_MODEL_50_4_FILENAME = MODELS_PATH+"/baseline_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+"_p-50_n-4.pkl"
#BASELINE_MODEL_50_MAX_FILENAME = MODELS_PATH+"/baseline_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+"_p-50_n-1000.pkl"

In [None]:
baseline_model_100_1 = load_model(BASELINE_MODEL_100_1_FILENAME)
#baseline_model_100_4 = load_model(BASELINE_MODEL_100_4_FILENAME)
baseline_model_100_MAX = load_model(BASELINE_MODEL_100_MAX_FILENAME)
# baseline_model_50_1 = load_model(BASELINE_MODEL_50_1_FILENAME)
# baseline_model_50_4 = load_model(BASELINE_MODEL_50_4_FILENAME)
# baseline_model_50_MAX = load_model(BASELINE_MODEL_50_MAX_FILENAME)

# Evaluate _baseline_ models on $D_{test}$

In [None]:
baseline_100_1_df = eval_runs(baseline_model_100_1, "Baseline [p=1.0; n=1]", TEST)
# baseline_100_4_df = eval_runs(baseline_model_100_4, "Baseline [p=1.0; n=4]", TEST)
baseline_100_max_df = eval_runs(baseline_model_100_MAX, "Baseline [p=1.0; n=max]", TEST)
# baseline_50_1_df = eval_runs(baseline_model_50_1, "Baseline [p=0.5; n=1]", TEST)
# baseline_50_4_df = eval_runs(baseline_model_50_4, "Baseline [p=0.5; n=4]", TEST)
# baseline_50_max_df = eval_runs(baseline_model_50_MAX, "Baseline [p=0.5; n=max]", TEST)

In [None]:
baseline_df = pd.concat(#[baseline_100_1_df, baseline_100_4_df, baseline_100_max_df, baseline_50_1_df, baseline_50_4_df, baseline_50_max_df], 
                        [baseline_100_1_df, baseline_100_max_df], 
                        axis=0)
baseline_df.reset_index(inplace=True, drop=True)

In [None]:
baseline_df.head()

# Evaluate _baseline_ model on $D_{test\_att}$

In [None]:
baseline_att_100_1_df = eval_runs(baseline_model_100_1, "Baseline [p=1.0; n=1]", TEST_ATT, test_groups=TEST_ATT_OFFSETS)
#baseline_att_100_4_df = eval_runs(baseline_model_100_4, "Baseline [p=1.0; n=4]", TEST_ATT, test_groups=TEST_ATT_OFFSETS)
baseline_att_100_max_df = eval_runs(baseline_model_100_MAX, "Baseline [p=1.0; n=max]", TEST_ATT, test_groups=TEST_ATT_OFFSETS)
#baseline_att_50_1_df = eval_runs(baseline_model_50_1, "Baseline [p=0.5; n=1]", TEST_ATT, test_groups=TEST_ATT_OFFSETS)
#baseline_att_50_4_df = eval_runs(baseline_model_50_4, "Baseline [p=0.5; n=4]", TEST_ATT, test_groups=TEST_ATT_OFFSETS)
#baseline_att_50_max_df = eval_runs(baseline_model_50_MAX, "Baseline [p=0.5; n=max]", TEST_ATT, test_groups=TEST_ATT_OFFSETS)

In [None]:
baseline_att_df = pd.concat(# [baseline_att_100_1_df, baseline_att_100_4_df, baseline_att_100_max_df, baseline_att_50_1_df, baseline_att_50_4_df, baseline_att_50_max_df]
    [baseline_att_100_1_df, baseline_att_100_max_df], axis=0)
baseline_att_df.reset_index(inplace=True, drop=True)

In [None]:
baseline_att_df.head()

# Merge both _baseline_ evaluations

In [None]:
overall_baseline_df = pd.merge(baseline_df, baseline_att_df, on=["Model", "N. of Trees"])

In [None]:
overall_baseline_df.head()

# Load _full-non-interferent_ model

In [None]:
FULL_NON_INTERFERENT_MODEL_FILENAME = MODELS_PATH+"/full-non-interferent_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+".pkl"

In [None]:
full_non_interferent_model = load_model(FULL_NON_INTERFERENT_MODEL_FILENAME)

# Evaluate _full-non-interferent_ model on $D_{test}$

In [None]:
full_non_interferent_df = eval_runs(full_non_interferent_model, "Full-Non-Interferent", TEST)

# Evaluate _full-non-interferent_ model on $D_{test\_att}$

In [None]:
full_non_interferent_att_df = eval_runs(full_non_interferent_model, "Full-Non-Interferent", TEST_ATT, test_groups=TEST_ATT_OFFSETS)

# Merge both _full-non-interferent_ evaluations

In [None]:
overall_full_non_interferent_df = pd.merge(full_non_interferent_df, full_non_interferent_att_df, on=['Model', 'N. of Trees'])

# Load _weighted-non-interferent_ models

In [None]:
# One path per trained alpha. The "alpha-NN" suffixes have to match the
# int(alpha*100) values written by the training loop for the entries of ALPHAS.
WEIGHTED_NON_INTERFERENT_MODEL_10_FILENAME = MODELS_PATH+"/weighted-non-interferent_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+"_alpha-10.pkl"
WEIGHTED_NON_INTERFERENT_MODEL_25_FILENAME = MODELS_PATH+"/weighted-non-interferent_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+"_alpha-25.pkl"
WEIGHTED_NON_INTERFERENT_MODEL_50_FILENAME = MODELS_PATH+"/weighted-non-interferent_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+"_alpha-50.pkl"
WEIGHTED_NON_INTERFERENT_MODEL_75_FILENAME = MODELS_PATH+"/weighted-non-interferent_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+"_alpha-75.pkl"
WEIGHTED_NON_INTERFERENT_MODEL_90_FILENAME = MODELS_PATH+"/weighted-non-interferent_"+ATTACKER+"_"+str(MAX_BOOST_ROUNDS)+"_alpha-90.pkl"

In [None]:
weighted_non_interferent_model_10 = load_model(WEIGHTED_NON_INTERFERENT_MODEL_10_FILENAME)
weighted_non_interferent_model_25 = load_model(WEIGHTED_NON_INTERFERENT_MODEL_25_FILENAME)
weighted_non_interferent_model_50 = load_model(WEIGHTED_NON_INTERFERENT_MODEL_50_FILENAME)
weighted_non_interferent_model_75 = load_model(WEIGHTED_NON_INTERFERENT_MODEL_75_FILENAME)
weighted_non_interferent_model_90 = load_model(WEIGHTED_NON_INTERFERENT_MODEL_90_FILENAME)

# Evaluate _weighted-non-interferent_ model on $D_{test}$

In [None]:
weighted_non_interferent_10_df = eval_runs(weighted_non_interferent_model_10, "Weighted-Non-Interferent [alpha=0.10]", TEST)
weighted_non_interferent_25_df = eval_runs(weighted_non_interferent_model_25, "Weighted-Non-Interferent [alpha=0.25]", TEST)
weighted_non_interferent_50_df = eval_runs(weighted_non_interferent_model_50, "Weighted-Non-Interferent [alpha=0.50]", TEST)
weighted_non_interferent_75_df = eval_runs(weighted_non_interferent_model_75, "Weighted-Non-Interferent [alpha=0.75]", TEST)
weighted_non_interferent_90_df = eval_runs(weighted_non_interferent_model_90, "Weighted-Non-Interferent [alpha=0.90]", TEST)

In [None]:
weighted_non_interferent_df = pd.concat([weighted_non_interferent_10_df, 
                                             weighted_non_interferent_25_df,
                                             weighted_non_interferent_50_df,
                                             weighted_non_interferent_75_df,
                                             weighted_non_interferent_90_df], axis=0)

weighted_non_interferent_df.reset_index(inplace=True, drop=True)

# Evaluate _weighted-non-interferent_ model on $D_{test\_att}$

In [None]:
weighted_non_interferent_att_10_df = eval_runs(weighted_non_interferent_model_10, "Weighted-Non-Interferent [alpha=0.10]", TEST_ATT, test_groups=TEST_ATT_OFFSETS)
weighted_non_interferent_att_25_df = eval_runs(weighted_non_interferent_model_25, "Weighted-Non-Interferent [alpha=0.25]", TEST_ATT, test_groups=TEST_ATT_OFFSETS)
weighted_non_interferent_att_50_df = eval_runs(weighted_non_interferent_model_50, "Weighted-Non-Interferent [alpha=0.50]", TEST_ATT, test_groups=TEST_ATT_OFFSETS)
weighted_non_interferent_att_75_df = eval_runs(weighted_non_interferent_model_75, "Weighted-Non-Interferent [alpha=0.75]", TEST_ATT, test_groups=TEST_ATT_OFFSETS)
weighted_non_interferent_att_90_df = eval_runs(weighted_non_interferent_model_90, "Weighted-Non-Interferent [alpha=0.90]", TEST_ATT, test_groups=TEST_ATT_OFFSETS)

In [None]:
weighted_non_interferent_att_df = pd.concat([weighted_non_interferent_att_10_df, 
                                             weighted_non_interferent_att_25_df,
                                             weighted_non_interferent_att_50_df,
                                             weighted_non_interferent_att_75_df,
                                             weighted_non_interferent_att_90_df], axis=0)

weighted_non_interferent_att_df.reset_index(inplace=True, drop=True)

# Merge both _weighted-non-interferent_ evaluations

In [None]:
overall_weighted_non_interferent_df = pd.merge(weighted_non_interferent_df, weighted_non_interferent_att_df, on=['Model', 'N. of Trees'])

# Load SVM model

In [None]:
# NOTE(review): C=10 is hard-coded here, whereas the training section names
# the file after svm_grid.best_params_['C']. Update this path (and the
# "SVM [C=10]" labels used for evaluation) if the grid search picks another C.
SVM_MODEL_FILENAME = MODELS_PATH+"/svm_"+ATTACKER+"_C-10.pkl"

In [None]:
svm_model = load_model(SVM_MODEL_FILENAME)

# Evaluate SVM model on $D_{test}$

In [None]:
# eval_trees=[-1] replaces the default EVAL_TREES, so the SVM is evaluated
# exactly once. -1 is presumably a "not applicable" tree count -- TODO confirm
# how eval_learned_models interprets it.
svm_df = eval_runs(svm_model, "SVM [C=10]", TEST, eval_trees=[-1])

# Evaluate SVM model on $D_{test\_att}$

In [None]:
svm_att_df = eval_runs(svm_model, "SVM [C=10]", TEST_ATT, test_groups=TEST_ATT_OFFSETS, eval_trees=[-1])

# Merge both SVM evaluations

In [None]:
overall_svm_df = pd.merge(svm_df, svm_att_df, on=['Model', 'N. of Trees'])

# Stack _all_ evaluations one on top of each other

In [None]:
# Stack the per-family result tables row-wise, then renumber the rows so the
# index is unique across families (each input frame carries its own index).
overall_df = pd.concat([overall_full_non_interferent_df, 
                        overall_weighted_non_interferent_df,
                        overall_svm_df,
                        overall_baseline_df, 
                        overall_std_df], axis=0)

overall_df.reset_index(inplace=True, drop=True)

In [None]:
overall_df.head()

# Save the DataFrame containing results

In [None]:
# One CSV per attacker type, written relative to the working directory.
# to_csv does not create missing directories, so "../plots" must already exist.
overall_df.to_csv("../plots/"+ATTACKER+".csv", sep=",", index=False)