# Evaluating Models

This notebook contains the code used for evaluating the following learning models:

-  **Standard GBDT** (_baseline 1_)
-  **Adversarial Boosting** (_baseline 2_)
-  **Non-Interferent GBDT** (our proposal)

# Documentation

 - http://lightgbm.readthedocs.io/en/latest/
 - http://lightgbm.readthedocs.io/en/latest/Python-Intro.html
 - https://github.com/Microsoft/LightGBM/tree/master/examples/python-guide

In [2]:
import os
import json
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm
import functools
from os import listdir
from os.path import isfile, join
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score, f1_score
# Adding the following line, allows Jupyter Notebook to visualize plots
# produced by matplotlib directly below the code cell which generated those.
%matplotlib inline

np.random.seed(73)

# Standard evaluation metric

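The following function is used to evaluate the quality of a learned model (either _standard_, _adversarial-boosting_, or _non-interferent_). It is the standard <code>avg_log_loss</code>: the mean of $\log(1 + e^{-y \cdot f(x)})$ over all instances, where $f(x)$ is the raw (real-valued) model score and $y \in \{-1, +1\}$ is the true label.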

In [3]:
def logistic(x):
    """Sigmoid: map a real-valued score (scalar or array) into the range (0, 1)."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator

In [4]:
def logit(p):
    """Log-odds of a probability ``p`` (the inverse of ``logistic``)."""
    odds = p / (1 - p)
    return np.log(odds)

# <code>avg_log_loss</code>

In [5]:
# self-defined eval metric
# f(preds: array, train_data: Dataset) -> name: string, value: array, is_higher_better: bool
def avg_log_loss(preds, train_data):
    """Average binary log loss of raw scores against the dataset labels.

    The per-instance loss is ``log(1 + exp(-pred * label))``, so labels are
    expected to be encoded as -1/+1 and ``preds`` to be raw (real-valued)
    scores, not probabilities.

    Parameters
    ----------
    preds : numpy array of raw scores, one per instance.
    train_data : object exposing ``get_label()`` (e.g. a ``lightgbm.Dataset``).

    Returns
    -------
    (name, value, is_higher_better) : the triple returned by this metric,
        following the ``feval`` convention described in the comment above.
    """
    labels = train_data.get_label()

    # log(1 + exp(-m)) evaluated as logaddexp(0, -m): the naive form overflows
    # to +inf (with a RuntimeWarning) once the margin m is below about -709,
    # and loses precision for large positive margins.
    losses = np.logaddexp(0.0, -preds * labels)
    avg_loss = np.mean(losses)

    return 'avg_binary_log_loss', avg_loss, False

In [6]:
def eval_log_loss(model, test, test_groups=None, svm=False):
    """Average binary log loss of ``model`` on ``test``.

    ``test`` is a DataFrame whose last column is the label and whose other
    columns are the features. ``test_groups`` is accepted so that every
    ``eval_*`` function shares one signature; it is not used here.

    When ``svm`` is true the model is queried through ``predict_proba`` and the
    positive-class probability is mapped back to a real-valued score with
    ``logit``; otherwise ``model.predict`` is used as is.
    """
    features = test.iloc[:,:-1].values
    targets = test.iloc[:,-1].values

    lgbm_test = lightgbm.Dataset(data=features,
                                 label=targets,
                                 free_raw_data=False)

    if svm:
        # non-tree model: turn probabilities in [0,1] into scores in [-inf, +inf]
        scores = logit(model.predict_proba(features)[:,1])
    else:
        scores = model.predict(features)

    _, loss, _ = avg_log_loss(scores, lgbm_test)
    return loss

# Custom evaluation metric

Similarly to what we have done for <code>fobj</code>, <code>feval</code> can be computed from a weighted combination of two evaluation metrics:

-  <code>avg_log_loss</code> (standard, defined above);
-  <code>avg_log_loss_uma</code> (custom, defined below).

# <code>avg_log_loss_uma</code>

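This is the binary log loss, modified to operate on groups of perturbed instances: for each group, only the largest (worst-case) loss among its instances is kept, and these per-group maxima are then averaged.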

In [7]:
# Our custom metric

def binary_log_loss(pred, true_label):
    """Binary log loss ``log(1 + exp(-pred * true_label))`` of a raw score.

    ``true_label`` is expected to be -1 or +1 and ``pred`` a real-valued score.
    Works element-wise on scalars or numpy arrays.
    """
    # logaddexp(0, z) == log(1 + exp(z)) without overflowing to +inf when z
    # is large (i.e. when the prediction is confidently wrong).
    return np.logaddexp(0.0, -pred * true_label)

# self-defined eval metric
# f(preds: array, train_data: Dataset) -> name: string, value: array, is_higher_better: bool
def avg_log_loss_uma(preds, train_data):
    """Average, over groups, of the largest binary log loss inside each group.

    Groups are consecutive runs of rows whose lengths are given by
    ``train_data.get_group()``. When no group information is available the
    reported value is 0.0.

    Returns the triple ``(name, value, is_higher_better)``.
    """
    metric_name = 'avg_binary_log_loss_under_max_attack'

    labels = train_data.get_label()
    attack_lens = train_data.get_group()

    if attack_lens is None:
        return metric_name, 0.0, False

    worst_per_group = []
    start = 0
    for group_size in attack_lens:
        stop = start + group_size
        group_losses = [binary_log_loss(score, label)
                        for score, label in zip(preds[start:stop], labels[start:stop])]
        # keep only the most damaging instance of the group
        worst_per_group.append(max(group_losses))
        start = stop

    return metric_name, np.mean(worst_per_group), False

In [8]:
def eval_log_loss_uma(model, test, test_groups=None, svm=False):
    """Average binary log loss under maximal attack of ``model`` on ``test``.

    ``test`` is a DataFrame whose last column is the label; ``test_groups``
    holds the length of each consecutive group of rows and is forwarded to the
    ``lightgbm.Dataset`` as its ``group``.

    When ``svm`` is true the scores are ``logit`` of the positive-class
    probability from ``predict_proba``; otherwise ``model.predict`` is used.
    """
    features = test.iloc[:,:-1].values

    lgbm_test = lightgbm.Dataset(data=features,
                                 label=test.iloc[:,-1].values,
                                 group=test_groups,
                                 free_raw_data=False)

    if svm:
        # non-tree model: turn probabilities in [0,1] into scores in [-inf, +inf]
        scores = logit(model.predict_proba(features)[:,1])
    else:
        scores = model.predict(features)

    _, loss, _ = avg_log_loss_uma(scores, lgbm_test)
    return loss

# <code>feval=avg_non_interferent_log_loss</code>

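Used for measuring the validity of any model (either _standard_, _baseline_, or _non-interferent_). More precisely, <code>avg_non_interferent_log_loss</code> is the weighted sum $\alpha \cdot L_{uma} + (1-\alpha) \cdot L_{plain}$, where $L_{uma}$ is the binary log loss under maximal attack and $L_{plain}$ is the binary log loss computed on the first instance of each group only.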

In [9]:
# LightGBM takes e.g. feval=lambda preds, train_data: avg_non_interferent_log_loss(preds, train_data, alpha=0.5)

def avg_non_interferent_log_loss(preds, train_data, alpha=1.0):
    """Weighted sum of the log loss under maximal attack and the plain log loss.

    ``value = alpha * loss_uma + (1 - alpha) * loss_plain`` where

    * ``loss_uma`` is whatever ``avg_log_loss_uma`` reports for the same
      inputs, and
    * ``loss_plain`` is the mean binary log loss over the first row of every
      group (presumably the original, unperturbed instance -- confirm against
      the code that generates the attacked datasets).

    Parameters
    ----------
    preds : numpy array of raw scores, one per row.
    train_data : object exposing ``get_label()`` and ``get_group()``
        (e.g. a ``lightgbm.Dataset``).
    alpha : weight given to the under-attack loss.

    Returns
    -------
    (name, value, is_higher_better)
    """
    # binary logloss under maximal attack
    _, loss_uma, _ = avg_log_loss_uma(preds, train_data)

    labels = train_data.get_label()
    attack_lens = train_data.get_group()

    if attack_lens is None:
        # No grouping information: use every row for the plain loss. The
        # previous code built an empty float index here, which made the
        # indexing below raise IndexError.
        ids = np.arange(len(labels))
    else:
        group_sizes = np.asarray(attack_lens, dtype=np.int64)
        # Offset of the first row of each group: exclusive cumulative sum.
        ids = np.cumsum(group_sizes) - group_sizes

    # binary logloss (plain); logaddexp(0, z) == log(1 + exp(z)) without overflow
    losses = np.logaddexp(0.0, -preds[ids] * labels[ids])
    loss_plain = np.mean(losses)

    # combine the above two losses together
    weighted_loss = alpha*loss_uma + (1.0-alpha)*loss_plain

    return 'avg_non_interferent_log_loss [alpha={:.2f}]'.format(alpha), weighted_loss, False

def eval_non_interferent_log_loss(model, test, test_groups=None, svm=False, alpha=1.0):
    """Evaluate ``avg_non_interferent_log_loss`` for ``model`` on ``test``.

    ``test`` is a DataFrame whose last column is the label; ``test_groups``
    holds the length of each consecutive group of rows; ``alpha`` is forwarded
    unchanged to the metric.

    When ``svm`` is true the scores are ``logit`` of the positive-class
    probability from ``predict_proba``; otherwise ``model.predict`` is used.
    """
    features = test.iloc[:,:-1].values

    lgbm_test = lightgbm.Dataset(data=features,
                                 label=test.iloc[:,-1].values,
                                 group=test_groups,
                                 free_raw_data=False)

    if svm:
        # non-tree model: turn probabilities in [0,1] into scores in [-inf, +inf]
        scores = logit(model.predict_proba(features)[:,1])
    else:
        scores = model.predict(features)

    _, loss, _ = avg_non_interferent_log_loss(scores, lgbm_test, alpha=alpha)
    return loss

# Additional validity measures

In addition to the evaluation metrics defined above (used for training), we also consider the following **6** measures of validity to compare the performance of each learned model:

-  <code>eval_binary_err_rate</code>: This is the traditional binary error rate (1-accuracy);
-  <code>eval_binary_err_rate_uma</code>: This is the binary error rate modified to operate on groups of perturbed instances under maximal attack;
-  <code>eval_roc_auc</code>: This is the classical ROC AUC score;
-  <code>eval_roc_auc_uma</code>: This is the ROC AUC score modified to operate on groups of perturbed instances under maximal attack;
-  <code>eval_f1</code>: This is the classical F1 score;
-  <code>eval_f1_uma</code>: This is the F1 score modified to operate on groups of perturbed instances under maximal attack.

Again, note that those are **not** metrics used at training time (i.e., they do not define any <code>feval</code>), rather they are used to assess the (offline) quality of each learned model.

# <code>eval_binary_err_rate</code>

In [10]:
def eval_binary_err_rate(model, test_set, test_groups=None, svm=False):
    """Fraction of rows of ``test_set`` that ``model`` classifies wrongly.

    The last column of ``test_set`` is the label; a raw score greater than 0 is
    read as class +1 and anything else as class -1. ``test_groups`` is not
    used. With ``svm`` true the score is ``logit`` of the positive-class
    probability from ``predict_proba``; otherwise it is ``model.predict``.
    """
    X = test_set.iloc[:,:-1].values
    labels = test_set.iloc[:,-1].values

    # non-tree models expose probabilities; bring them back to real-valued scores
    raw_scores = logit(model.predict_proba(X)[:,1]) if svm else model.predict(X)

    predictions = [1 if score > 0 else -1 for score in raw_scores]

    errs = sum(1 for predicted, actual in zip(predictions, labels) if predicted != actual)
    return errs/len(predictions)

# <code>eval_binary_err_rate_uma</code>

In [11]:
def eval_binary_err_rate_uma(model, test_set, test_groups=None, svm=False):
    """Fraction of groups in which at least one row is classified wrongly.

    ``test_groups`` gives the length of each consecutive group of rows in
    ``test_set``; the label of the first row of a group is taken as the true
    label of the whole group. A raw score greater than 0 is read as class +1
    and anything else as class -1.
    """
    X = test_set.iloc[:,:-1].values
    labels = test_set.iloc[:,-1].values

    # non-tree models expose probabilities; bring them back to real-valued scores
    raw_scores = logit(model.predict_proba(X)[:,1]) if svm else model.predict(X)

    predictions = [1 if score > 0 else -1 for score in raw_scores]

    broken_groups = 0
    start = 0
    for group_size in test_groups:
        group_predictions = predictions[start:start+group_size]
        true_label = labels[start]
        # a single wrong prediction is enough for the attack to succeed
        if any(p != true_label for p in group_predictions):
            broken_groups += 1
        start += group_size

    return broken_groups/len(test_groups)

# <code>eval_roc_auc</code>

In [12]:
def eval_roc_auc(model, test_set, test_groups=None, svm=False):
    """ROC AUC of the raw scores of ``model`` on ``test_set``.

    The last column of ``test_set`` is the label; ``test_groups`` is not used.
    With ``svm`` true the score is ``logit`` of the positive-class probability
    from ``predict_proba``; otherwise it is ``model.predict``.
    """
    X = test_set.iloc[:,:-1].values
    labels = test_set.iloc[:,-1].values

    if svm:
        # non-tree model: turn probabilities in [0,1] into scores in [-inf, +inf]
        scores = logit(model.predict_proba(X)[:,1])
    else:
        scores = model.predict(X)

    return roc_auc_score(y_true=labels, y_score=scores)

# <code>eval_roc_auc_uma</code>

In [13]:
def eval_roc_auc_uma(model, test_set, test_groups=None, svm=False):
    """ROC AUC computed on the worst-case score of every group.

    ``test_groups`` gives the length of each consecutive group of rows; the
    label of the first row of a group is used for the whole group. For a group
    labelled 1 the smallest score is kept, otherwise the largest one.
    """
    X = test_set.iloc[:,:-1].values
    labels = test_set.iloc[:,-1].values

    if svm:
        # non-tree model: turn probabilities in [0,1] into scores in [-inf, +inf]
        scores = logit(model.predict_proba(X)[:,1])
    else:
        scores = model.predict(X)

    true_labels = []
    worst_predictions = []
    start = 0
    for group_size in test_groups:
        group_label = labels[start]
        group_scores = scores[start:start+group_size]

        # the score that is least favourable to the true class
        worst = np.min(group_scores) if group_label == 1 else np.max(group_scores)

        true_labels.append(group_label)
        worst_predictions.append(worst)
        start += group_size

    return roc_auc_score(y_true=true_labels, y_score=worst_predictions)

# <code>eval_f1</code>

In [14]:
def eval_f1(model, test_set, test_groups=None, svm=False):
    """F1 score of the thresholded predictions of ``model`` on ``test_set``.

    The last column of ``test_set`` is the label; a raw score greater than 0 is
    read as class +1 and anything else as class -1. ``test_groups`` is not
    used.
    """
    X = test_set.iloc[:,:-1].values
    labels = test_set.iloc[:,-1].values

    # non-tree models expose probabilities; bring them back to real-valued scores
    raw_scores = logit(model.predict_proba(X)[:,1]) if svm else model.predict(X)

    hard_predictions = [1 if score > 0 else -1 for score in raw_scores]

    return f1_score(y_true=labels, y_pred=hard_predictions)

# <code>eval_f1_uma</code>

In [15]:
def eval_f1_uma(model, test_set, test_groups=None, svm=False):
    """F1 score computed on the worst-case prediction of every group.

    ``test_groups`` gives the length of each consecutive group of rows; the
    label of the first row of a group is used for the whole group. Scores are
    thresholded to -1/+1 first; for a group labelled 1 the smallest prediction
    is kept, otherwise the largest one.
    """
    X = test_set.iloc[:,:-1].values
    labels = test_set.iloc[:,-1].values

    # non-tree models expose probabilities; bring them back to real-valued scores
    raw_scores = logit(model.predict_proba(X)[:,1]) if svm else model.predict(X)

    hard_predictions = [1 if score > 0 else -1 for score in raw_scores]

    true_labels = []
    worst_predictions = []
    start = 0
    for group_size in test_groups:
        group_label = labels[start]
        group_predictions = hard_predictions[start:start+group_size]

        # the prediction that is least favourable to the true class
        worst = np.min(group_predictions) if group_label == 1 else np.max(group_predictions)

        true_labels.append(group_label)
        worst_predictions.append(worst)
        start += group_size

    return f1_score(y_true=true_labels, y_pred=worst_predictions)

# Evaluation metrics

In [16]:
# Metrics computed on the plain test set (no group information needed).
EVAL_METRICS = [eval_log_loss, 
                eval_binary_err_rate, 
                eval_f1,
                eval_roc_auc
               ]

# Group-aware counterparts, listed in the same order as EVAL_METRICS; these
# iterate over `test_groups`, so they must be given the group lengths.
EVAL_METRICS_UNDER_MAX_ATTACK = [eval_log_loss_uma,
                                 eval_binary_err_rate_uma, 
                                 eval_f1_uma,
                                 eval_roc_auc_uma
                                ]

# Evaluate each model w.r.t. _all_ evaluation metrics

In [17]:
def eval_learned_model(model, eval_metric, test, test_groups=None):
    """Apply one ``eval_*`` metric to ``model`` on ``test`` and return its value."""
    score = eval_metric(model, test, test_groups=test_groups)
    return score

In [18]:
def eval_learned_models(model, model_type, test, test_groups=None):
    """Evaluate ``model`` with a whole family of metrics and tabulate the result.

    When ``test_groups`` is given the metrics in
    ``EVAL_METRICS_UNDER_MAX_ATTACK`` are used, otherwise those in
    ``EVAL_METRICS``. Every value is printed as it is computed.

    Returns a one-row DataFrame: a ``Model`` column holding ``model_type``
    followed by one column per metric.
    """
    under_attack = test_groups is not None
    eval_metrics = EVAL_METRICS_UNDER_MAX_ATTACK if under_attack else EVAL_METRICS
    d_test = "D_test_att" if under_attack else "D_test"

    def column_title(metric):
        # e.g. eval_roc_auc_uma -> "Roc Auc Uma"
        return metric.__name__.replace('eval_','').replace('_',' ').title()

    header = ['Model'] + [column_title(m) for m in eval_metrics]
    df = pd.DataFrame(columns=header)
    df.loc[0] = [model_type] + [None] * len(eval_metrics)

    for eval_metric in eval_metrics:
        res = eval_learned_model(model, eval_metric, test, test_groups=test_groups)
        print("{} learning - {} on {} = {:.5f}"
                  .format(model_type, eval_metric.__name__, d_test, res))
        df[column_title(eval_metric)] = res
    print("******************************************************************************************************")

    return df

# Loading Datasets

In [19]:
def label_encode(dataset, categorical_features):
    """Return a copy of ``dataset`` with the listed columns integer-encoded.

    Each column named in ``categorical_features`` is converted to a pandas
    categorical and replaced by its ``int32`` category codes; every other
    column, and the input frame itself, is left untouched.
    """
    dataset_le = dataset.copy()
    columns_to_encode = [column for column in dataset_le.columns
                         if column in categorical_features]
    for column in columns_to_encode:
        dataset_le[column] = dataset_le[column].astype('category')
        category_codes = dataset_le[column].cat.codes
        dataset_le[column] = category_codes.astype(np.int32)
    return dataset_le

def load_atk_train_valid_test(atk_train_file, atk_valid_file, atk_test_file, 
                              train_split=0.6, valid_split=0.2, force=False):
    """Load label-encoded train/validation/test sets, building a cache if needed.

    If ``force`` is true, or any of the cached ``<file>.cat.bz2`` files or the
    ``<atk_train_file>.cat.json`` file is missing, the three CSV files are
    read, concatenated, label-encoded together, re-split according to
    ``train_split`` / ``valid_split`` and written back to disk. Otherwise the
    cached files are simply read.

    Note that in the rebuild path the returned partitions are re-cut from the
    concatenation: their sizes follow the split fractions, not the boundaries
    of the three input files.

    Parameters
    ----------
    atk_train_file, atk_valid_file, atk_test_file : paths of the input CSVs.
    train_split, valid_split : fractions of groups (when an ``instance_id``
        column is present) or of rows (otherwise) assigned to train and
        validation; the remainder goes to test.
    force : rebuild the cache even if it already exists.

    Returns
    -------
    (train_cat, valid_cat, test_cat, cat_fx) : the three encoded DataFrames and
        the list of column names that were label-encoded.
    """
    
    
    # Rebuild when asked to, or when any piece of the cache is missing.
    if  (force or 
          not os.path.exists(atk_train_file+".cat.bz2") or
          not os.path.exists(atk_valid_file+".cat.bz2") or
          not os.path.exists(atk_test_file+".cat.bz2") or 
          not os.path.exists(atk_train_file+".cat.json") ):
    
        print ("Pre-processing original files...")

        print ("Loading:", atk_train_file)
        print ("Loading:", atk_valid_file)
        print ("Loading:", atk_test_file)

        train = pd.read_csv(atk_train_file)
        valid = pd.read_csv(atk_valid_file)
        test  = pd.read_csv(atk_test_file)
        
        print ("Train/Valid/Test sizes:", train.shape, valid.shape, test.shape)
        print ("Train/Valid/Test split: {:.2f} {:.2f} {:.2f}"
                   .format( train.shape[0]/(train.shape[0]+valid.shape[0]+test.shape[0]),
                            valid.shape[0]/(train.shape[0]+valid.shape[0]+test.shape[0]),
                            test.shape[0] /(train.shape[0]+valid.shape[0]+test.shape[0]) ) )


        # compute the sizes (in rows) of the new train and validation partitions
        if 'instance_id' in train.columns.values:
            print ('with instance ids')
            # Shift the ids of valid/test by the first-column value of the last
            # row of the preceding set.
            # NOTE(review): this assumes instance_id is the first column and
            # that ids restart in each file -- confirm against the data files.
            valid['instance_id'] += train.iloc[-1,0]
            test['instance_id']  += valid.iloc[-1,0]
            
            # number of rows per instance_id, in id order, across the three sets
            groups = np.concatenate( [ train['instance_id'].value_counts().sort_index().values,
                                       valid['instance_id'].value_counts().sort_index().values,
                                       test['instance_id'].value_counts().sort_index().values ] )
            
            # split on group boundaries so that a group never straddles two partitions
            num_train_groups = int( len(groups)*train_split )
            train_size = sum(groups[:num_train_groups])
            num_valid_groups = int( len(groups)*valid_split )
            valid_size = sum(groups[num_train_groups:num_train_groups+num_valid_groups])
        else:
            # no grouping: split directly on row counts
            full_size = len(train) + len(valid) + len(test)
            train_size = int( full_size*train_split )
            valid_size = int( full_size*valid_split )
        
        # concat to process correctly label encoding
        full = pd.concat( [train, valid, test] )

        # names of the categorical columns, i.e. those with dtype 'object'
        cat_fx = full.columns.values[np.where(full.dtypes=='object')[0]]
        cat_fx = list(cat_fx)    
        full = label_encode(full, cat_fx)
        # remember which columns were encoded, next to the cached data
        with open(atk_train_file+".cat.json", 'w') as fp:
            json.dump(cat_fx, fp)
        print ("CatFX:", cat_fx)

        # positional slicing: the first train_size rows, then valid_size rows, then the rest
        train_cat = full.iloc[0:train_size,:]
        valid_cat = full.iloc[train_size:train_size+valid_size,:]
        test_cat  = full.iloc[train_size+valid_size:,:]    

        print ("Train/Valid/Test sizes:", train_cat.shape, valid_cat.shape, test_cat.shape)
        print ("Train/Valid/Test split: {:.2f} {:.2f} {:.2f}"
                   .format( train_cat.shape[0]/(train_cat.shape[0]+valid_cat.shape[0]+test_cat.shape[0]),
                            valid_cat.shape[0]/(train_cat.shape[0]+valid_cat.shape[0]+test_cat.shape[0]),
                            test_cat.shape[0] /(train_cat.shape[0]+valid_cat.shape[0]+test_cat.shape[0]) ) )

        # save to file
        print ("Saving processed files *.cat.bz2")
        train_cat.to_csv(atk_train_file+".cat.bz2", compression="bz2", index=False)
        valid_cat.to_csv(atk_valid_file+".cat.bz2", compression="bz2", index=False)
        test_cat.to_csv (atk_test_file+".cat.bz2",  compression="bz2", index=False)
        
    else:
        print ("Loading pre-processed files...")

        train_cat = pd.read_csv(atk_train_file+".cat.bz2")
        valid_cat = pd.read_csv(atk_valid_file+".cat.bz2")
        test_cat  = pd.read_csv(atk_test_file+".cat.bz2")
        
        with open(atk_train_file+".cat.json", 'r') as fp:
            cat_fx = json.load(fp)
    
    # return data
    return train_cat, valid_cat, test_cat, cat_fx

In [20]:
class _TruncatedModel:
    """Adapter whose ``predict`` uses only the first ``num_trees`` trees of a model."""

    def __init__(self, model, num_trees):
        self._model = model
        self._num_trees = num_trees

    def predict(self, X):
        return self._model.predict(X, num_iteration=self._num_trees)


def eval_runs(model, model_type, test, eval_trees=10, test_groups=None):
    """Evaluate ``model`` at one or more tree cut-offs.

    The previous implementation could not run: it iterated over the integer
    default of ``eval_trees`` and passed the cut-off to ``eval_learned_models``
    as an extra positional argument that function does not accept. The cut-off
    is now applied through ``model.predict(..., num_iteration=t)``.

    Parameters
    ----------
    model : model whose ``predict`` accepts a ``num_iteration`` keyword
        (e.g. a ``lightgbm.Booster``).
    model_type : label written in the ``Model`` column.
    test : DataFrame whose last column is the label.
    eval_trees : a single number of trees, or an iterable of them.
    test_groups : optional group lengths; forwarded to ``eval_learned_models``.

    Returns
    -------
    DataFrame with one row per cut-off: the columns produced by
    ``eval_learned_models`` plus a ``Trees`` column holding the cut-off.
    An empty DataFrame is returned when ``eval_trees`` is empty.
    """
    # accept a bare number as well as a collection of cut-offs
    if np.isscalar(eval_trees):
        eval_trees = [eval_trees]

    eval_results = []
    for num_trees in eval_trees:
        truncated = _TruncatedModel(model, num_trees)
        run_df = eval_learned_models(truncated, model_type, test, test_groups=test_groups)
        # record which cut-off produced this row
        run_df.insert(1, 'Trees', num_trees)
        eval_results.append(run_df)

    if not eval_results:
        return pd.DataFrame()

    return pd.concat(eval_results, axis=0, ignore_index=True)

# Retrieve all model files

In [21]:
# Directory holding the saved model files; relative to the notebook's working directory.
MODELS_PATH = "../out/models"

In [22]:
def get_best_model(model_name, budget, suffix=".model"):
    """Load a saved model for ``model_name`` trained with attack budget ``budget``.

    Candidate files are the regular files in ``MODELS_PATH`` whose name starts
    with ``"<model_name>_B<budget>_"`` and ends with ``suffix``. The first
    candidate in alphabetical order is loaded.

    NOTE(review): despite the name, no quality criterion is applied -- the
    selection loop was never implemented (see the commented-out code after
    this function). The previous body referenced an undefined name and always
    raised NameError.

    Raises
    ------
    FileNotFoundError
        If no file in ``MODELS_PATH`` matches.
    """
    prefix = model_name+"_B{}_".format(budget)
    # sorted() makes the choice independent of the directory listing order
    model_files = sorted(f for f in listdir(MODELS_PATH)
                         if f != '.gitignore'
                         and isfile(join(MODELS_PATH, f))
                         and f.startswith(prefix)
                         and f.endswith(suffix))

    if not model_files:
        raise FileNotFoundError(
            "no model file matching '{}*{}' in {}".format(prefix, suffix, MODELS_PATH))

    return lightgbm.Booster(model_file=join(MODELS_PATH, model_files[0]))

#     for mf in model_files:
#         model = lightgbm.Booster(model_file=join(MODELS_PATH, mf))
    

In [109]:
# Load the three models under comparison from their saved files.
std_gbdt = lightgbm.Booster(model_file=join(MODELS_PATH, "std_gbdt_census_T500_S0100_L24_R234.model"))
adv_boost = lightgbm.Booster(model_file=join(MODELS_PATH, "adv_boosting_census_B150_T500_S0100_L24_R465.model"))
non_interf = lightgbm.Booster(model_file=join(MODELS_PATH, "non_interferent_census_B150_T200_S0100_L24_A050_R169.model"))

# a good one !
# adv_boosting_census_B150_claudio_T200_S0100_L16_A025_R199.model

# Truncate each model to a fixed number of trees by saving it with
# `num_iteration` and loading it back. The scratch file "xxxx.model" is written
# to the current working directory and overwritten at every step.
# NOTE(review): 234 and 465 match the R<n> suffix of the corresponding file
# names, but non_interf is cut at 100 while its file name ends in R169 -- confirm
# that this is intended.
std_gbdt.save_model("xxxx.model", num_iteration=234)
std_gbdt = lightgbm.Booster(model_file="xxxx.model")

adv_boost.save_model("xxxx.model", num_iteration=465)
adv_boost = lightgbm.Booster(model_file="xxxx.model")

non_interf.save_model("xxxx.model", num_iteration=100)
non_interf = lightgbm.Booster(model_file="xxxx.model")

# sanity check: number of trees actually kept in each model
print ("STD:", std_gbdt.num_trees() )
print ("AB :", adv_boost.num_trees() )
print ("NI :", non_interf.num_trees() )



STD: 234
AB : 465
NI : 100


In [110]:
# load the original (non-attacked) train/valid/test sets; the list of
# categorical feature names returned as fourth value is not needed here
train, valid, test, _ = load_atk_train_valid_test("../data/census/train_ori.csv.bz2", 
                                                       "../data/census/valid_ori.csv.bz2", 
                                                       "../data/census/test_ori.csv.bz2")

Loading pre-processed files...


In [111]:
# load the attacked (B150) train/valid/test sets
train_att, valid_att, test_att, _ = load_atk_train_valid_test("../data/census/train_B150.csv.bz2", 
                                                       "../data/census/valid_B150.csv.bz2", 
                                                       "../data/census/test_B150.csv.bz2")

# For each set: count the rows of every instance_id (in id order) to get the
# group lengths, then drop the first column so that only features + label remain.
# NOTE(review): the group lengths are only meaningful if rows of the same
# instance_id are contiguous and ordered by id, and the slicing assumes
# instance_id is the first column -- confirm against the data files.
# These cells reassign *_att in place, so they are not idempotent: re-running
# this cell without re-running the load above fails on the missing column.
test_groups = test_att['instance_id'].value_counts().sort_index().values
test_att = test_att.iloc[:, 1:]

valid_groups = valid_att['instance_id'].value_counts().sort_index().values
valid_att = valid_att.iloc[:, 1:]

train_groups = train_att['instance_id'].value_counts().sort_index().values
train_att = train_att.iloc[:, 1:]

Loading pre-processed files...


In [112]:
# Evaluate the three models on the original test set (no groups -> EVAL_METRICS).
std_gbdt_df = eval_learned_models(std_gbdt, "Std GBDT", test)
adv_boost_df = eval_learned_models(adv_boost, "Adv. Boosting", test)
non_interf_df = eval_learned_models(non_interf, "Non-Interferent", test)

Std GBDT learning - eval_log_loss on D_test = 0.30179
Std GBDT learning - eval_binary_err_rate on D_test = 0.13897
Std GBDT learning - eval_f1 on D_test = 0.68630
Std GBDT learning - eval_roc_auc on D_test = 0.91595
******************************************************************************************************
Adv. Boosting learning - eval_log_loss on D_test = 0.31860
Adv. Boosting learning - eval_binary_err_rate on D_test = 0.14561
Adv. Boosting learning - eval_f1 on D_test = 0.62974
Adv. Boosting learning - eval_roc_auc on D_test = 0.91334
******************************************************************************************************
Non-Interferent learning - eval_log_loss on D_test = 0.29105
Non-Interferent learning - eval_binary_err_rate on D_test = 0.13422
Non-Interferent learning - eval_f1 on D_test = 0.70520
Non-Interferent learning - eval_roc_auc on D_test = 0.92177
**************************************************************************************************

In [113]:
# Evaluate the three models on the attacked test set; passing the group lengths
# selects EVAL_METRICS_UNDER_MAX_ATTACK.
std_gbdt_df_att = eval_learned_models(std_gbdt, "Std GBDT", test_att, test_groups)
adv_boost_df_att = eval_learned_models(adv_boost, "Adv. Boosting", test_att, test_groups)
non_interf_df_att = eval_learned_models(non_interf, "Non-Interferent", test_att, test_groups)

Std GBDT learning - eval_log_loss_uma on D_test_att = 0.36386
Std GBDT learning - eval_binary_err_rate_uma on D_test_att = 0.16893
Std GBDT learning - eval_f1_uma on D_test_att = 0.64282
Std GBDT learning - eval_roc_auc_uma on D_test_att = 0.87910
******************************************************************************************************
Adv. Boosting learning - eval_log_loss_uma on D_test_att = 0.32753
Adv. Boosting learning - eval_binary_err_rate_uma on D_test_att = 0.14859
Adv. Boosting learning - eval_f1_uma on D_test_att = 0.62500
Adv. Boosting learning - eval_roc_auc_uma on D_test_att = 0.90705
******************************************************************************************************
Non-Interferent learning - eval_log_loss_uma on D_test_att = 0.35503
Non-Interferent learning - eval_binary_err_rate_uma on D_test_att = 0.16097
Non-Interferent learning - eval_f1_uma on D_test_att = 0.66606
Non-Interferent learning - eval_roc_auc_uma on D_test_att = 0.89100
**

In [91]:
# Gain-based feature importance of the standard GBDT, in ascending order.
# `test.columns.values` also contains the label as its last column; zip stops at
# the shorter sequence.
# NOTE(review): this presumes the importance vector has one entry per feature,
# in column order, so that the label name is the one dropped -- confirm.
for i,name in sorted( zip(std_gbdt.feature_importance(importance_type='gain'),test.columns.values) ):
    print ("{:.2f} : {}".format(i,name) )

0.00 : native_country
0.00 : race
0.00 : workclass
0.00 : occupation
0.00 : marital_status
0.00 : relationship
0.16 : fnlwgt
0.18 : hours_per_week
0.23 : capital_loss
0.27 : sex
0.49 : education_num
0.64 : age
0.75 : capital_gain


In [92]:
# Gain-based feature importance of the adversarial-boosting model, in ascending order.
# `test.columns.values` also contains the label as its last column; zip stops at
# the shorter sequence.
# NOTE(review): this presumes the importance vector has one entry per feature,
# in column order, so that the label name is the one dropped -- confirm.
for i,name in sorted( zip(adv_boost.feature_importance(importance_type='gain'),test.columns.values) ):
    print ("{:.2f} : {}".format(i,name) )

0.00 : native_country
0.00 : race
0.00 : workclass
0.00 : marital_status
0.00 : relationship
0.00 : occupation
0.11 : fnlwgt
0.17 : capital_loss
0.18 : hours_per_week
0.19 : sex
0.33 : education_num
0.40 : age
0.59 : capital_gain


In [93]:
# Gain-based feature importance of the non-interferent model, in ascending order.
# `test.columns.values` also contains the label as its last column; zip stops at
# the shorter sequence.
# NOTE(review): this presumes the importance vector has one entry per feature,
# in column order, so that the label name is the one dropped -- confirm.
for i,name in sorted( zip(non_interf.feature_importance(importance_type='gain'),test.columns.values) ):
    print ("{:.2f} : {}".format(i,name) )

0.00 : native_country
0.00 : race
0.00 : workclass
0.00 : occupation
0.00 : marital_status
0.00 : relationship
0.17 : hours_per_week
0.18 : fnlwgt
0.24 : capital_loss
0.28 : sex
0.50 : education_num
0.65 : age
0.71 : capital_gain


In [60]:
from sklearn.metrics import classification_report, confusion_matrix

# Row-level classification report on the attacked test set: every perturbed row
# counts as its own sample here, unlike the group-level *_uma metrics.
# Note that np.sign maps a raw score of exactly 0 to class 0, whereas the
# eval_* helpers above map it to -1.
print (
        classification_report(y_pred=np.sign(adv_boost.predict(test_att.iloc[:,:-1])), 
                      y_true=test_att.iloc[:,-1])
        )

print (
        classification_report(y_pred=np.sign(non_interf.predict(test_att.iloc[:,:-1])), 
                      y_true=test_att.iloc[:,-1])
        )

              precision    recall  f1-score   support

          -1       1.00      1.00      1.00    419406
           1       0.46      0.47      0.46      2217

   micro avg       0.99      0.99      0.99    421623
   macro avg       0.73      0.73      0.73    421623
weighted avg       0.99      0.99      0.99    421623

              precision    recall  f1-score   support

          -1       1.00      0.98      0.99    419406
           1       0.19      0.68      0.29      2217

   micro avg       0.98      0.98      0.98    421623
   macro avg       0.59      0.83      0.64    421623
weighted avg       0.99      0.98      0.99    421623



In [61]:
from sklearn.metrics import classification_report, confusion_matrix

# Row-level confusion matrices on the attacked test set
# (first adversarial boosting, then non-interferent).
print ( confusion_matrix(y_pred=np.sign(adv_boost.predict(test_att.iloc[:,:-1])), 
                      y_true=test_att.iloc[:,-1]) )
print ( confusion_matrix(y_pred=np.sign(non_interf.predict(test_att.iloc[:,:-1])), 
                      y_true=test_att.iloc[:,-1]) )

[[418179   1227]
 [  1184   1033]]
[[412899   6507]
 [   714   1503]]


In [62]:
from sklearn.metrics import classification_report, confusion_matrix

# Confusion matrices on the original test set
# (first adversarial boosting, then non-interferent).
print ( confusion_matrix(y_pred=np.sign(adv_boost.predict(test.iloc[:,:-1])), 
                      y_true=test.iloc[:,-1]) )
print ( confusion_matrix(y_pred=np.sign(non_interf.predict(test.iloc[:,:-1])), 
                      y_true=test.iloc[:,-1]) )

[[6659  169]
 [1184 1033]]
[[6338  490]
 [ 714 1503]]


In [63]:
from sklearn.metrics import classification_report, confusion_matrix

# Classification reports on the original test set
# (first adversarial boosting, then non-interferent).
print (
        classification_report(y_pred=np.sign(adv_boost.predict(test.iloc[:,:-1])), 
                      y_true=test.iloc[:,-1])
        )

print (
        classification_report(y_pred=np.sign(non_interf.predict(test.iloc[:,:-1])), 
                      y_true=test.iloc[:,-1])
        )

              precision    recall  f1-score   support

          -1       0.85      0.98      0.91      6828
           1       0.86      0.47      0.60      2217

   micro avg       0.85      0.85      0.85      9045
   macro avg       0.85      0.72      0.76      9045
weighted avg       0.85      0.85      0.83      9045

              precision    recall  f1-score   support

          -1       0.90      0.93      0.91      6828
           1       0.75      0.68      0.71      2217

   micro avg       0.87      0.87      0.87      9045
   macro avg       0.83      0.80      0.81      9045
weighted avg       0.86      0.87      0.86      9045



In [39]:
# Evaluate the non-interferent model under attack at increasing tree cut-offs
# (100, 200, ..., 1000). Each cut-off is applied by saving the model with
# `num_iteration=cut` to the scratch file "xxxx.model" and loading it back.
# NOTE(review): if `non_interf` has already been truncated by an earlier cell,
# cut-offs above its current number of trees presumably all evaluate the same
# model (the recorded output is identical at 200 and 300) -- confirm, and load
# the untruncated model before running this sweep.
for cut in range(100, 1100, 100):
    print ("-------- cut at", cut)
    non_interf.save_model("xxxx.model", num_iteration=cut)
    non_interf_cut = lightgbm.Booster(model_file="xxxx.model")
    eval_learned_models(non_interf_cut, "Non-Interferent", test_att, test_groups)

-------- cut at 100
Non-Interferent learning - eval_log_loss_uma on D_test_att = 0.46650
Non-Interferent learning - eval_binary_err_rate_uma on D_test_att = 0.21526
Non-Interferent learning - eval_f1_uma on D_test_att = 0.44260
Non-Interferent learning - eval_roc_auc_uma on D_test_att = 0.76852
******************************************************************************************************
-------- cut at 200
Non-Interferent learning - eval_log_loss_uma on D_test_att = 0.48012
Non-Interferent learning - eval_binary_err_rate_uma on D_test_att = 0.22985
Non-Interferent learning - eval_f1_uma on D_test_att = 0.39826
Non-Interferent learning - eval_roc_auc_uma on D_test_att = 0.76180
******************************************************************************************************
-------- cut at 300
Non-Interferent learning - eval_log_loss_uma on D_test_att = 0.48012
Non-Interferent learning - eval_binary_err_rate_uma on D_test_att = 0.22985
Non-Interferent learning - eval_f1_um

In [75]:
# NOTE(review): the recorded output of this cell (100) disagrees with the 465
# trees printed by the model-loading cell; the cell was executed out of order
# against a different `adv_boost`. Re-run the notebook top to bottom.
adv_boost.num_trees()

100

# Save the DataFrame containing results

In [None]:
# NOTE(review): neither `overall_df` nor `ATTACKER` is defined in any cell of
# this notebook as shown, so this raises NameError on a fresh kernel. Presumably
# `overall_df` is the concatenation of the per-model result frames computed
# above -- define both before this cell.
overall_df.to_csv("../plots/"+ATTACKER+".csv", sep=",", index=False)