In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import tqdm
from tqdm import tqdm_notebook, notebook
import matplotlib.pyplot as plt
import numpy as np
import rankeval
import scipy as sc
import time
import hyperopt
import copy
import random as rng
import lightgbm as lgb
import cython

import seaborn as sns

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import class_weight

from rankeval.dataset import Dataset
from rankeval.model import RTEnsemble
from rankeval.metrics import Metric, NDCG, DCG, MAP
from rankeval.metrics import NDCG
import rankeval.analysis.statistical
import rankeval.metrics
from rankeval.analysis.feature import feature_importance

from hyperopt import base
import gc
base.have_bson = False

%load_ext Cython
%matplotlib inline

In [None]:
from collections import defaultdict

class SDCG(Metric):
    """Discounted Cumulative Gain computed on a stable ordering of the scores.

    Documents are ranked by decreasing predicted score using a stable sort
    that is then reversed, so the ordering of tied scores is deterministic.

    Parameters
    ----------
    name : str
        Name of the metric, used by ``__str__``.
    cutoff : int or None
        Number of top-ranked documents considered; ``None`` uses all of them.
    implementation : str
        ``"flat"`` uses the relevance label itself as gain, ``"exp"`` uses
        ``2**label - 1``.
    """

    def __init__(self, name='SDCG', cutoff=None, implementation="flat"):

        super(SDCG, self).__init__(name)
        self.cutoff = cutoff
        self.implementation = implementation

    def eval(self, dataset, y_pred):
        """Evaluate the metric on a whole dataset (delegates to ``Metric.eval``)."""
        return super(SDCG, self).eval(dataset, y_pred)

    def eval_per_query(self, y, y_pred):
        """Return the DCG of a single query.

        Parameters
        ----------
        y : numpy array
            Relevance labels of the documents of the query.
        y_pred : numpy array
            Predicted scores of the same documents.

        Raises
        ------
        ValueError
            If ``self.implementation`` is neither ``"flat"`` nor ``"exp"``.
        """
        # Stable ascending sort reversed: descending scores with a
        # deterministic order among ties.
        idx_y_pred_sorted = np.argsort(y_pred, kind='stable')[::-1]
        if self.cutoff is not None:
            idx_y_pred_sorted = idx_y_pred_sorted[:self.cutoff]

        # Position i (0-based) is discounted by log2(i + 2).
        discount = np.log2(np.arange(2, idx_y_pred_sorted.size + 2))

        if self.implementation == "flat":
            gain = y[idx_y_pred_sorted]
        elif self.implementation == "exp":
            gain = np.exp2(y[idx_y_pred_sorted]) - 1.0
        else:
            # Previously this fell through and failed with an unbound `gain`.
            raise ValueError(
                "Unknown SDCG implementation {!r}: expected 'flat' or 'exp'".format(
                    self.implementation))

        dcg = (gain / discount).sum()
        return dcg

    def __str__(self):
        s = self.name
        if self.cutoff is not None:
            s += "@{}".format(self.cutoff)
        return s

class SNDCG(Metric):
    """Normalized DCG built on top of :class:`SDCG`.

    The ideal DCG of every query is computed once per dataset and cached, so
    repeated evaluations on the same dataset only pay for the DCG of the
    predictions.

    Parameters
    ----------
    name : str
        Name of the metric, used by ``__str__``.
    cutoff : int or None
        Number of top-ranked documents considered; ``None`` uses all of them.
    no_relevant_results : float
        Value returned for queries whose ideal DCG is zero.
    implementation : str
        Gain implementation forwarded to :class:`SDCG` (``"flat"``/``"exp"``).
    """

    def __init__(self, name='SNDCG', cutoff=None, no_relevant_results=1.0,
                 implementation="exp"):

        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(SNDCG, self).__init__(name)
        self.cutoff = cutoff
        self.no_relevant_results = no_relevant_results
        self.implementation = implementation
        self.dcg = SDCG(cutoff=self.cutoff,
                        implementation=self.implementation)

        self._current_dataset = None
        self._current_rel_qid = None
        self._cache_idcg_score = defaultdict(int)

    def eval(self, dataset, y_pred):
        """Evaluate the metric on a whole dataset (delegates to ``Metric.eval``).

        While the evaluation runs, ``eval_per_query`` reads the ideal DCG of
        each query from the per-dataset cache, in query order.
        """
        # used to cache ideal DCG scores on a dataset basis
        self._current_dataset = dataset
        self._current_rel_qid = 0

        try:
            # Compute the ideal DCG scores only once and cache them
            if dataset not in self._cache_idcg_score:

                idcg_score = np.empty(shape=dataset.n_queries, dtype=np.float32)
                for rel_id, (qid, q_y, _) in enumerate(
                        self.query_iterator(dataset, dataset.y)):
                    idcg_score[rel_id] = self.dcg.eval_per_query(q_y, q_y)

                self._cache_idcg_score[dataset] = idcg_score

            return super(SNDCG, self).eval(dataset, y_pred)
        finally:
            # Leave "dataset mode": otherwise a later direct call to
            # eval_per_query would index the cache with a stale cursor.
            self._current_rel_qid = None
            self._current_dataset = None

    def eval_per_query(self, y, y_pred):
        """Return the NDCG of a single query.

        Uses the cached ideal DCG when called from ``eval``; otherwise the
        ideal DCG is computed from ``y`` directly.
        """
        dcg_score = self.dcg.eval_per_query(y, y_pred)

        if self._current_rel_qid is not None:
            idcg_score = \
                self._cache_idcg_score[self._current_dataset][self._current_rel_qid]
            self._current_rel_qid += 1
        else:
            idcg_score = self.dcg.eval_per_query(y, y)

        if idcg_score != 0:
            ndcg = dcg_score / idcg_score
        else:
            ndcg = self.no_relevant_results
        return ndcg

    def __str__(self):
        s = self.name
        if self.cutoff is not None:
            s += "@{}".format(self.cutoff)
        return s

In [None]:
# Display names and on-disk locations of the two datasets used below.
msn_name = "MSN30KF1"
istella_name = "ISTELLA-FULL"
msn_path = './data/MSLR/Fold1'
istella_path = './data/ISTELLA/'

# (name, path) pairs; the names alone are derived from them, in the same order.
datasets_pairs = [
    (msn_name, msn_path),
    (istella_name, istella_path),
]
datasets_name = [pair_name for pair_name, _ in datasets_pairs]

In [None]:
# Load the 'vali' and 'test' splits of every dataset from '<path>/<split>.txt'.
# Each Dataset is named "<dataset name> <Split>".
rankeval_datasets = {}
for dataset_name, dataset_path in datasets_pairs:
    rankeval_datasets[dataset_name] = {
        split: Dataset.load(
            os.path.join(dataset_path, split + '.txt'),
            name="{} {}".format(dataset_name, split.capitalize()),
        )
        for split in ['vali', 'test']
    }


In [None]:
# Load one pre-trained ranker per dataset (model files in QuickRank format)
# and report the size of each ensemble.
rankeval_models = {}

for dataset_name in datasets_name:
    path_model = './data/rankers/{}-lgb_model.txt'.format(dataset_name)
    rankeval_models[dataset_name] = RTEnsemble(path_model, format='QuickRank')
    print(
        'Model trained on {} has {} trees'.format(
            dataset_name,
            rankeval_models[dataset_name].n_trees,
        )
    )
    

In [None]:
# For every dataset, order the features by decreasing importance of the
# ranker measured on the validation split.
feature_importances = {}
for dataset_name in tqdm.notebook.tqdm(datasets_name):
    f_importances = feature_importance(
        rankeval_models[dataset_name],
        rankeval_datasets[dataset_name]['vali'],
        metric=None,
        normalize=True,
    )
    # Only the first element of the result is used: sort it by its own values.
    importances = f_importances[0].sortby(f_importances[0], ascending=False)
    features_by_importance = importances.coords['feature'].values
    feature_importances[dataset_name] = features_by_importance

In [None]:
# Score every split with the full ensemble, keeping the per-tree partial
# scores as well. The cumulative sum over the tree axis gives the score each
# document has after the first t trees.
y_pred_rankeval = {}
y_partial_pred_rankeval = {}
y_pred_along_training = {}
for dataset_name in tqdm.notebook.tqdm(datasets_name):
    y_pred_rankeval[dataset_name] = {}
    y_partial_pred_rankeval[dataset_name] = {}
    y_pred_along_training[dataset_name] = {}

    for split in tqdm.notebook.tqdm(rankeval_datasets[dataset_name], leave=False):
        (
            y_pred_rankeval[dataset_name][split],
            y_partial_pred_rankeval[dataset_name][split],
            _,
        ) = rankeval_models[dataset_name].score(
            rankeval_datasets[dataset_name][split],
            detailed=True,
        )
        y_pred_along_training[dataset_name][split] = (
            y_partial_pred_rankeval[dataset_name][split].cumsum(axis=1)
        )

In [None]:
# Build the data used by the early-exit classifier:
#   * train      <- the ranker's validation split
#   * vali, test <- a random per-query partition of the ranker's test split
# The ranker scores are partitioned with matching per-document masks.
classification_train = {}
classification_vali = {}
classification_test = {}

y_pred_rankeval_train = {}
y_pred_along_train = {}
y_pred_rankeval_test = {}
y_pred_along_test = {}
y_pred_rankeval_vali = {}
y_pred_along_vali = {}
for dataset_name in datasets_name:
    # Each query goes to validation with probability 1/4, otherwise to test.
    # NOTE(review): the draw is not seeded, so the split changes on every run.
    mask_validation = np.random.choice(a=[True, False, False, False], size=(rankeval_datasets[dataset_name]['test'].n_queries,))
    mask_test = ~ mask_validation

    # Expand the per-query mask to a per-document mask.
    # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the same dtype.
    mask_validation_docs = np.zeros((rankeval_datasets[dataset_name]['test'].n_instances,), dtype=bool)
    for rel_id, (qid, start, end) in enumerate(rankeval_datasets[dataset_name]['test'].query_iterator()):
        if mask_validation[rel_id]:
            mask_validation_docs[start:end] = True
    mask_test_docs = ~ mask_validation_docs

    validation_qids = rankeval_datasets[dataset_name]['test'].query_ids[mask_validation]
    test_qids = rankeval_datasets[dataset_name]['test'].query_ids[mask_test]

    classification_train[dataset_name] = copy.deepcopy(rankeval_datasets[dataset_name]['vali'])
    classification_vali[dataset_name] = rankeval_datasets[dataset_name]['test'].subset(validation_qids)
    classification_test[dataset_name] = rankeval_datasets[dataset_name]['test'].subset(test_qids)

    y_pred_rankeval_train[dataset_name] = y_pred_rankeval[dataset_name]['vali']
    y_pred_along_train[dataset_name] = y_pred_along_training[dataset_name]['vali']

    y_pred_rankeval_test[dataset_name] = y_pred_rankeval[dataset_name]['test'][mask_test_docs]
    y_pred_along_test[dataset_name] = y_pred_along_training[dataset_name]['test'][mask_test_docs, :]

    y_pred_rankeval_vali[dataset_name] = y_pred_rankeval[dataset_name]['test'][mask_validation_docs]
    y_pred_along_vali[dataset_name] = y_pred_along_training[dataset_name]['test'][mask_validation_docs, :]

In [None]:
del rankeval_datasets

In [None]:
print(gc.collect())

In [None]:
def get_lgb_parameters(p):
    """Split a sampled hyper-parameter dict into LightGBM params and train kwargs.

    Parameters
    ----------
    p : dict
        Sampled hyper-parameters. Integer-valued entries are cast to ``int``
        in place (``p`` itself is modified).

    Returns
    -------
    params : dict
        Default binary-classification settings overridden by ``p``, without
        the two training-loop entries below.
    other : dict
        ``num_boost_round`` (default 1000) and ``early_stopping_rounds``
        (default 100).
    """
    # These may be sampled as floats but must be passed as integers.
    integer_keys = ("min_data_in_leaf", "num_leaves", "max_position", "eval_at")
    for key in integer_keys:
        if key in p:
            p[key] = int(p[key])

    defaults = {
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'max_position': 10,
        'metric': 'binary_logloss',
    }
    params = dict(defaults, **p)

    # Training-loop settings are returned separately from the model params.
    other = {
        "num_boost_round": params.pop("num_boost_round", 1000),
        "early_stopping_rounds": params.pop("early_stopping_rounds", 100),
    }

    return params, other

In [None]:
def get_lgb_objective(lgb_datasets, metric, debug=None):
    """Build the objective function minimised by hyperopt.

    Parameters
    ----------
    lgb_datasets : dict
        Must provide the ``'train'`` and ``'vali'`` LightGBM datasets.
    metric : str
        Name of the validation metric read from the evaluation history.
    debug : dict or None
        When given, it is updated in place with the best trial seen so far
        (sampled point, parameters, validation loss, model, number of trees).

    Returns
    -------
    callable
        ``lgb_objective(p)`` returning a hyperopt result dict.
    """

    def lgb_objective(p):
        try:
            params, other = get_lgb_parameters(p)

            evals_result = {}
            lgb_model = lgb.train(
                params,
                lgb_datasets['train'],
                valid_sets=[lgb_datasets['vali']],
                valid_names=['vali'],
                evals_result=evals_result,
                verbose_eval=False,
                **other
            )

            # Validation loss at the best iteration (1-based -> 0-based index).
            history = evals_result['vali'][metric]
            loss = history[lgb_model.best_iteration - 1]

            # Remember the trial only when it improves on the best one so far.
            if debug is not None:
                best_so_far = debug.get("valid_loss")
                if best_so_far is None or best_so_far > loss:
                    debug.update({
                        "p": p,
                        "params": params,
                        "other": other,
                        "valid_loss": loss,
                        "model": lgb_model,
                        "num_trees": lgb_model.best_iteration,
                    })

            return {"loss": loss, "status": hyperopt.STATUS_OK}

        except Exception as e:
            # A failing trial is reported to hyperopt instead of aborting the search.
            print(e)
            return {"status": hyperopt.STATUS_FAIL, "exception": str(e)}

    return lgb_objective

In [None]:
def get_params_space(num_boost_round=1500, num_leaves=[64], max_depth=[8] ,early_stopping_rounds=100):
    """Return the hyperopt search space used to tune the LightGBM classifier.

    Parameters
    ----------
    num_boost_round : int
        Fixed number of boosting rounds stored in the space.
    num_leaves, max_depth : list
        Candidate values offered to ``hp.choice``.
    early_stopping_rounds : int
        Fixed early-stopping patience stored in the space.

    Returns
    -------
    dict
        Mapping from parameter name to a hyperopt expression or fixed value.
    """
    hp = hyperopt.hp
    at_least = hyperopt.pyll.base.scope.maximum

    def scaled(label, low, high, step, divisor, floor):
        # Quantised uniform sample, rescaled and clipped from below.
        return at_least(hp.quniform(label, low, high, step) / divisor, floor)

    # parameters space
    params_space = {
        'learning_rate': scaled("learning_rate", 10, 100, 5, 100, 0.1),
        'num_leaves': hp.choice('num_leaves', num_leaves),
        'max_depth': hp.choice('max_depth', max_depth),

        'min_data_in_leaf': hp.quniform('min_data_in_leaf', 10, 10000, 10),

        'min_sum_hessian_in_leaf': scaled('min_sum_hessian_in_leaf', 1, 1e5, 100, 1000, 0.001),

        'lambda_l1': scaled('lambda_l1', 0, 100, 2, 10, 0.1),
        'lambda_l2': scaled('lambda_l2', 0, 100, 2, 10, 0.0),
        'min_gain_to_split': scaled('min_gain_to_split', 0, 100, 2, 10, 0.0),

        # Fixed (non-searched) settings.
        'boosting_type': 'gbdt',
        'early_stopping_rounds': early_stopping_rounds,
        'num_boost_round': num_boost_round,
        'feature_pre_filter': False,
        'verbosity' : -1
    }

    return params_space

In [None]:
def print_stats(y_true, y_pred):
    """Print a classification report and draw the confusion matrix.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels.
    y_pred : array-like
        Predicted binary labels.
    """
    print(classification_report(y_true, y_pred))
    cm = confusion_matrix(y_true, y_pred)

    ax = plt.subplot()
    # annot=True writes the counts inside the cells
    sns.heatmap(cm, annot=True, ax=ax, cmap='Greens', fmt='g')

    # labels, title and ticks
    ax.set_xlabel('Predicted labels')
    ax.set_ylabel('True labels')
    ax.set_title('Confusion Matrix')
    class_names = ['exit', 'continue']
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_ticklabels(class_names)
    plt.show()
    

### Training

In [None]:
# Metrics at cutoff 10 with exponential gain; queries without relevant
# documents get an NDCG of 1.0.
ndcg_10 = SNDCG(
    cutoff=10,
    no_relevant_results=1.0,
    implementation='exp',
)
dcg_10 = SDCG(
    cutoff=10,
    implementation='exp',
)

In [None]:
def get_features(dataset, scores, sentinel_tree, base_features, rank_features=1, feat_importances=None):
    """Build the classifier feature matrix at a given sentinel tree.

    Column layout of the result:
      * document features (all of them, or the first ``base_features``
        entries of ``feat_importances`` when it is given);
      * ``rank_features`` columns with the within-query rank of the document
        (0 = highest score) at the last ``rank_features`` trees up to the
        sentinel;
      * the score of the document at the sentinel tree;
      * that score min-max normalised within its query (0 when all equal);
      * the number of documents of the query (last column).

    Parameters
    ----------
    dataset : object
        Provides ``X``, ``n_features``, ``n_instances`` and
        ``query_iterator()`` yielding ``(qid, start, end)``.
    scores : 2-D array
        Indexed as ``scores[document, tree]``.
    sentinel_tree : int
        0-based index of the sentinel tree.
    base_features : int
        Number of document features kept (used only with ``feat_importances``).
    rank_features : int
        Number of rank columns.
    feat_importances : array or None
        Feature indices ordered by importance.

    Returns
    -------
    numpy array of shape ``(n_instances, n_features_out)``.
    """
    n_extra = 2
    select_features = feat_importances is not None
    data_features = base_features if select_features else dataset.n_features

    n_inst = dataset.n_instances
    n_feats = data_features + rank_features + n_extra + 1
    features = np.zeros(shape=(n_inst, n_feats))
    rank_idx = np.zeros((n_inst, rank_features))

    if select_features:
        features[:, :data_features] = dataset.X[:, feat_importances[:data_features]]
    else:
        features[:, :data_features] = dataset.X[:]

    score_col = data_features + rank_features   # extra 1
    norm_col = score_col + 1                    # extra 2
    first_tree = sentinel_tree + 1 - rank_features

    for col in range(rank_features):
        tree = first_tree + col
        for _qid, start, end in dataset.query_iterator():
            query_scores = scores[start:end, tree]
            lowest = query_scores.min()
            spread = query_scores.max() - lowest
            shifted = query_scores - lowest
            norm_query_scores = shifted / spread if spread != 0 else shifted

            # Position of each document in the descending (stable) ordering.
            ordering = np.argsort(query_scores, kind='stable')[::-1]
            rank_idx[start + ordering, col] = np.arange(ordering.size)

            # The last tree processed is the sentinel, so its values are kept.
            features[start:end, -1] = float(end - start)
            features[start:end, score_col] = query_scores
            features[start:end, norm_col] = norm_query_scores

    features[:, data_features:score_col] = rank_idx
    return features

def get_oracle(dataset, scores, k):
    """Label the documents the classifier should keep.

    A document is marked 1 when it is among the top ``k`` of its query
    according to the last column of ``scores`` and its relevance label is
    positive; every other document is marked 0.

    Parameters
    ----------
    dataset : object
        Provides ``y``, ``n_instances`` and ``query_iterator()`` yielding
        ``(qid, start, end)``.
    scores : 2-D array
        Indexed as ``scores[document, tree]``; only the last column is used.
    k : int
        Number of top-ranked documents considered per query.

    Returns
    -------
    numpy float array of shape ``(n_instances,)`` with values 0.0 / 1.0.
    """
    mask = np.zeros(shape=(dataset.n_instances,))
    for _qid, start, end in dataset.query_iterator():
        # Descending order of the final scores, deterministic among ties.
        ordering = np.argsort(scores[start:end, -1], kind='stable')[::-1]
        candidates = ordering[:k]
        labels = np.asarray(dataset.y[start:end])
        kept = candidates[labels[candidates] > 0]
        mask[start + kept] = True

    return mask

def class_weight_by_query(dataset, binary_labels):
    """Per-document weights that balance the two classes inside each query.

    Within a query of size ``n`` a document of class ``c`` receives
    ``n / (2 * count_c)``; a class absent from the query gets weight 0.

    Parameters
    ----------
    dataset : object
        Provides ``n_instances`` and ``query_iterator()`` yielding
        ``(qid, start, end)``.
    binary_labels : numpy array
        0/1 labels, one per document.

    Returns
    -------
    numpy float array of shape ``(n_instances,)``.
    """
    weights = np.zeros((dataset.n_instances))
    for _qid, start, end in dataset.query_iterator():
        query_labels = binary_labels[start:end]
        query_size = end - start
        n_pos = query_labels.sum()
        counts = np.array([query_size - n_pos, n_pos])
        # Division is skipped (weight stays 0) for a class with no documents.
        per_class = np.divide(query_size, 2 * counts, out=np.zeros((2,)), where=counts != 0)
        weights[start:end] = per_class[query_labels.astype(int)]

    return weights


In [None]:
# Best number of base features per dataset (presumably out of the total shown in parentheses): MSN 54 (136), Istella 118 (220)

In [None]:
# Train one early-exit classifier per (dataset, margin, #features, sentinel
# position), tuning LightGBM with hyperopt and keeping the model with the
# lowest validation log-loss.
k = 10
window = 0
n_regressors = 10

# Seed for the hyper-parameter search. No earlier visible cell defines it, so
# fall back to a fixed value while honouring one already present in the kernel.
random_seed = globals().get("random_seed", 42)

# Experiment grid (constants, so defined once outside the dataset loop).
margins = [5]
base_features = [54, 118]
NUM_TRIALS = 100
POSITIONS = [50,100,200]

models = {}
params = {}
valid_losses = {}
for dataset_name in tqdm.notebook.tqdm(datasets_name):
    models[dataset_name] = {}
    params[dataset_name] = {}
    valid_losses[dataset_name] = {}

    for ex in margins:
        # Documents within the top (k + ex) of the final ranking are candidates.
        margin = k + ex
        models[dataset_name][margin] = {}
        params[dataset_name][margin] = {}
        valid_losses[dataset_name][margin] = {}
        for features in base_features:
            models[dataset_name][margin][features] = {}
            params[dataset_name][margin][features] = {}
            valid_losses[dataset_name][margin][features] = {}
            for position in POSITIONS:
                tree = position - 1

                X_train = get_features(classification_train[dataset_name],
                                       y_pred_along_train[dataset_name],
                                       sentinel_tree=tree,
                                       base_features=features,
                                       feat_importances=feature_importances[dataset_name])

                X_test =  get_features(classification_test[dataset_name],
                                       y_pred_along_test[dataset_name],
                                       sentinel_tree=tree,
                                       base_features=features,
                                       feat_importances=feature_importances[dataset_name])

                X_vali =  get_features(classification_vali[dataset_name],
                                       y_pred_along_vali[dataset_name],
                                       sentinel_tree=tree,
                                       base_features=features,
                                       feat_importances=feature_importances[dataset_name])


                y_train = get_oracle(classification_train[dataset_name], y_pred_along_train[dataset_name], margin)
                y_test =  get_oracle(classification_test[dataset_name],  y_pred_along_test[dataset_name],  margin)
                y_vali =  get_oracle(classification_vali[dataset_name],  y_pred_along_vali[dataset_name],  margin)

                # Positive documents are weighted by 2**relevance, negatives by 1 ...
                w_train = np.exp2(classification_train[dataset_name].y) * (y_train)
                w_test  = np.exp2(classification_test[dataset_name].y)  * (y_test)
                w_vali  = np.exp2(classification_vali[dataset_name].y)  * (y_vali)

                w_train[w_train == 0] = 1
                w_test[w_test == 0]   = 1
                w_vali[w_vali == 0]   = 1

                # ... and then rescaled to balance the classes within each query.
                w_train = w_train  * class_weight_by_query(classification_train[dataset_name], y_train)
                w_test  = w_test   * class_weight_by_query(classification_test[dataset_name], y_test)
                w_vali  = w_vali   * class_weight_by_query(classification_vali[dataset_name], y_vali)

                data = [('train', X_train , y_train, w_train), ('vali', X_vali, y_vali, w_vali), ('test', X_test, y_test, w_test)]

                # `split_name` (not `dataset_name`) so the outer loop variable
                # is never confused with the split label.
                lgb_datasets = dict([
                    (split_name, lgb.Dataset(data=X, label=y, weight= w, silent=False))
                for split_name, X, y, w in data
                ])

                del data

                opt_res = {}
                results = {}
                print(margin, features, position)
                for num_trees in [n_regressors]:
                    params_space = get_params_space(num_boost_round=num_trees, num_leaves=[64], max_depth=[8], early_stopping_rounds=10)
                    # trials object to track progress
                    bayes_trials = hyperopt.Trials()
                    # filled by the objective with the best trial seen so far
                    opt_res[num_trees] = {}
                    # optimize
                    np.random.seed(random_seed)
                    best = hyperopt.fmin(
                        fn=get_lgb_objective(lgb_datasets, metric='binary_logloss', debug=opt_res[num_trees]),
                        space=params_space,
                        algo=hyperopt.tpe.suggest,
                        max_evals=NUM_TRIALS,
                        trials=bayes_trials,
                        rstate=np.random.RandomState(random_seed),
                    )
                    results[num_trees] = {
                        'best_params': hyperopt.space_eval(params_space, best),
                    }
                models[dataset_name][margin][features][position] = opt_res[num_trees]['model']
                params[dataset_name][margin][features][position] = opt_res[num_trees]['p']
                valid_losses[dataset_name][margin][features][position] = opt_res[num_trees]['valid_loss']
                # Free the LightGBM datasets before the next configuration.
                del lgb_datasets
                gc.collect()

In [None]:
# Evaluate the learned early-exit classifiers (LEAR) on the test split: for
# every confidence threshold, measure the relative NDCG@10 change and the
# speedup in tree traversals with respect to the unpruned ranker.
speedups = {}
deltas = {}
sizes = {}
ndcgs = {}
steps = [v for v in range(10, 80, 10)]
for dataset_name in datasets_name:
    print(dataset_name)
    dcg_unpruned = dcg_10.eval(classification_test[dataset_name], y_pred_rankeval_test[dataset_name])[1]
    idcg = dcg_10.eval(classification_test[dataset_name], classification_test[dataset_name].y)[1]
    ndcg_unpruned = dcg_unpruned / idcg
    # NaN comes from a zero ideal DCG; such queries count as NDCG = 1.0.
    ndcg_unpruned[np.isnan(ndcg_unpruned)] = 1.0
    unpruned_mean = ndcg_unpruned.mean()
    base_traversals = classification_test[dataset_name].n_instances * rankeval_models[dataset_name].n_trees
    print('ndcg unpruned = {:.4f}\n'.format(ndcg_unpruned.mean()))
    speedups[dataset_name] = {}
    deltas[dataset_name] = {}
    sizes[dataset_name] = {}
    ndcgs[dataset_name] = {}
    # Number of documents kept per query, refilled for every threshold.
    csizes = np.zeros((classification_test[dataset_name].n_queries,))
    for ex in tqdm.notebook.tqdm(margins, leave=False):
        margin = k + ex
        speedups[dataset_name][margin] = {}
        deltas[dataset_name][margin] = {}
        sizes[dataset_name][margin] = {}
        ndcgs[dataset_name][margin] = {}
        for features in tqdm.notebook.tqdm(base_features, leave=False):
            speedups[dataset_name][margin][features]= {}
            deltas[dataset_name][margin][features] = {}
            sizes[dataset_name][margin][features] = {}
            ndcgs[dataset_name][margin][features] = {}
            for position in tqdm.notebook.tqdm(POSITIONS, leave=False):
                sentinel_tree = position - 1
                speedups[dataset_name][margin][features][position] = []
                deltas[dataset_name][margin][features][position] = []
                sizes[dataset_name][margin][features][position] = []
                ndcgs[dataset_name][margin][features][position] = {}

                # Features, oracle labels and classifier scores do not depend
                # on the threshold: compute them once per configuration.
                bst = models[dataset_name][margin][features][position]

                X_test =  get_features(classification_test[dataset_name],
                                       y_pred_along_test[dataset_name],
                                       sentinel_tree=sentinel_tree,
                                       base_features=features,
                                       feat_importances=feature_importances[dataset_name])

                # Kept for inspection (e.g. print_stats(y_test, y_pred)).
                y_test =  get_oracle(classification_test[dataset_name],  y_pred_along_test[dataset_name],  margin)

                y_score = bst.predict(X_test)

                for threshold in tqdm.notebook.tqdm(steps, leave=False):
                    t = threshold / 100

                    # True = the document continues through the ensemble.
                    y_pred = y_score > t

                    mask_pruning = ~ y_pred
                    y_pred_rankeval_pruned = copy.deepcopy(y_pred_rankeval_test[dataset_name])
                    # Large negative offset on the pruned documents
                    # (presumably to rank them below every kept one — this
                    # assumes score magnitudes well under 10000).
                    y_pred_rankeval_pruned[mask_pruning] += (sentinel_tree - 10000)
                    dcg_pruned = dcg_10.eval(classification_test[dataset_name], y_pred_rankeval_pruned)[1]
                    ndcg_pruned = dcg_pruned / idcg
                    ndcg_pruned[np.isnan(ndcg_pruned)] = 1.0
                    pruned_mean = ndcg_pruned.mean()

                    delta = - (unpruned_mean - pruned_mean) / unpruned_mean
                    # Cost = classifier trees for every document
                    #      + `position` ranker trees for every document
                    #      + the remaining ranker trees for the kept documents.
                    done_traversals = classification_test[dataset_name].n_instances * n_regressors + (classification_test[dataset_name].n_instances * position) + (y_pred.sum() * ( rankeval_models[dataset_name].n_trees - position))
                    speedup = base_traversals / done_traversals

                    for rel_id, (qid, start, end) in enumerate(classification_test[dataset_name].query_iterator()):
                        y_pred_query = y_pred[start : end]
                        ndocs = y_pred_query.sum()
                        csizes[rel_id] = ndocs

                    ndcgs[dataset_name][margin][features][position][threshold] = ndcg_pruned
                    deltas[dataset_name][margin][features][position].append(delta * 100)
                    speedups[dataset_name][margin][features][position].append(speedup)
                    # Median number of documents kept per query.
                    sizes[dataset_name][margin][features][position].append(np.percentile(csizes, 50))

                    print('delta ndcg  = {:.4f} %'.format(delta * 100))
                    print('speedup = {:.2f} x'.format(speedup))

                    print('-----------------------------------------------------')


gc.collect()





In [None]:
# Evaluate the score-threshold baseline (EPT) on the test split: a document
# continues past the sentinel only if its score there is within
# `threshold / 100` of the score of the 15th-ranked document of its query.
ept_speedups = {}
ept_deltas = {}
ept_sizes = {}
ept_ndcg = {}
ept_steps = [v for v in range(10, 120, 10)]
# Rank whose score anchors the pruning threshold.
ept_top_k = 15
for dataset_name in datasets_name:
    print(dataset_name)
    dcg_unpruned = dcg_10.eval(classification_test[dataset_name], y_pred_rankeval_test[dataset_name])[1]
    idcg = dcg_10.eval(classification_test[dataset_name], classification_test[dataset_name].y)[1]
    ndcg_unpruned = dcg_unpruned / idcg
    # NaN comes from a zero ideal DCG; such queries count as NDCG = 1.0.
    ndcg_unpruned[np.isnan(ndcg_unpruned)] = 1.0
    unpruned_mean = ndcg_unpruned.mean()
    base_traversals = classification_test[dataset_name].n_instances *  rankeval_models[dataset_name].n_trees
    print('ndcg unpruned = {:.4f}\n'.format(ndcg_unpruned.mean()))

    ept_speedups[dataset_name] = {}
    ept_deltas[dataset_name] = {}
    ept_sizes[dataset_name] = {}
    ept_ndcg[dataset_name] = {}

    # Sized for THIS dataset. Reusing the array left over from the previous
    # cell mixes in stale counts (or overflows) when query counts differ.
    csizes = np.zeros((classification_test[dataset_name].n_queries,))

    for position in POSITIONS:
        # Defined here instead of relying on the value leaked by another cell.
        sentinel_tree = position - 1
        ept_speedups[dataset_name][position] = []
        ept_deltas[dataset_name][position] = []
        ept_sizes[dataset_name][position] = []
        ept_ndcg[dataset_name][position] = {}
        for threshold in ept_steps:
            y_pred = np.zeros(shape=(classification_test[dataset_name].n_instances,))
            for rel_id, (qid, start, end) in enumerate(classification_test[dataset_name].query_iterator()):
                query_scores = y_pred_along_test[dataset_name][start:end, sentinel_tree]
                final_ranks = np.argsort(query_scores, kind='stable')[::-1]
                top_k = final_ranks[:ept_top_k]
                # Score of the last document in the top list (the query may
                # hold fewer than `ept_top_k` documents).
                score_kth = query_scores[top_k[-1]]
                t = score_kth - (threshold / 100)
                y_pred[start:end] = query_scores >= t

            mask_pruning = ~ (y_pred.astype(bool))
            y_pred_rankeval_pruned = copy.deepcopy(y_pred_rankeval_test[dataset_name])
            # Large negative offset on the pruned documents (presumably to
            # rank them below every kept one — this assumes score magnitudes
            # well under 10000).
            y_pred_rankeval_pruned[mask_pruning] += (sentinel_tree - 10000)
            dcg_pruned = dcg_10.eval(classification_test[dataset_name], y_pred_rankeval_pruned)[1]
            ndcg_pruned = dcg_pruned / idcg
            ndcg_pruned[np.isnan(ndcg_pruned)] = 1.0
            pruned_mean = ndcg_pruned.mean()

            delta = - (unpruned_mean - pruned_mean) / unpruned_mean
            # Cost = `position` trees for every document + the remaining
            # trees for the documents that continue.
            done_traversals = (classification_test[dataset_name].n_instances * position) + (y_pred.sum() * (rankeval_models[dataset_name].n_trees - position))
            speedup = base_traversals  / done_traversals

            for rel_id, (qid, start, end) in enumerate(classification_test[dataset_name].query_iterator()):
                y_pred_query = y_pred[start : end]
                ndocs = y_pred_query.sum()
                csizes[rel_id] = ndocs

            ept_ndcg[dataset_name][position][threshold] = ndcg_pruned

            ept_deltas[dataset_name][position].append( delta * 100)
            ept_speedups[dataset_name][position].append(speedup)
            # Mean number of documents kept per query.
            ept_sizes[dataset_name][position].append(csizes.mean())
            print(position, threshold/100, pruned_mean, delta*100 ,speedup)

In [None]:
# Larger default font for all the figures below.
plt.rcParams['font.size'] = 18

In [None]:
# Validation losses at margin 15, grouped by sentinel position and listed in
# the order of `base_features`.
losses = {}
for dataset_name in datasets_name:
    losses[dataset_name] = {
        position: [valid_losses[dataset_name][15][f][position] for f in base_features]
        for position in (50, 100, 200)
    }
    print(base_features)

In [None]:
# Validation loss against the number of base features, one curve per
# sentinel position and one figure per dataset.
for dataset_name in datasets_name:
    fig, ax1 = plt.subplots(figsize=(6, 4))
    for position in POSITIONS:
        ax1.plot(
            base_features,
            losses[dataset_name][position],
            marker='.',
            label=position,
        )
    ax1.grid()
    ax1.set_title(dataset_name)
    ax1.legend()
    plt.show()

In [None]:
from sklearn import metrics

# Configuration selected for the plots below.
best_margin = 15
best_n_features = {
    msn_name: 54,
    istella_name: 118,
}

# Colours used consistently across figures.
position_colors = {
    50: 'blue',
    100: 'red',
    200: 'green',
}
models_color = {
    'EPT': 'red',
    'LEAR': 'blue',
}

In [None]:
## Classifier performance: ROC and precision-recall curves on the test split
for dataset_name in datasets_name:
    # Score the test split once per sentinel position; both figures reuse it.
    curve_inputs = {}
    for position in POSITIONS:
        bst = models[dataset_name][best_margin][best_n_features[dataset_name]][position]
        X =  get_features(classification_test[dataset_name],
                          y_pred_along_test[dataset_name],
                          sentinel_tree=position-1,
                          base_features=best_n_features[dataset_name],
                          feat_importances=feature_importances[dataset_name])
        y_true =  get_oracle(classification_test[dataset_name],  y_pred_along_test[dataset_name],  best_margin)
        y_pred = bst.predict(X)
        curve_inputs[position] = (y_true, y_pred)

    # ROC curve
    fig, ax1 = plt.subplots(figsize=(6,4))
    for position in POSITIONS:
        y_true, y_pred = curve_inputs[position]
        fpr, tpr, roc_thresholds = metrics.roc_curve(y_true, y_pred)
        ax1.plot(fpr, tpr, label = position, color=position_colors[position])

    # Diagonal = chance level.
    ax1.plot([0, 1], [0, 1],'k--')
    ax1.legend(loc = 'lower right')
    ax1.set_xlim([0, 1])
    ax1.set_ylim([0, 1])
    ax1.set_ylabel('True Positive Rate')
    ax1.set_xlabel('False Positive Rate')
    ax1.grid(ls=(0, (5, 5)), alpha=0.5)
    plt.tight_layout()
    plt.title(dataset_name)
    plt.show()
    #fig.savefig('../images/{}_ROC_curve.pdf'.format(dataset_name), format='pdf', bbox_inches='tight')

    # P-R curve
    fig, ax1 = plt.subplots(figsize=(6,4))
    for position in POSITIONS:
        y_true, y_pred = curve_inputs[position]
        precision, recall, _ = metrics.precision_recall_curve(y_true, y_pred)
        # Fraction of positive documents, drawn as a horizontal line.
        # Named `positive_rate` so the `hyperopt.base` module alias imported
        # at the top of the notebook is not overwritten.
        positive_rate = y_true[y_true >= 0.99].size / y_true.size
        ax1.plot([0, 1], [positive_rate, positive_rate],'k--')
        ax1.plot(recall, precision, marker=',', label=position, color=position_colors[position])
    # axis labels
    ax1.set_xlabel('Recall')
    ax1.set_ylabel('Precision')
    # show the legend
    ax1.legend()
    ax1.grid(ls=(0, (5, 5)), alpha=0.5)
    plt.tight_layout()
    plt.title(dataset_name)
    plt.show()
    #fig.savefig('../images/{}_PR_curve.pdf'.format(dataset_name), format='pdf', bbox_inches='tight')


In [None]:
## EPT performance: relative NDCG@10 change against speedup, one curve per
## sentinel position and one figure per dataset.
for dataset_name in datasets_name:
    fig, ax1 = plt.subplots(figsize=(6, 4))
    for position in POSITIONS:
        ept_speedup = ept_speedups[dataset_name][position]
        ept_delta = ept_deltas[dataset_name][position]
        ax1.plot(
            ept_speedup,
            ept_delta,
            color=position_colors[position],
            marker='.',
            label='s = {}'.format(position),
            markersize=18,
        )
    ax1.set_xlabel(r'speedup')
    #ax1.set_ylabel(r'$\Delta$ NDCG@10 (%)')
    ax1.grid(ls=(0, (5, 5)), alpha=0.5)
    ax1.legend(loc=3)
    #plt.ylim((-0.5, 0.05))
    #plt.xlim((1.4, 3.750))
    plt.tight_layout()
    ax1.set_title(dataset_name)
    plt.show()
    #fig.savefig('../images/{}_EPT_tradeoff.pdf'.format(dataset_name), format='pdf', bbox_inches='tight')

In [None]:
## LEAR performances
# LEAR performance: relative NDCG@10 change against speedup for the selected
# margin and number of features, one curve per sentinel position.
for dataset_name in datasets_name:
    fig, ax1 = plt.subplots(figsize=(6,4))
    lear_speedups = speedups[dataset_name][best_margin][best_n_features[dataset_name]]
    lear_deltas = deltas[dataset_name][best_margin][best_n_features[dataset_name]]
    for position in POSITIONS:
        ax1.plot(lear_speedups[position], lear_deltas[position], color=position_colors[position], marker='.', label='s = '+str(position), markersize=18)
    ax1.set_xlabel(r'speedup')
    ax1.set_ylabel(r'$\Delta$ NDCG@10 (%)')
    ax1.grid(ls=(0, (5, 5)), alpha=0.5)
    plt.legend(loc=3)
    plt.tight_layout()
    #plt.ylim((-0.5, 0.05))
    #plt.xlim((2.1, 4.1))
    plt.title(dataset_name)
    plt.show()
#fig.savefig('../images/{}_LEAR_tradeoff.pdf'.format(dataset_name), format='pdf', bbox_inches='tight')

In [None]:
# Sentinel position selected for each method and dataset.
best_position_lear = {
    msn_name: 50,
    istella_name: 100,
}
best_position_ept = {
    msn_name: 200,
    istella_name: 200,
}

In [None]:
## Best vs best
## Best LEAR configuration against best EPT configuration, per dataset.
for dataset_name in datasets_name:
    lear_position = best_position_lear[dataset_name]
    ept_position = best_position_ept[dataset_name]
    n_features = best_n_features[dataset_name]

    fig, ax1 = plt.subplots(figsize=(6, 4))
    ax1.plot(
        speedups[dataset_name][best_margin][n_features][lear_position],
        deltas[dataset_name][best_margin][n_features][lear_position],
        color=models_color['LEAR'],
        marker='.',
        label='LEAR(s = {})'.format(lear_position),
        markersize=18,
    )
    ax1.plot(
        ept_speedups[dataset_name][ept_position],
        ept_deltas[dataset_name][ept_position],
        color=models_color['EPT'],
        marker='.',
        label='EPT(s = {})'.format(ept_position),
        markersize=18,
    )
    ax1.set_xlabel(r'speedup')
    ax1.set_ylabel(r'$\Delta$ NDCG@10 (%)')
    ax1.grid(ls=(0, (5, 5)), alpha=0.5)
    ax1.legend(loc=3)
    plt.tight_layout()
    #plt.ylim((-0.30, 0.05))
    #plt.xlim((1.25, 4.2))
    ax1.set_title(dataset_name)
    plt.show()
    #fig.savefig('../images/{}_comparison.pdf'.format(dataset_name), format='pdf', bbox_inches='tight')