# Istella22 - LambdaMART evaluation

In [None]:
import pandas as pd

import ir_measures
from ir_measures import *

from rankeval.dataset import Dataset
from rankeval.model import RTEnsemble

## Load a dataset using RankEval (https://github.com/hpclab/rankeval)

In [None]:
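# Uncomment exactly one TEST_FILE / MODEL_FILE pair below before running the
# following cells: they read both names and fail if neither is defined.

# LambdaMART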
#TEST_FILE="data/test.svm"
#MODEL_FILE="models/lambdamart.lgb"

# LambdaMART, MonoT5
#TEST_FILE="data/test.monoT5.svm"
#MODEL_FILE="models/lambdamart.monoT5.lgb"

# LambdaMART, MonoT5, Title, Url and Text
#TEST_FILE="data/test.monoT5.titleUrlText.svm"
#MODEL_FILE="models/lambdamart.monoT5.titleUrlText.lgb"

In [None]:
# Read the SVMlight file referenced by TEST_FILE into a RankEval Dataset.
test_subset = Dataset.load(
    TEST_FILE,
    format="svmlight",
    name="Istella-test",
)

# Print a short summary of the loaded split, one attribute per line.
print("Istella - Test Set")
for caption, attribute in (
    ("Num. features: ", "n_features"),
    ("Num. queries: ", "n_queries"),
    ("Num. instances: ", "n_instances"),
):
    print(caption, getattr(test_subset, attribute))

## Evaluate the LightGBM (https://github.com/microsoft/LightGBM) LambdaMART

### *** using RankEval

In [None]:
# Load the LightGBM model file referenced by MODEL_FILE as a RankEval ensemble.
lgbm_lmart = RTEnsemble(
    MODEL_FILE,
    name="LGBM_lmart",
    format="LightGBM",
)

# Report how many trees the loaded ensemble contains.
print("Model statistics")
print("Num. Trees: ", lgbm_lmart.n_trees)

In [None]:
from rankeval.metrics import Precision, Recall, NDCG, MRR, MAP

# MRR and MAP are built with their default arguments.
mrr, ap = MRR(), MAP()

# Precision at cutoffs 1, 5 and 10, all with threshold=1.
precision_1, precision_5, precision_10 = (
    Precision(cutoff=k, threshold=1) for k in (1, 5, 10)
)

# Recall at cutoffs 100 and 1000.
recall_100, recall_1000 = (Recall(cutoff=k) for k in (100, 1000))

# NDCG at cutoffs 5, 10 and 20; every instance shares the same
# no_relevant_results / implementation settings.
ndcg_5, ndcg_10, ndcg_20 = (
    NDCG(cutoff=k, no_relevant_results=0, implementation='exp')
    for k in (5, 10, 20)
)

In [None]:
from rankeval.analysis.effectiveness import model_performance

# Evaluate the single loaded model on the single loaded test split with
# every metric object created in the metrics cell.
istella_model_perf = model_performance(
    datasets=[test_subset],
    models=[lgbm_lmart],
    metrics=[
        mrr,
        ap,
        precision_1,
        precision_5,
        precision_10,
        recall_100,
        recall_1000,
        ndcg_5,
        ndcg_10,
        ndcg_20,
    ],
)

# Last expression of the cell: displayed as a table by the notebook.
istella_model_perf.to_dataframe()

### *** using ir_measures (https://github.com/terrierteam/ir_measures/)

In [None]:
# Score the test split with the loaded model (detailed=False), then preview
# the first five predictions.
y_pred = lgbm_lmart.score(test_subset, detailed=False)
print(y_pred[:5])

In [None]:
# Preview the sizes of the first five queries.
print(test_subset.get_query_sizes()[:5])

# Keep the dataset labels under a short name and preview the first five.
y = test_subset.y
print(y[:5])

In [None]:
# Convert the flat prediction / label arrays into the per-(query, document)
# records consumed by ir_measures.
#
# Query ids are the query's position in the dataset and document ids are the
# document's position inside its query, both rendered as strings.
# `total_counter` walks the flat `y_pred` / `y` arrays in step with the
# nested iteration.

# Fetch the per-query sizes once. The previous version called
# get_query_sizes() in both loop conditions, i.e. once per document and once
# per query.
query_sizes = test_subset.get_query_sizes()

run = []
qrels = []

total_counter = 0
for qid, n_docs in enumerate(query_sizes):
    query_key = str(qid)
    for doc_id in range(int(n_docs)):
        doc_key = str(doc_id)
        run.append(ir_measures.ScoredDoc(query_key, doc_key, float(y_pred[total_counter])))
        qrels.append(ir_measures.Qrel(query_key, doc_key, int(y[total_counter])))
        total_counter += 1

In [None]:
# Aggregate the ir_measures metrics over all queries. The cutoffs are the
# same ones used for the RankEval metrics earlier in this file, plus
# Judged@10.
measures_dict = ir_measures.calc_aggregate(
    [
        AP,
        RR,
        P(rel=1)@1,
        P(rel=1)@5,
        P(rel=1)@10,
        R@100,
        R@1000,
        nDCG(dcg='exp-log2')@5,
        nDCG(dcg='exp-log2')@10,
        nDCG(dcg='exp-log2')@20,
        Judged@10,
    ],
    qrels,
    run,
)
# Uncomment to inspect the raw mapping:
# print(measures_dict)

# One row per measure; the last expression is displayed by the notebook.
measures_df = pd.DataFrame.from_dict(measures_dict, orient="index")
measures_df

## Statistical significance analysis (using RankEval)

In [None]:
from rankeval.analysis.statistical import statistical_significance

# Baseline LambdaMART vs. LambdaMART with the monoT5 feature.
# NOTE(review): the configuration cell earlier in this file spells this model
# "models/lambdamart.monoT5.lgb" (dots), while the path below uses an
# underscore; confirm which file name actually exists.
MODEL_FILE_1 = "models/lambdamart.lgb"
MODEL_FILE_2 = "models/lambdamart_monoT5.lgb"

# Load both ensembles in order (model 1 first, then model 2).
lmart1, lmart2 = (
    RTEnsemble(model_path, name=model_name, format="LightGBM")
    for model_path, model_name in (
        (MODEL_FILE_1, "LGBM_lmart_1"),
        (MODEL_FILE_2, "LGBM_lmart_2"),
    )
)

# NOTE(review): both models are scored on the same `test_subset`, so that
# dataset presumably has to contain the features of both models; verify the
# TEST_FILE that was loaded.
stat_sig = statistical_significance(
    datasets=[test_subset],
    model_a=lmart1,
    model_b=lmart2,
    metrics=[
        mrr,
        ap,
        precision_1,
        precision_5,
        precision_10,
        ndcg_10,
        ndcg_20,
    ],
    n_perm=100000,
)
stat_sig.to_dataframe()

In [None]:
from rankeval.analysis.statistical import statistical_significance

# Baseline LambdaMART vs. LambdaMART with monoT5, title, URL and text features.
# NOTE(review): the configuration cell earlier in this file spells this model
# "models/lambdamart.monoT5.titleUrlText.lgb" (dots), while the path below
# uses underscores; confirm which file name actually exists.
MODEL_FILE_1 = "models/lambdamart.lgb"
MODEL_FILE_2 = "models/lambdamart_monoT5_titleUrlText.lgb"

# Load both ensembles in order (model 1 first, then model 2).
lmart1, lmart2 = (
    RTEnsemble(model_path, name=model_name, format="LightGBM")
    for model_path, model_name in (
        (MODEL_FILE_1, "LGBM_lmart_1"),
        (MODEL_FILE_2, "LGBM_lmart_2"),
    )
)

# NOTE(review): both models are scored on the same `test_subset`, so that
# dataset presumably has to contain the features of both models; verify the
# TEST_FILE that was loaded.
stat_sig = statistical_significance(
    datasets=[test_subset],
    model_a=lmart1,
    model_b=lmart2,
    metrics=[
        mrr,
        ap,
        precision_1,
        precision_5,
        precision_10,
        ndcg_10,
        ndcg_20,
    ],
    n_perm=100000,
)
stat_sig.to_dataframe()