Use Python 3.10 for this project (the pinned `tensorflow==2.11` does not support newer Python versions).

In [54]:
pip install "numpy<2.0" python-terrier==0.12.1 nltk scikit-learn lightgbm fastrank tensorflow==2.11 keras LambdaRankNN

Note: you may need to restart the kernel to use updated packages.


In [55]:
import nltk
import pyterrier as pt

# Fetch the Punkt tokenizer data; NLTK reports it as up-to-date when already installed.
nltk.download("punkt")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sebim\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [56]:
# Load the NFCorpus collection via PyTerrier's dataset API (the "irds:" prefix selects ir_datasets).
dataset = pt.get_dataset("irds:nfcorpus")

In [57]:
from pathlib import Path

# Metadata keys stored with every document and the length reserved for each value.
meta_lengths = {"docno": 16, "title": 256, "abstract": 65536, "url": 128}

# The indexer needs a path argument, but it is ignored because the index lives in memory.
indexer = pt.index.IterDictIndexer(
    str(Path.cwd()),
    meta=meta_lengths,
    type=pt.index.IndexingType.MEMORY,
)

# Index the whole corpus using title, abstract and url as the indexed fields.
index = indexer.index(dataset.get_corpus_iter(), fields=["title", "abstract", "url"])

nfcorpus documents: 100%|██████████| 5371/5371 [00:05<00:00, 1007.13it/s]


In [58]:
# Weighting-model scores exposed as learning-to-rank features for each retrieved document.
letor_features = ["WMODEL:BM25", "WMODEL:PL2", "WMODEL:DPH"]

# First-stage retriever shared by all models: ranks with BM25, keeps 100 results per
# query, and returns the listed features together with the stored metadata fields.
base_model = pt.terrier.FeaturesRetriever(
    index,
    features=letor_features,
    wmodel="BM25",
    num_results=100,
    metadata=["docno", "title", "abstract", "url"],
)

In [59]:
from models.lambdaMART import *
from models.lambdaRank import *
from models.rankSVM import *
from models.coordAscent import *
from models.randomForest import *
from models.rankNet import *

In [60]:
import datetime

# (display name, model) pairs built on top of the shared first-stage retriever.
# Comment or uncomment entries to choose which rankers are trained and evaluated.
models = [
    ("lambdaMART", get_lambdaMART_model(base_model)),
    # NOTE(review): factory name assumed after removing a stray "W"; verify against models/lambdaRank.py
    # ("lambdaRank", get_lambdaRank_model(base_model)),
    # ("rankSVM", get_rankSVM_model(base_model)),
    # ("coordAscent", get_coord_ascent_model(base_model)),
    # ("randomForest", get_random_forest_model(base_model)),
    ("rankNet", get_ranknet_model(base_model)),
]

# Instantiate each split once and reuse it for both its topics and its qrels.
train_split = pt.get_dataset("irds:nfcorpus/train/nontopic")
dev_split = pt.get_dataset("irds:nfcorpus/dev/nontopic")

# Positional arguments passed to every model's fit():
# training topics, training qrels, validation topics, validation qrels.
fitting_args = (
    train_split.get_topics(),
    train_split.get_qrels(),
    dev_split.get_topics(),
    dev_split.get_qrels(),
)

# Training time of each model in seconds, in the same order as `models`.
# Reset on every run of this cell so it always stays aligned with `models`.
fitting_durations = []
for model_name, model in models:
    print(f"Training {model_name}")
    start = datetime.datetime.now()
    model.fit(*fitting_args)
    fitting_durations.append((datetime.datetime.now() - start).total_seconds())

Training lambdaMART
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001030 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 109480, number of used features: 3
Training rankNet
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
ndcg: 0.9578656125095901


In [61]:
from pyterrier.measures import nDCG, RR, MAP

# Instantiate the held-out test split once; it provides both the topics and the qrels.
test_split = pt.get_dataset("irds:nfcorpus/test/nontopic")

# (topics, qrels) pair, unpacked positionally wherever the test set is needed.
test_datasets = (
    test_split.get_topics(),
    test_split.get_qrels(),
)

# Evaluate the BM25 baseline next to every trained model on the test split.
# The system list and the names list are built in the same order, so rows line up.
basic_evaluations = pt.Experiment(
    [base_model] + [model for _, model in models],
    *test_datasets,
    names=["BM25"] + [model_name for model_name, _ in models],
    eval_metrics=[nDCG @ 10, RR @ 10, MAP],
)

# Last expression of the cell: rendered as the results table.
basic_evaluations



Unnamed: 0,name,nDCG@10,RR@10,AP
0,BM25,0.243556,0.451312,0.092002
1,lambdaMART,0.245393,0.440647,0.095133
2,rankNet,0.240009,0.438385,0.09038


In [62]:
from fairness import fairness_evaluation, compute_df

# Run the project's compute_df helper for the BM25 baseline on the test topics and qrels.
baseline_df = compute_df(base_model, *test_datasets)

# Same computation for every trained model, in the same order as `models`.
model_dfs = [compute_df(model, *test_datasets) for _, model in models]

# Uncomment to inspect the intermediate results:
# print("Baseline")
# display(baseline_df)
# print("Models")
# for model_name, model_df in zip([model_name for model_name, _ in models], model_dfs):
#     print(model_name)
#     display(model_df)




In [63]:
# Compare every trained model against the BM25 baseline with the project's fairness metrics.
fairness_evaluations = [fairness_evaluation(model_df, baseline_df) for model_df in model_dfs]

# The loop variable is deliberately NOT named `fairness_evaluation`: reusing that name
# would rebind the imported function to a result dict, so running this cell a second
# time would fail with "'dict' object is not callable".
for model_name, model_fairness in zip([model_name for model_name, _ in models], fairness_evaluations):
    print(model_name)
    display(model_fairness)

lambdaMART


{'InterQuery': {'mean_nDCG': 0.9368,
  'std_nDCG': 0.0955,
  'range_nDCG': 0.4969,
  'fairness_score': 0.898},
 'LabelInversionRate': 0.1589,
 'IndividualFairnessViolation': 0.0,
 'KendallsTauVsBaseline': 0.6951}

rankNet


{'InterQuery': {'mean_nDCG': 0.944,
  'std_nDCG': 0.091,
  'range_nDCG': 0.4187,
  'fairness_score': 0.9036},
 'LabelInversionRate': 0.1409,
 'IndividualFairnessViolation': 0.0,
 'KendallsTauVsBaseline': 0.9479}

In [None]:
# Save the results
import json
import os

output_dir = "BM_25_letor"
os.makedirs(output_dir, exist_ok=True)

# Save the basic evaluations together with each model's training time.
# The BM25 baseline is never fitted, so its duration is None.
# `assign` returns a new frame, leaving the `basic_evaluations` table shown earlier unmodified.
timed_evaluations = basic_evaluations.assign(time=[None] + fitting_durations)
timed_evaluations.to_csv(os.path.join(output_dir, "basic_evaluations.csv"), index=False)

# Combine the fairness evaluations into a single JSON-serialisable list.
# Each entry is a shallow copy tagged with its model name, so the dicts held in
# `fairness_evaluations` are not mutated (and the builtin `eval` is not shadowed).
final_fairness = [
    {**model_fairness, "model": model_name}
    for (model_name, _), model_fairness in zip(models, fairness_evaluations)
]

# Save the fairness evaluations
with open(os.path.join(output_dir, "fairness_evaluation.json"), "w") as f:
    json.dump(final_fairness, f)