In [2]:
pip install python-terrier==0.12.1 nltk scikit-learn lightgbm fastrank LambdaRankNN

Collecting python-terrier==0.12.1
  Using cached python_terrier-0.12.1-py3-none-any.whl.metadata (11 kB)
Collecting nltk
  Using cached nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting scikit-learn
  Using cached scikit_learn-1.6.1-cp310-cp310-macosx_12_0_arm64.whl.metadata (31 kB)
Collecting lightgbm
  Using cached lightgbm-4.6.0-py3-none-macosx_12_0_arm64.whl.metadata (17 kB)
Collecting fastrank
  Using cached fastrank-0.8.0-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl.metadata (1.6 kB)
Collecting LambdaRankNN
  Using cached LambdaRankNN-0.1.1-py3-none-any.whl.metadata (1.8 kB)
Collecting pandas (from python-terrier==0.12.1)
  Downloading pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl.metadata (89 kB)
Collecting more-itertools (from python-terrier==0.12.1)
  Using cached more_itertools-10.6.0-py3-none-any.whl.metadata (37 kB)
Collecting tqdm (from python-terrier==0.12.1)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting ir-dat

In [3]:
import pyterrier as pt
import nltk

# Fetch the NLTK "punkt" package into the local nltk_data directory. The call is
# the cell's last expression, so its return value is shown as the cell output.
# NOTE(review): presumably needed by tokenisation in code not shown here; recent
# NLTK releases may look for "punkt_tab" instead — confirm which one is required.
nltk.download("punkt")

[nltk_data] Downloading package punkt to /Users/pepijn/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [4]:
# Handle for the full NFCorpus collection (the "irds:" prefix selects PyTerrier's
# ir_datasets integration); its corpus iterator is what gets indexed below.
dataset = pt.get_dataset("irds:nfcorpus")

In [5]:
from pathlib import Path

# Metadata keys to store with each document, mapped to the length reserved for
# each one.
meta_lengths = {"docno": 16, "title": 256, "abstract": 65536, "url": 128}

# A path argument has to be supplied, but (per the original author's note) it
# is ignored when the index is built in memory.
indexer = pt.index.IterDictIndexer(
    str(Path.cwd()),
    meta=meta_lengths,
    type=pt.index.IndexingType.MEMORY,
)

# Index the corpus documents using their title, abstract and url fields.
index = indexer.index(
    dataset.get_corpus_iter(),
    fields=["title", "abstract", "url"],
)

Java started (triggered by TerrierIndexer.__init__) and loaded: pyterrier.java, pyterrier.terrier.java [version=5.11 (build: craig.macdonald 2025-01-13 21:29), helper_version=0.0.8]
nfcorpus documents: 100%|██████████| 5371/5371 [00:01<00:00, 2841.39it/s]


In [6]:
# Per-document features requested from the retriever: one score per weighting model.
ltr_features = ["WMODEL:BM25", "WMODEL:PL2", "WMODEL:DPH"]
# Metadata columns carried along with every retrieved document.
result_metadata = ["docno", "title", "abstract", "url"]

# First-stage retriever: BM25 ranking of the top 100 documents per query, with
# the feature scores above attached for the downstream models.
base_model = pt.terrier.FeaturesRetriever(
    index,
    wmodel="BM25",
    features=ltr_features,
    num_results=100,
    metadata=result_metadata,
)

In [7]:
from models.lambdaMART import *
from models.lambdaRank import *
from models.rankSVM import *
from models.coordAscent import *
from models.randomForest import *
from models.rankNet import *

In [None]:
# Models to train, as (display name, model) pairs; every helper receives the
# shared first-stage retriever. Commented-out entries are disabled for this run.
models = [
    ("lambdaMART", get_lambdaMART_model(base_model)),
    # NOTE(review): helper name presumed to be get_lambdaRank_model (the previous
    # spelling had a stray character) — verify against models/lambdaRank.py
    # before enabling.
    # ("lambdaRank", get_lambdaRank_model(base_model)),
    # ("rankSVM", get_rankSVM_model(base_model)),
    # ("coordAscent", get_coord_ascent_model(base_model)),
    # ("randomForest", get_random_forest_model(base_model)),
    ("rankNet", get_ranknet_model(base_model)),
]

# Resolve each split once and reuse the handle for both topics and qrels,
# instead of looking the dataset up again for every call.
train_split = pt.get_dataset("irds:nfcorpus/train/nontopic")
dev_split = pt.get_dataset("irds:nfcorpus/dev/nontopic")

# Positional arguments for fit(): train topics, train qrels, dev topics, dev qrels.
fitting_args = (
    train_split.get_topics(),
    train_split.get_qrels(),
    dev_split.get_topics(),
    dev_split.get_qrels(),
)

for model_name, model in models:
    print(f"Training {model_name}")
    model.fit(*fitting_args)


In [None]:
from pyterrier.measures import nDCG, RR, MAP

# Resolve the test split once and reuse it for both topics and qrels.
test_split = pt.get_dataset("irds:nfcorpus/test/nontopic")

# Evaluate the first-stage retriever (reported as "BM25") next to every entry of
# `models` on the test split, using nDCG@10, RR@10 and MAP.
basic_evaluations = pt.Experiment(
    [base_model] + [model for _, model in models],
    test_split.get_topics(),
    test_split.get_qrels(),
    names=["BM25"] + [model_name for model_name, _ in models],
    eval_metrics=[nDCG @ 10, RR @ 10, MAP],
)

# Last expression of the cell: rendered as the result table.
basic_evaluations

In [None]:
from fairness import fairness_evaluation, compute_df

# Load the test topics and qrels once: they are identical for the baseline and
# for every model, so there is no need to fetch them again per compute_df call.
# NOTE(review): assumes compute_df does not modify its topics/qrels arguments
# in place — confirm in fairness.py.
test_split = pt.get_dataset("irds:nfcorpus/test/nontopic")
test_topics = test_split.get_topics()
test_qrels = test_split.get_qrels()

# Frame for the first-stage retriever; used as the reference in the next cell.
baseline_df = compute_df(base_model, test_topics, test_qrels)

# One frame per entry of `models`, in the same order.
model_dfs = [compute_df(model, test_topics, test_qrels) for _, model in models]

print("Baseline")
display(baseline_df)
print("Models")
# Pair each frame with its model name directly from `models`.
for (model_name, _), model_df in zip(models, model_dfs):
    print(model_name)
    display(model_df)


In [None]:
# Compare every model's frame against the baseline frame.
fairness_evaluations = [fairness_evaluation(model_df, baseline_df) for model_df in model_dfs]

# The loop variable must not be called `fairness_evaluation`: that would rebind
# the imported function to a result object, and re-running this cell would then
# fail when trying to call it.
for (model_name, _), model_fairness in zip(models, fairness_evaluations):
    print(model_name)
    display(model_fairness)