In [1]:
pip install python-terrier==0.12.1 nltk scikit-learn lightgbm

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pyterrier as pt
import nltk

# Make sure the NLTK "punkt" data package is available locally.
# NOTE(review): none of the visible cells call nltk directly — presumably the imported
# model/fairness modules need this resource; confirm, or drop the download.
nltk.download("punkt")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sebim\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
# Handle for the full NFCorpus collection ("irds:" dataset id); its corpus iterator
# is what the indexing cell consumes.
dataset = pt.get_dataset("irds:nfcorpus")

In [4]:
from pathlib import Path

# Metadata keys stored with every document, mapped to the length reserved for each.
meta_lengths = {
    "docno": 16,
    "title": 256,
    "abstract": 65536,
    "url": 128,
}

# In-memory indexer; per the original author's note the path argument is ignored in this mode.
indexer = pt.index.IterDictIndexer(
    str(Path.cwd()),  # this will be ignored
    meta=meta_lengths,
    type=pt.index.IndexingType.MEMORY,
)

# Index the title, abstract and url of every document yielded by the corpus iterator.
index = indexer.index(
    dataset.get_corpus_iter(),
    fields=["title", "abstract", "url"],
)

Java started (triggered by TerrierIndexer.__init__) and loaded: pyterrier.java, pyterrier.terrier.java [version=5.11 (build: craig.macdonald 2025-01-13 21:29), helper_version=0.0.8]
nfcorpus documents: 100%|██████████| 5371/5371 [00:03<00:00, 1345.79it/s]


In [5]:
# Weighting models whose scores are attached to each retrieved document as features.
feature_models = ["WMODEL:BM25", "WMODEL:PL2", "WMODEL:DPH"]

# First-stage retriever over the index: ranks with BM25, keeps 100 results per query
# and attaches the feature scores listed above.
base_model = pt.terrier.FeaturesRetriever(
    index,
    num_results=100,
    wmodel="BM25",
    features=feature_models,
)

In [6]:
from models.lambdaMART import *
from models.lambdaRank import *
from models.rankSVM import *

# (name, pipeline) pairs for the learned rankers, each built on the shared first-stage retriever.
models = [
    ("lambdaMART", get_lambdaMART_model(base_model)),
    ("lambdaRank", get_lambdaRank_model(base_model)),
    # ("rankSVM", get_rankSVM_model(base_model)),
]

# Resolve each split once and reuse the handle for both its topics and its qrels.
train_split = pt.get_dataset("irds:nfcorpus/train/nontopic")
dev_split = pt.get_dataset("irds:nfcorpus/dev/nontopic")

# Positional arguments handed to fit(): train topics, train qrels, dev topics, dev qrels.
fitting_args = (
    train_split.get_topics(),
    train_split.get_qrels(),
    dev_split.get_topics(),
    dev_split.get_qrels(),
)

# Train every ranker on the same train/dev data, announcing each one as it starts.
for model_name, model in models:
    print(f"Training {model_name}")
    model.fit(*fitting_args)


Training lambdaMART
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000242 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 109480, number of used features: 3
Training lambdaRank
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000323 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 109480, number of used features: 3


In [7]:
from pyterrier.measures import nDCG, RR, MAP

# Held-out test split used for the effectiveness comparison.
test_split = pt.get_dataset("irds:nfcorpus/test/nontopic")

# Compare the BM25 retriever (listed first) with every trained ranker on nDCG@10, RR@10 and MAP.
basic_evaluations = pt.Experiment(
    [base_model, *(ranker for _, ranker in models)],
    test_split.get_topics(),
    test_split.get_qrels(),
    eval_metrics=[nDCG @ 10, RR @ 10, MAP],
    names=["BM25", *(label for label, _ in models)],
)

# Leave the result table as the cell's last expression so it is displayed.
basic_evaluations





Unnamed: 0,name,nDCG@10,RR@10,AP
0,BM25,0.243556,0.451312,0.092002
1,lambdaMART,0.245393,0.440647,0.095133
2,lambdaRank,0.245815,0.447826,0.095976


In [15]:
from fairness import fairness_evaluation, compute_df

# Test split whose topics and qrels are passed to compute_df for every system.
test_split = pt.get_dataset("irds:nfcorpus/test/nontopic")

# Frame for the first-stage retriever.
baseline_df = compute_df(
    base_model,
    test_split.get_topics(),
    test_split.get_qrels(),
)

# One frame per trained ranker, in the same order as `models`;
# topics and qrels are fetched afresh for every call.
model_dfs = [
    compute_df(ranker, test_split.get_topics(), test_split.get_qrels())
    for _, ranker in models
]

# Show the baseline frame first, then each model's frame under its name.
print("Baseline")
display(baseline_df)
print("Models")
for model_name, model_df in zip((label for label, _ in models), model_dfs):
    print(model_name)
    display(model_df)




Baseline


Unnamed: 0,qid,query,docid,rank,features,docno,score,label
0,PLAIN-102,stopping heart disease in childhood,3953,0,"[16.655602929658308, 8.765510355432781, 11.141...",MED-3954,16.655603,
1,PLAIN-102,stopping heart disease in childhood,762,1,"[16.079501727250335, 8.024033817296976, 9.5725...",MED-763,16.079502,
2,PLAIN-102,stopping heart disease in childhood,3113,2,"[16.079501727250335, 8.024033817296976, 9.5725...",MED-3114,16.079502,
3,PLAIN-102,stopping heart disease in childhood,4246,3,"[16.079501727250335, 8.024033817296976, 9.5725...",MED-4247,16.079502,
4,PLAIN-102,stopping heart disease in childhood,4615,4,"[16.00621241391512, 7.982582186272852, 9.54262...",MED-4616,16.006212,
...,...,...,...,...,...,...,...,...
13826,PLAIN-91,chronic headaches and pork parasites,4739,95,"[7.751967675154791, 3.269163326382482, 4.14077...",MED-4740,7.751968,
13827,PLAIN-91,chronic headaches and pork parasites,4166,96,"[7.726643306524053, 3.2556434741782967, 4.1315...",MED-4167,7.726643,
13828,PLAIN-91,chronic headaches and pork parasites,4473,97,"[7.651653330675077, 3.215831286898902, 4.10442...",MED-4474,7.651653,
13829,PLAIN-91,chronic headaches and pork parasites,4133,98,"[7.482212079742674, 3.1270356839099955, 4.0438...",MED-4134,7.482212,


Models
lambdaMART


Unnamed: 0,qid,query,docid,features,docno,score,rank,label
0,PLAIN-102,stopping heart disease in childhood,3953,"[16.655602929658308, 8.765510355432781, 11.141...",MED-3954,-0.320946,6,
1,PLAIN-102,stopping heart disease in childhood,762,"[16.079501727250335, 8.024033817296976, 9.5725...",MED-763,0.095866,2,
2,PLAIN-102,stopping heart disease in childhood,3113,"[16.079501727250335, 8.024033817296976, 9.5725...",MED-3114,0.095866,3,
3,PLAIN-102,stopping heart disease in childhood,4246,"[16.079501727250335, 8.024033817296976, 9.5725...",MED-4247,0.095866,4,
4,PLAIN-102,stopping heart disease in childhood,4615,"[16.00621241391512, 7.982582186272852, 9.54262...",MED-4616,0.095866,5,
...,...,...,...,...,...,...,...,...
13826,PLAIN-91,chronic headaches and pork parasites,4739,"[7.751967675154791, 3.269163326382482, 4.14077...",MED-4740,-1.615540,57,
13827,PLAIN-91,chronic headaches and pork parasites,4166,"[7.726643306524053, 3.2556434741782967, 4.1315...",MED-4167,-1.615540,58,
13828,PLAIN-91,chronic headaches and pork parasites,4473,"[7.651653330675077, 3.215831286898902, 4.10442...",MED-4474,-1.742992,59,
13829,PLAIN-91,chronic headaches and pork parasites,4133,"[7.482212079742674, 3.1270356839099955, 4.0438...",MED-4134,-1.950842,75,


lambdaRank


Unnamed: 0,qid,query,docid,features,docno,score,rank,label
0,PLAIN-102,stopping heart disease in childhood,3953,"[16.655602929658308, 8.765510355432781, 11.141...",MED-3954,0.350033,0,
1,PLAIN-102,stopping heart disease in childhood,762,"[16.079501727250335, 8.024033817296976, 9.5725...",MED-763,0.254219,3,
2,PLAIN-102,stopping heart disease in childhood,3113,"[16.079501727250335, 8.024033817296976, 9.5725...",MED-3114,0.254219,4,
3,PLAIN-102,stopping heart disease in childhood,4246,"[16.079501727250335, 8.024033817296976, 9.5725...",MED-4247,0.254219,5,
4,PLAIN-102,stopping heart disease in childhood,4615,"[16.00621241391512, 7.982582186272852, 9.54262...",MED-4616,0.254219,6,
...,...,...,...,...,...,...,...,...
13826,PLAIN-91,chronic headaches and pork parasites,4739,"[7.751967675154791, 3.269163326382482, 4.14077...",MED-4740,-1.549501,72,
13827,PLAIN-91,chronic headaches and pork parasites,4166,"[7.726643306524053, 3.2556434741782967, 4.1315...",MED-4167,-1.549501,73,
13828,PLAIN-91,chronic headaches and pork parasites,4473,"[7.651653330675077, 3.215831286898902, 4.10442...",MED-4474,-1.529996,71,
13829,PLAIN-91,chronic headaches and pork parasites,4133,"[7.482212079742674, 3.1270356839099955, 4.0438...",MED-4134,-1.731290,77,


In [13]:
# Call fairness_evaluation once per learned model's frame, passing the baseline frame as the
# second argument, and display the collected results.
# NOTE(review): the saved output of this cell is `KeyError: 'abstract'`, and the frames displayed
# by the preceding cell contain no `abstract` column — presumably fairness_evaluation reads the
# document text from one of its inputs; confirm against the fairness module (one option to check
# is requesting extra metadata from the retriever).
# NOTE(review): this cell's execution count (13) is lower than the preceding cell's (15), so the
# recorded error may stem from stale kernel state — verify with Restart Kernel -> Run All.
fairness_evaluations = [fairness_evaluation(model_df, baseline_df) for model_df in model_dfs]
fairness_evaluations

KeyError: 'abstract'