Use Python 3.10 for this project.

In [None]:
pip install "numpy<2.0" python-terrier==0.12.1 nltk scikit-learn lightgbm fastrank tensorflow==2.11 keras LambdaRankNN

In [None]:
import pyterrier as pt
import nltk

# Fetch the NLTK "punkt" resource before anything else runs.
# NOTE(review): presumably needed for tokenisation inside the project modules — confirm.
nltk.download("punkt")

Load the datasets so that they are ready to use.

In [None]:
from dataset_setups import NFCorpus, Antique

# Registry of the available datasets, keyed by the short name used to select one.
datasets = dict(nfcorpus=NFCorpus(), antique=Antique())

In [None]:
dataset = datasets["antique"]  # Change to "nfcorpus" for the NFCorpus dataset

In [None]:
# First-stage retriever over the selected dataset's index. It is passed to every
# model factory and is also evaluated on its own as the "BM25" baseline.
# `wmodel` selects BM25 for ranking, and `features` requests three weighting-model
# scores (BM25, PL2, DPH) per result.
# NOTE(review): num_results=100 presumably caps the candidates returned per query,
# and `metadata` the document fields carried along — confirm against PyTerrier docs.
base_model = pt.terrier.FeaturesRetriever(
    dataset.index,
    wmodel="BM25",
    features=["WMODEL:BM25", "WMODEL:PL2", "WMODEL:DPH"],
    num_results=100,
    metadata=dataset.metadata,
)

In [None]:
from models.lambdaMART import *
from models.lambdaRank import *
from models.rankSVM import *
from models.coordAscent import *
from models.randomForest import *
from models.rankNet import *
import datetime
import time

# Learning-to-rank models under comparison, as (display name, model) pairs.
# Every factory receives the shared first-stage retriever `base_model`.
models = [
    ("lambdaMART", get_lambdaMART_model(base_model)),
    ("lambdaRank", get_lambdaRank_model(base_model)),
    # ("rankSVM", get_rankSVM_model(base_model)),
    ("coordAscent", get_coord_ascent_model(base_model)),
    ("randomForest", get_random_forest_model(base_model)),
    ("rankNet", get_ranknet_model(base_model))
]

# Fitting time in seconds, one entry per model, in the same order as `models`.
fitting_durations = []
for model_name, model in models:
    print(f"\nTraining {model_name}")
    # perf_counter() is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments the way datetime.now() differences can.
    start = time.perf_counter()
    model.fit(*dataset.get_train())
    fitting_durations.append(time.perf_counter() - start)

In [None]:
from pyterrier.measures import nDCG, RR, MAP

# Evaluate the baseline retriever and every trained model with the arguments
# returned by dataset.get_test(). The baseline comes first and is labelled "BM25".
basic_evaluations = pt.Experiment(
    [base_model, *(trained for _, trained in models)],
    *dataset.get_test(),
    names=["BM25", *(label for label, _ in models)],
    eval_metrics=[nDCG @ 10, RR @ 10, MAP],
)

# Bare expression so the notebook renders the result.
basic_evaluations

In [None]:
from fairness import fairness_evaluation, compute_df

# Output of the untrained first-stage retriever, used as the reference point.
baseline_df = compute_df(base_model, *dataset.get_test())

# One output per trained model, in the same order as `models`.
model_dfs = [
    compute_df(trained, *dataset.get_test())
    for _, trained in models
]

# print("Baseline")
# display(baseline_df)
# print("Models")
# for model_name, model_df in zip([model_name for model_name, _ in models], model_dfs):
#     print(model_name)
#     display(model_df)


In [None]:
# Evaluate each model's output against the baseline output, in `models` order.
fairness_evaluations = [
    fairness_evaluation(model_df, baseline_df, text_field=dataset.primary_field)
    for model_df in model_dfs
]

# The loop variable must not be named `fairness_evaluation`: that would rebind the
# imported function to a result object and make this cell fail when re-run.
for model_name, model_fairness in zip((name for name, _ in models), fairness_evaluations):
    print(model_name)
    display(model_fairness)

In [None]:
# Save the results
import json
import os

# Results are written to experiments/<dataset.name>_results/, created on demand.
output_dir = os.path.join("experiments", dataset.name + "_results")
os.makedirs(output_dir, exist_ok=True)

# Save the basic evaluations
# The first row is the baseline, which is never fitted here, hence the leading None.
basic_evaluations["time"] = [None] + fitting_durations
basic_evaluations.to_csv(os.path.join(output_dir, "basic_evaluations.csv"), index=False)

# Combine the fairness evaluations into a single JSON list, tagging each entry
# with the name of the model it belongs to.
# The loop variable is deliberately not called `eval`, which would shadow the
# builtin for the rest of the kernel session.
final_fairness = []
for model_name, model_fairness in zip((name for name, _ in models), fairness_evaluations):
    model_fairness["model"] = model_name
    final_fairness.append(model_fairness)

# Save the fairness evaluations
with open(os.path.join(output_dir, "fairness_evaluations.json"), "w") as f:
    json.dump(final_fairness, f)