# Use Case: chatnoir-pyterrier for easy shared task participation

The [ChatNoir](https://chatnoir.eu/) search engine is a great way to quickly set up a shared task submission.
This notebook demonstrates that by creating baseline submissions for every compatible shared task.
A shared task is compatible if it:

- uses a ChatNoir-indexed document collection (e.g., the ClueWebs and MS MARCO)
- has topics and qrels in ir_datasets (or an ir_datasets-compatible extension)


## Setup


Install Python packages if run in Google Colab.


In [19]:
from sys import modules

# Install the dependencies only when the notebook runs inside Google Colab,
# detected by the `google.colab` module being present in `sys.modules`.
# The `!pip` line is IPython shell syntax, so this cell is not plain Python.
if "google.colab" in modules:
    !pip install -q chatnoir-pyterrier python-terrier

## Configurations


In [20]:
from pathlib import Path

# Hard-coded absolute root directory of the experiment data; adjust it when
# running the notebook on another machine.
EXPERIMENT_DIR = Path("/mnt/ceph/storage/data-in-progress/data-research/web-search/chatnoir/chatnoir-pyterrier")
# Sub-directory of the experiment root that holds cached files.
CACHE_DIR = EXPERIMENT_DIR.joinpath("cache")

In [21]:
from dataclasses import dataclass
from typing import Optional
from chatnoir_pyterrier import Index


@dataclass(frozen=True)
class Config:
    """Immutable description of one evaluation setup.

    One instance pairs a shared-task dataset with a single topic field, plus
    the metadata used to label the results (campaign, track, year, task).
    """

    # Dataset ID; loaded in this notebook via `get_dataset(f"irds:{dataset}")`.
    dataset: str
    # ChatNoir index belonging to the dataset's document collection.
    # NOTE(review): the configs in this notebook pass plain strings such as
    # "clueweb09" -- confirm that `Index` accepts them.
    index: Index
    # Topic field passed to `get_topics(variant=...)`.
    topics_variant: str
    # Evaluation campaign label, e.g. "TREC", "CLEF" or "NTCIR".
    campaign: str
    # Track label within the campaign, e.g. "Web".
    track: str
    # Year label of the shared task.
    year: int
    # Optional task label within the track; None when not applicable.
    task: Optional[str] = None

In [22]:
from typing import List


# All evaluated (shared task, topic field) combinations. Each row of the
# table describes one shared task together with the topic fields to run; it
# is expanded into one `Config` per topic field, in the order listed.
configs: List[Config] = [
    Config(
        dataset=dataset,
        index=index,
        topics_variant=variant,
        campaign=campaign,
        track=track,
        year=year,
        task=task,
    )
    for dataset, index, variants, campaign, track, year, task in (
        # (dataset, index, topic variants, campaign, track, year, task)
        ("clueweb09/en/trec-web-2009", "clueweb09", ("query", "description"), "TREC", "Web", 2009, None),
        ("clueweb09/en/trec-web-2010", "clueweb09", ("query", "description"), "TREC", "Web", 2010, None),
        ("clueweb09/en/trec-web-2011", "clueweb09", ("query", "description"), "TREC", "Web", 2011, None),
        ("clueweb09/en/trec-web-2012", "clueweb09", ("query", "description"), "TREC", "Web", 2012, None),
        # No description available.
        ("clueweb12/b13/clef-ehealth", "clueweb12", ("text",), "CLEF", "eHealth", 2016, None),
        # No description available.
        ("clueweb12/b13/ntcir-www-1", "clueweb12", ("text",), "NTCIR", "WWW", 2017, None),
        ("clueweb12/b13/ntcir-www-2", "clueweb12", ("title", "description"), "NTCIR", "WWW", 2018, None),
        ("clueweb12/b13/trec-misinfo-2019", "clueweb12", ("title", "description"), "TREC", "Health Misinfo", 2019, None),
        ("clueweb12/touche-2020-task-2", "clueweb12", ("title", "description"), "CLEF", "Touché", 2020, "2"),
        ("clueweb12/touche-2021-task-2", "clueweb12", ("title", "description"), "CLEF", "Touché", 2021, "2"),
        ("clueweb12/touche-2022-task-2", "clueweb12", ("title", "description"), "CLEF", "Touché", 2022, "2"),
        ("clueweb12/trec-web-2013", "clueweb12", ("query", "description"), "TREC", "Web", 2013, None),
        ("clueweb12/trec-web-2014", "clueweb12", ("query", "description"), "TREC", "Web", 2014, None),
        ("gov/trec-web-2002", "gov", ("title", "description"), "TREC", "Web", 2002, None),
        ("gov/trec-web-2003", "gov", ("title", "description"), "TREC", "Web", 2003, None),
        # No description available.
        ("gov/trec-web-2004", "gov", ("text",), "TREC", "Web", 2004, None),
        ("gov2/trec-tb-2004", "gov2", ("title", "description"), "TREC", "Terabyte", 2004, None),
        ("gov2/trec-tb-2005", "gov2", ("title", "description"), "TREC", "Terabyte", 2005, None),
        ("gov2/trec-tb-2006", "gov2", ("title", "description"), "TREC", "Terabyte", 2006, None),
        # No description available.
        ("msmarco-passage/trec-dl-2019", "msmarco-passage", ("text",), "TREC", "Deep Learning", 2019, None),
        # No description available.
        ("msmarco-passage/trec-dl-2020", "msmarco-passage", ("text",), "TREC", "Deep Learning", 2020, None),
        # No description available.
        ("msmarco-passage-v2/trec-dl-2021", "msmarco-passage-v2", ("text",), "TREC", "Deep Learning", 2021, None),
        # No description available.
        ("msmarco-passage-v2/trec-dl-2022", "msmarco-passage-v2", ("text",), "TREC", "Deep Learning", 2022, None),
    )
    for variant in variants
]

### Load topics and qrels

In [23]:
from pyterrier.datasets import get_dataset

# One PyTerrier dataset handle per config, in config order; the `irds:`
# prefix is prepended to each config's dataset ID.
datasets = [
    get_dataset(f"irds:{cfg.dataset}")
    for cfg in configs
]

In [24]:
# Topics of every config, using the topic field selected by that config.
topics = [
    ds.get_topics(variant=cfg.topics_variant) for cfg, ds in zip(configs, datasets)
]

In [25]:
# topics[0][:5]

In [26]:
# Relevance judgments of every config; the config itself is not needed here,
# it only keeps the list aligned with `configs`.
qrels = [
    ds.get_qrels()
    for _, ds in zip(configs, datasets)
]

There are multiple qrel fields available: ['relevance', 'method', 'iprob']. Defaulting to "relevance", but to use a different one, supply variant
There are multiple qrel fields available: ['relevance', 'method', 'iprob']. Defaulting to "relevance", but to use a different one, supply variant
There are multiple qrel fields available: ['relevance', 'trustworthiness', 'understandability']. Defaulting to "relevance", but to use a different one, supply variant
There are multiple qrel fields available: ['relevance', 'effectiveness', 'redibility']. Defaulting to "relevance", but to use a different one, supply variant
There are multiple qrel fields available: ['relevance', 'effectiveness', 'redibility']. Defaulting to "relevance", but to use a different one, supply variant
/home/heinrich/.ir_datasets/touche/2020/task-2/qrels.qrels
/home/heinrich/.ir_datasets/touche/2020/task-2/qrels.qrels
There are multiple qrel fields available: ['relevance', 'quality']. Defaulting to "relevance", but to use a

In [27]:
# qrels[0][:5]

### Prepare Caches

In [None]:
# cache_dirs = [CACHE_DIR / "experiment" / config.dataset for config in configs]
# Per-config directory of cached run files:
# <CACHE_DIR>/experiment5/<dataset>/<topics_variant>
cache_dirs = [
    CACHE_DIR.joinpath("experiment5", cfg.dataset, cfg.topics_variant)
    for cfg in configs
]

### Experiments

This notebook only loads the precomputed results of the experiments in `experiments.py` and re-evaluates them with any ir_measures-compatible effectiveness measure.

In [29]:
from pyterrier import Experiment, Transformer
from pyterrier.io import read_results
from ir_measures import nDCG

# Display names of the compared systems. Each name is also the stem of the
# cached run file `<name>.res.gz` inside a config's cache directory.
names = [
    "ChatNoir",
    "ChatNoir+monoT5",
    "ChatNoir+monoT5+duoT5",
]

# One evaluation result per config, aligned with `configs`. An entry is None
# when at least one of the cached run files of that config is missing.
# The loop variables are deliberately named differently from the module-level
# `topics` and `qrels` lists so they do not shadow the lists being zipped.
experiments = [
    (
        Experiment(
            # Replay the cached runs instead of querying the systems again.
            retr_systems=[
                Transformer.from_df(read_results(str(cache_dir / f"{name}.res.gz")))
                for name in names
            ],
            names=names,
            topics=config_topics,
            qrels=config_qrels,
            # Swap in one of the commented-out measures to evaluate it instead.
            eval_metrics=[
                # nDCG@5,
                nDCG(judged_only=True)@5,
                # nDCG,
                # nDCG(judged_only=True),
            ],
            # Significance is tested against the system at index 0 of `names`.
            baseline=0,
            test="t",
            correction="bonferroni",
            correction_alpha=0.05,
            verbose=True,
        )
        if all((cache_dir / f"{name}.res.gz").exists() for name in names)
        else None
    )
    for cache_dir, config_topics, config_qrels in zip(cache_dirs, topics, qrels)
]


pt.Experiment: 100%|██████████| 3/3 [00:00<00:00, 21.04system/s]
pt.Experiment: 100%|██████████| 3/3 [00:00<00:00, 25.25system/s]
pt.Experiment: 100%|██████████| 3/3 [00:00<00:00, 24.79system/s]
pt.Experiment: 100%|██████████| 3/3 [00:00<00:00, 25.24system/s]
pt.Experiment: 100%|██████████| 3/3 [00:00<00:00, 24.37system/s]
pt.Experiment: 100%|██████████| 3/3 [00:00<00:00, 27.62system/s]
pt.Experiment: 100%|██████████| 3/3 [00:00<00:00, 28.93system/s]
pt.Experiment: 100%|██████████| 3/3 [00:00<00:00, 32.42system/s]
pt.Experiment: 100%|██████████| 3/3 [00:01<00:00,  2.14system/s]
pt.Experiment: 100%|██████████| 3/3 [00:00<00:00, 20.43system/s]
pt.Experiment: 100%|██████████| 3/3 [00:00<00:00, 20.84system/s]
pt.Experiment: 100%|██████████| 3/3 [00:00<00:00, 22.05system/s]
pt.Experiment: 100%|██████████| 3/3 [00:00<00:00, 26.48system/s]
pt.Experiment: 100%|██████████| 3/3 [00:00<00:00, 27.73system/s]
pt.Experiment: 100%|██████████| 3/3 [00:00<00:00, 83.64system/s]
pt.Experiment: 100%|█████

In [30]:
from pandas import concat


# Stack the per-config result tables, tagging every row with the metadata of
# its config. The topic fields "title" and "text" are both reported under
# the common variant label "query".
df = concat(
    [
        result.assign(
            dataset=cfg.dataset,
            campaign=cfg.campaign,
            track=cfg.track,
            year=cfg.year,
            task=cfg.task,
            variant=cfg.topics_variant.replace("title", "query").replace("text", "query"),
        )
        for cfg, result in zip(configs, experiments)
        if result is not None
    ]
)
# Shorten the measure names: "(judged_only=True)" becomes a prime mark.
df = df.rename(columns=lambda col: col.replace("(judged_only=True)", "'"))
# Build every (system, dataset, variant) combination and outer-merge it in,
# so combinations without results still get a row (with missing scores).
all_names_datasets_variants = (
    df[["name"]]
    .drop_duplicates()
    .merge(
        df[["dataset", "campaign", "track", "year", "task"]]
        .drop_duplicates()
        .merge(df[["variant"]].drop_duplicates(), how="cross"),
        how="cross",
    )
)
df = df.merge(all_names_datasets_variants, how="outer")
df = df.sort_values(["variant", "dataset", "name"])
df

Unnamed: 0,name,nDCG'@5,nDCG'@5 +,nDCG'@5 -,nDCG'@5 p-value,nDCG'@5 reject,nDCG'@5 p-value corrected,dataset,campaign,track,year,task,variant
0,ChatNoir,0.155495,,,,False,,clueweb09/en/trec-web-2009,TREC,Web,2009,,description
30,ChatNoir+monoT5,0.170874,3.0,7.0,0.484797,False,0.969594,clueweb09/en/trec-web-2009,TREC,Web,2009,,description
60,ChatNoir+monoT5+duoT5,0.170577,3.0,7.0,0.491508,False,0.983016,clueweb09/en/trec-web-2009,TREC,Web,2009,,description
2,ChatNoir,0.231709,,,,False,,clueweb09/en/trec-web-2010,TREC,Web,2010,,description
32,ChatNoir+monoT5,0.269875,10.0,6.0,0.041979,False,0.083958,clueweb09/en/trec-web-2010,TREC,Web,2010,,description
...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,ChatNoir+monoT5,0.218265,4.0,3.0,0.319509,False,0.639017,msmarco-passage/trec-dl-2019,TREC,Deep Learning,2019,,query
87,ChatNoir+monoT5+duoT5,0.219606,4.0,2.0,0.254365,False,0.508730,msmarco-passage/trec-dl-2019,TREC,Deep Learning,2019,,query
29,ChatNoir,0.143130,,,,False,,msmarco-passage/trec-dl-2020,TREC,Deep Learning,2020,,query
59,ChatNoir+monoT5,0.141661,3.0,2.0,0.635772,False,1.000000,msmarco-passage/trec-dl-2020,TREC,Deep Learning,2020,,query


In [31]:
# List the distinct datasets present in `df`.
df["dataset"].unique()

array(['clueweb09/en/trec-web-2009', 'clueweb09/en/trec-web-2010',
       'clueweb09/en/trec-web-2011', 'clueweb09/en/trec-web-2012',
       'clueweb12/b13/clef-ehealth', 'clueweb12/b13/ntcir-www-1',
       'clueweb12/b13/ntcir-www-2', 'clueweb12/b13/trec-misinfo-2019',
       'clueweb12/touche-2020-task-2', 'clueweb12/touche-2021-task-2',
       'clueweb12/touche-2022-task-2', 'clueweb12/trec-web-2013',
       'clueweb12/trec-web-2014', 'msmarco-passage/trec-dl-2019',
       'msmarco-passage/trec-dl-2020'], dtype=object)

#### Print Table 2 from the paper

In [32]:
from pandas import isna

def _latex_row(cells):
    r"""Join table cells into one LaTeX row terminated by ``\\``.

    A single leading space (as produced by an empty first cell) is stripped,
    so such a row starts directly with ``&``.
    """
    return (r" & ".join(cells).removeprefix(" ") + r" \\").removeprefix(" ")


# Column of `df` whose values are printed in the table body.
measure = "nDCG'@5"
# measure = "nDCG'"

# Column specification: one left-aligned system column followed by one
# right-aligned column per dataset, without padding at the outer edges.
cols = [
    r"@{}",
    r"l",
    r"@{\quad}",
    *(r"r" for _ in df["dataset"].unique()),
    r"@{}",
]
print(r"\begin{tabular}{" + "".join(cols) + r"}")
print(r"\toprule")
# Three header rows (campaign, track plus task, two-digit year) and the
# \cmidrule separators printed below the first two of them.
cols1 = [
    r"\textbf{System}"
]
seps1 = []
cols2 = [
    r"",
]
seps2 = []
cols3 = [
    r"",
]
# 1-based table column numbers: column 1 holds the system name, so the data
# columns start at 2. `i` is the next free column; `last_sep1`/`last_sep2`
# mark where the current campaign/track group started.
last_sep1 = 2
last_sep2 = 2
i = 2
for campaign, df_campaign in df.groupby("campaign", sort=False):
    num_cols_campaign = 0
    for (track, task), df_track in df_campaign.groupby(["track", "task"], sort=False, dropna=False):
        # Abbreviate the track name so that it fits the narrow header.
        track = track.replace(r"é", r"{\'e}")
        track = track.replace(r"eHealth", r"eH.")
        if " " in track:
            # Multi-word track names are reduced to their initials.
            track = "".join([x[:1].upper() for x in track.split()])
        num_cols_track = 0
        for year, df_year in df_track.groupby("year", sort=False):
            cols3.append(r"\multicolumn{1}{c}{" + f"'{(year % 100):02d}" + r"}")
            num_cols_campaign += 1
            num_cols_track += 1
            i += 1
        cols2.append(r"\multicolumn{" + f"{num_cols_track}" + r"}{c}{\textbf{" + track + (f" T{task}" if not isna(task) else "") + "}}")
        seps2.append(r"\cmidrule(lr){" + f"{last_sep2}-{i-1}" + r"}")
        last_sep2 = i
    cols1.append(r"\multicolumn{" + f"{num_cols_campaign}" + r"}{c}{\textbf{" + campaign + "}}")
    seps1.append(r"\cmidrule(lr){" + f"{last_sep1}-{i-1}" + r"}")
    last_sep1 = i
# Drop the padding after the last cell of every header row. `str.replace`
# returns a new string, so the result must be stored back into the list;
# calling it without the assignment has no effect. All three lists are
# initialized with one element, so indexing with -1 is always valid.
for header_row in (cols1, cols2, cols3):
    header_row[-1] = header_row[-1].replace(r"{c}", r"{c@{}}")
print(_latex_row(cols1))
print(r"".join(seps1))
print(_latex_row(cols2))
print(r"".join(seps2))
print(_latex_row(cols3))
# Total table width (system column plus one column per dataset); it is the
# same for every variant, so it is computed once.
num_cols = 1 + len(df["dataset"].unique())
# Variants are sorted and then reversed, i.e. printed in descending order.
for variant, df_variant in reversed(list(df.groupby("variant", sort=True))):
    print(r"\midrule")
    print(r"\multicolumn{" + f"{num_cols}" + r"}{@{}l@{}}{\textit{" + str(variant).capitalize() + r"}} \\")
    print(r"\midrule")
    for name, df_name in df_variant.groupby("name", sort=True):
        # Show re-ranking stages as "+ <stage>" below the base system.
        name = name.replace("ChatNoir+monoT5+", "+ ")
        name = name.replace("ChatNoir+", "+ ")
        cols = [
            name,
        ]
        # Walk the datasets with the same grouping as the header rows above.
        for campaign, df_campaign in df_name.groupby("campaign", sort=False):
            for (track, task), df_track in df_campaign.groupby(["track", "task"], sort=False, dropna=False):
                for year, df_year in df_track.groupby("year", sort=False):
                    # Exactly one row per (variant, system, dataset) is expected.
                    assert len(df_year) == 1
                    row = df_year.iloc[0]
                    if isna(row[measure]):
                        # No result available for this combination.
                        cols.append(r"---")
                        continue
                    col = f"{row[measure]:0.2f}"
                    if row[f"{measure} reject"]:
                        # NOTE(review): `\textbfn` is presumably a custom macro of the
                        # paper's preamble -- confirm it is not a typo for `\textbf`.
                        col = r"\textbfn{" + col + r"}"
                    cols.append(col)
        print(_latex_row(cols))
print(r"\bottomrule")
print(r"\end{tabular}")

\begin{tabular}{@{}l@{\quad}rrrrrrrrrrrrrrr@{}}
\toprule
\textbf{System} & \multicolumn{9}{c}{\textbf{TREC}} & \multicolumn{4}{c}{\textbf{CLEF}} & \multicolumn{2}{c}{\textbf{NTCIR}} \\
\cmidrule(lr){2-10}\cmidrule(lr){11-14}\cmidrule(lr){15-16}
& \multicolumn{6}{c}{\textbf{Web}} & \multicolumn{1}{c}{\textbf{HM}} & \multicolumn{2}{c}{\textbf{DL}} & \multicolumn{1}{c}{\textbf{eH.}} & \multicolumn{3}{c}{\textbf{Touch{\'e} T2}} & \multicolumn{2}{c}{\textbf{WWW}} \\
\cmidrule(lr){2-7}\cmidrule(lr){8-8}\cmidrule(lr){9-10}\cmidrule(lr){11-11}\cmidrule(lr){12-14}\cmidrule(lr){15-16}
& \multicolumn{1}{c}{'09} & \multicolumn{1}{c}{'10} & \multicolumn{1}{c}{'11} & \multicolumn{1}{c}{'12} & \multicolumn{1}{c}{'13} & \multicolumn{1}{c}{'14} & \multicolumn{1}{c}{'19} & \multicolumn{1}{c}{'19} & \multicolumn{1}{c}{'20} & \multicolumn{1}{c}{'16} & \multicolumn{1}{c}{'20} & \multicolumn{1}{c}{'21} & \multicolumn{1}{c}{'22} & \multicolumn{1}{c}{'17} & \multicolumn{1}{c}{'18} \\
\midrule
\multicolumn{16}