From f889bc40665952f1698f4bd131bc0093276e279c Mon Sep 17 00:00:00 2001 From: Jasper Xian <41269031+jasper-xian@users.noreply.github.com> Date: Thu, 12 Oct 2023 15:10:55 -0400 Subject: [PATCH] 2CR for dense AToMiC retrieval (#1674) AToMiC 2CR initially for model ViT-L-14.laion2b_s32b_b82k --- docs/2cr/atomic.html | 472 ++++++++++++++++++++++++++ pyserini/2cr/atomic.py | 258 ++++++++++++++ pyserini/2cr/atomic.yaml | 49 +++ pyserini/2cr/atomic_html.template | 242 +++++++++++++ pyserini/2cr/atomic_html_row.template | 141 ++++++++ pyserini/encoded_query_info.py | 22 +- pyserini/search/_base.py | 6 + 7 files changed, 1189 insertions(+), 1 deletion(-) create mode 100644 docs/2cr/atomic.html create mode 100644 pyserini/2cr/atomic.py create mode 100644 pyserini/2cr/atomic.yaml create mode 100644 pyserini/2cr/atomic_html.template create mode 100644 pyserini/2cr/atomic_html_row.template diff --git a/docs/2cr/atomic.html b/docs/2cr/atomic.html new file mode 100644 index 000000000..5e08013bc --- /dev/null +++ b/docs/2cr/atomic.html @@ -0,0 +1,472 @@ + + + + + + + Pyserini Reproductions + + + + + + + + + + + + +
+
+
+
+

AToMiC

+
+
+
+
+ + +
+ +

Key:

+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Small T2ISmall I2TBase T2IBase I2TLarge T2ILarge I2T
val queriesMRR@10R@10R@1000MRR@10R@10R@1000MRR@10R@10R@1000MRR@10R@10R@1000MRR@10R@10R@1000MRR@10R@10R@1000
+
+ + + + + + +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+  --topics atomic-v0.2.1-text-validation \
+  --index atomic-v0.2.ViT-L-14.laion2b_s32b_b82k.image.validation \
+  --encoded-queries atomic-v0.2.1-text-ViT-L-14.laion2b_s32b_b82k-validation \
+  --hits 1000 --batch-size 256 --threads 32 \
+  --output run.atomic.ViT-L-14.laion2b_s32b_b82k.small-t2i.trec
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m recip_rank -M 10 atomic.validation.t2i \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.small-t2i.trec
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.10 atomic.validation.t2i \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.small-t2i.trec
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 atomic.validation.t2i \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.small-t2i.trec
+
+ +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+  --topics atomic-v0.2-image-validation \
+  --index atomic-v0.2.1.ViT-L-14.laion2b_s32b_b82k.text.validation \
+  --encoded-queries atomic-v0.2-image-ViT-L-14.laion2b_s32b_b82k-validation \
+  --hits 1000 --batch-size 256 --threads 32 \
+  --output run.atomic.ViT-L-14.laion2b_s32b_b82k.small-i2t.trec
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m recip_rank -M 10 atomic.validation.i2t \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.small-i2t.trec
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.10 atomic.validation.i2t \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.small-i2t.trec
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 atomic.validation.i2t \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.small-i2t.trec
+
+ +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+  --topics atomic-v0.2.1-text-validation \
+  --index atomic-v0.2.ViT-L-14.laion2b_s32b_b82k.image.base \
+  --encoded-queries atomic-v0.2.1-text-ViT-L-14.laion2b_s32b_b82k-validation \
+  --hits 1000 --batch-size 256 --threads 32 \
+  --output run.atomic.ViT-L-14.laion2b_s32b_b82k.base-t2i.trec
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m recip_rank -M 10 atomic.base.t2i \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.base-t2i.trec
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.10 atomic.base.t2i \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.base-t2i.trec
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 atomic.base.t2i \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.base-t2i.trec
+
+ +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+  --topics atomic-v0.2-image-validation \
+  --index atomic-v0.2.1.ViT-L-14.laion2b_s32b_b82k.text.base \
+  --encoded-queries atomic-v0.2-image-ViT-L-14.laion2b_s32b_b82k-validation \
+  --hits 1000 --batch-size 256 --threads 32 \
+  --output run.atomic.ViT-L-14.laion2b_s32b_b82k.base-i2t.trec
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m recip_rank -M 10 atomic.base.i2t \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.base-i2t.trec
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.10 atomic.base.i2t \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.base-i2t.trec
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 atomic.base.i2t \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.base-i2t.trec
+
+ +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+  --topics atomic-v0.2.1-text-validation \
+  --index atomic-v0.2.ViT-L-14.laion2b_s32b_b82k.image.large \
+  --encoded-queries atomic-v0.2.1-text-ViT-L-14.laion2b_s32b_b82k-validation \
+  --hits 1000 --batch-size 256 --threads 32 \
+  --output run.atomic.ViT-L-14.laion2b_s32b_b82k.large-t2i.trec
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m recip_rank -M 10 atomic.large.t2i \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.large-t2i.trec
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.10 atomic.large.t2i \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.large-t2i.trec
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 atomic.large.t2i \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.large-t2i.trec
+
+ +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+  --topics atomic-v0.2-image-validation \
+  --index atomic-v0.2.1.ViT-L-14.laion2b_s32b_b82k.text.large \
+  --encoded-queries atomic-v0.2-image-ViT-L-14.laion2b_s32b_b82k-validation \
+  --hits 1000 --batch-size 256 --threads 32 \
+  --output run.atomic.ViT-L-14.laion2b_s32b_b82k.large-i2t.trec
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m recip_rank -M 10 atomic.large.i2t \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.large-i2t.trec
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.10 atomic.large.i2t \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.large-i2t.trec
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 atomic.large.i2t \
+  run.atomic.ViT-L-14.laion2b_s32b_b82k.large-i2t.trec
+
+ +
+
+ + +
+
+ +
+ + + + + + + + + + + + diff --git a/pyserini/2cr/atomic.py b/pyserini/2cr/atomic.py new file mode 100644 index 000000000..ae3c64fb1 --- /dev/null +++ b/pyserini/2cr/atomic.py @@ -0,0 +1,258 @@ +import argparse +import os +import sys +from collections import defaultdict +from string import Template +import pkg_resources +import time +import yaml +import math +from ._base import run_eval_and_return_metric, ok_str, fail_str + + +atomic_models = [ + 'ViT-L-14.laion2b_s32b_b82k', +] + +trec_eval_metric_definitions = { + 'MRR@10': '-c -m recip_rank -M 10', + 'R@10': '-c -m recall.10', + 'R@1000': '-c -m recall.1000' +} + +def format_run_command(raw): + return raw.replace('--topics', '\\\n --topics')\ + .replace('--index', '\\\n --index')\ + .replace('--encoded-queries', '\\\n --encoded-queries')\ + .replace('--output ', '\\\n --output ')\ + .replace('--hits ', '\\\n --hits ') + + +def format_eval_command(raw): + return raw.replace('-c ', '\\\n -c ')\ + .replace('run.', '\\\n run.') + +def read_file(f): + fin = open(f, 'r') + text = fin.read() + fin.close() + + return text + +def list_models(): + for model in atomic_models: + print(model) + +def get_conditions(): + with open(pkg_resources.resource_filename(__name__, 'atomic.yaml')) as f: + yaml_data = yaml.safe_load(f) + + return [condition['name'] for condition in yaml_data['conditions']] + +def list_conditions(): + with open(pkg_resources.resource_filename(__name__, 'atomic.yaml')) as f: + yaml_data = yaml.safe_load(f) + for condition in yaml_data['conditions']: + print(condition['name']) + + +def print_results(table, metric): + print(f'Metric = {metric}') + print(' ' * 35, end='') + conditions = get_conditions() + for condition in conditions: + print(f'{condition}' + ' ' * 5, end='') + print('') + for model in atomic_models: + print(f'{model:35}', end='') + for condition in conditions: + print(f'{table[model][condition][metric]:.3f}' + ' ' * len(condition), end='') + print('') + print('') + +def generate_report(args): 
+ table = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0))) + commands = defaultdict(lambda: defaultdict(lambda: '')) + eval_commands = defaultdict(lambda: defaultdict(lambda: '')) + + html_template = read_file(pkg_resources.resource_filename(__name__, 'atomic_html.template')) + row_template = read_file(pkg_resources.resource_filename(__name__, 'atomic_html_row.template')) + + with open(pkg_resources.resource_filename(__name__, 'atomic.yaml')) as f: + yaml_data = yaml.safe_load(f) + for condition in yaml_data['conditions']: + name = condition['name'] + split = name.split('-')[0] # small, base, large + retrieval_type = name.split('-')[1] # t2i or i2t + cmd_template = condition['command'] + + for models in condition['models']: + model = models['model'] + + runfile = os.path.join(args.directory, f'run.atomic.{model}.{name}.trec') + cmd = Template(cmd_template).substitute(model=model, output=runfile) + commands[model][name] = format_run_command(cmd) + + for expected in models['scores']: + for metric in expected: + if split == 'small': + split = 'validation' + eval_cmd = f'python -m pyserini.eval.trec_eval ' + \ + f'{trec_eval_metric_definitions[metric]} atomic.{split}.{retrieval_type} {runfile}' + eval_commands[model][name] += format_eval_command(eval_cmd) + '\n\n' + + table[model][name][metric] = expected[metric] + + row_cnt = 1 + html_rows = [] + for model in atomic_models: + s = Template(row_template) + s = s.substitute(row_cnt=row_cnt, + model=model, + s1=f'{table[model]["small-t2i"]["MRR@10"]:8.4f}', + s2=f'{table[model]["small-t2i"]["R@10"]:8.4f}', + s3=f'{table[model]["small-t2i"]["R@1000"]:8.4f}', + s4=f'{table[model]["small-i2t"]["MRR@10"]:8.4f}', + s5=f'{table[model]["small-i2t"]["R@10"]:8.4f}', + s6=f'{table[model]["small-i2t"]["R@1000"]:8.4f}', + s7=f'{table[model]["base-t2i"]["MRR@10"]:8.4f}', + s8=f'{table[model]["base-t2i"]["R@10"]:8.4f}', + s9=f'{table[model]["base-t2i"]["R@1000"]:8.4f}', + 
s10=f'{table[model]["base-i2t"]["MRR@10"]:8.4f}', + s11=f'{table[model]["base-i2t"]["R@10"]:8.4f}', + s12=f'{table[model]["base-i2t"]["R@1000"]:8.4f}', + s13=f'{table[model]["large-t2i"]["MRR@10"]:8.4f}', + s14=f'{table[model]["large-t2i"]["R@10"]:8.4f}', + s15=f'{table[model]["large-t2i"]["R@1000"]:8.4f}', + s16=f'{table[model]["large-i2t"]["MRR@10"]:8.4f}', + s17=f'{table[model]["large-i2t"]["R@10"]:8.4f}', + s18=f'{table[model]["large-i2t"]["R@1000"]:8.4f}', + cmd1=commands[model]["small-t2i"], + cmd2=commands[model]["small-i2t"], + cmd3=commands[model]["base-t2i"], + cmd4=commands[model]["base-i2t"], + cmd5=commands[model]["large-t2i"], + cmd6=commands[model]["large-i2t"], + eval_cmd1=eval_commands[model]["small-t2i"].rstrip(), + eval_cmd2=eval_commands[model]["small-i2t"].rstrip(), + eval_cmd3=eval_commands[model]["base-t2i"].rstrip(), + eval_cmd4=eval_commands[model]["base-i2t"].rstrip(), + eval_cmd5=eval_commands[model]["large-t2i"].rstrip(), + eval_cmd6=eval_commands[model]["large-i2t"].rstrip(), + ) + + html_rows.append(s) + row_cnt += 1 + + all_rows = '\n'.join(html_rows) + with open(args.output, 'w') as out: + out.write(Template(html_template).substitute(title='AToMiC', rows=all_rows)) + +def run_conditions(args): + start = time.time() + + table = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0))) + + with open(pkg_resources.resource_filename(__name__, 'atomic.yaml')) as f: + yaml_data = yaml.safe_load(f) + for condition in yaml_data['conditions']: + name = condition['name'] + split = name.split('-')[0] # small, base, large + retrieval_type = name.split('-')[1] # t2i or i2t + cmd_template = condition['command'] + + if args.all or args.condition == name: + print(f'condition {name}:') + else: + continue + + for models in condition['models']: + model = models['model'] + + if args.all: + pass + elif args.condition != name: + continue + elif args.model and args.model != model: + continue + + print(f' - Model: {model}') + + runfile = 
os.path.join(args.directory, f'run.atomic.{model}.{name}.trec') + cmd = Template(cmd_template).substitute(model=model, output=runfile) + + if args.display_commands: + print(f'\n```bash\n{format_run_command(cmd)}\n```\n') + + if not os.path.exists(runfile): + if not args.dry_run: + os.system(cmd) + + for expected in models['scores']: + for metric in expected: + if not args.skip_eval: + if not os.path.exists(runfile): + continue + if split == 'small': + split = 'validation' + + score = float(run_eval_and_return_metric(metric, f'atomic.{split}.{retrieval_type}', + trec_eval_metric_definitions[metric], runfile)) + result = ok_str if math.isclose(score, float(expected[metric])) \ + else fail_str + f' expected {expected[metric]:.4f}' + print(f' {metric:7}: {score:.4f} {result}') + + table[model][name][metric] = score + else: + table[model][name][metric] = expected[metric] + + print('') + + for metric in trec_eval_metric_definitions: + print_results(table, metric) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Generate regression matrix for AToMiC.') + # To list all conditions/models + parser.add_argument('--list-conditions', action='store_true', default=False, help='List available conditions.') + parser.add_argument('--list-models', action='store_true', default=False, help='List available models.') + # For generating reports + parser.add_argument('--generate-report', action='store_true', default=False, help='Generate report.') + parser.add_argument('--output', type=str, help='File to store report.', required=False) + # For actually running the experimental conditions + parser.add_argument('--all', action='store_true', default=False, help='Run all conditions.') + parser.add_argument('--condition', type=str, help='Condition to run.', required=False) + parser.add_argument('--model', type=str, help='Model to run.', required=False) + parser.add_argument('--directory', type=str, help='Base directory.', default='', required=False) +
parser.add_argument('--dry-run', action='store_true', default=False, help='Print out commands but do not execute.') + parser.add_argument('--skip-eval', action='store_true', default=False, help='Skip running trec_eval.') + parser.add_argument('--display-commands', action='store_true', default=False, help='Display command.') + args = parser.parse_args() + + if args.list_conditions: + list_conditions() + sys.exit() + + if args.list_models: + list_models() + sys.exit() + + if args.generate_report: + if not args.output: + print(f'Must specify report filename with --output.') + sys.exit() + + generate_report(args) + sys.exit() + + if not args.all and not args.condition: + print(f'Must specify a specific condition using --condition or use --all to run all conditions.') + sys.exit() + + if args.all and (args.condition or args.model): + print('Specifying --all will run all conditions and models') + sys.exit() + + run_conditions(args) + diff --git a/pyserini/2cr/atomic.yaml b/pyserini/2cr/atomic.yaml new file mode 100644 index 000000000..f9333a72b --- /dev/null +++ b/pyserini/2cr/atomic.yaml @@ -0,0 +1,49 @@ +conditions: + - name: small-t2i + command: python -m pyserini.search.faiss --topics atomic-v0.2.1-text-validation --index atomic-v0.2.${model}.image.validation --encoded-queries atomic-v0.2.1-text-${model}-validation --hits 1000 --batch-size 256 --threads 32 --output $output + models: + - model: ViT-L-14.laion2b_s32b_b82k + scores: + - MRR@10: 0.3142 + R@10: 0.4900 + R@1000: 0.9451 + - name: small-i2t + command: python -m pyserini.search.faiss --topics atomic-v0.2-image-validation --index atomic-v0.2.1.${model}.text.validation --encoded-queries atomic-v0.2-image-${model}-validation --hits 1000 --batch-size 256 --threads 32 --output $output + models: + - model: ViT-L-14.laion2b_s32b_b82k + scores: + - MRR@10: 0.3158 + R@10: 0.4797 + R@1000: 0.9418 + - name: base-t2i + command: python -m pyserini.search.faiss --topics atomic-v0.2.1-text-validation --index 
atomic-v0.2.${model}.image.base --encoded-queries atomic-v0.2.1-text-${model}-validation --hits 1000 --batch-size 256 --threads 32 --output $output + models: + - model: ViT-L-14.laion2b_s32b_b82k + scores: + - MRR@10: 0.0708 + R@10: 0.1211 + R@1000: 0.4565 + - name: base-i2t + command: python -m pyserini.search.faiss --topics atomic-v0.2-image-validation --index atomic-v0.2.1.${model}.text.base --encoded-queries atomic-v0.2-image-${model}-validation --hits 1000 --batch-size 256 --threads 32 --output $output + models: + - model: ViT-L-14.laion2b_s32b_b82k + scores: + - MRR@10: 0.0716 + R@10: 0.1203 + R@1000: 0.4597 + - name: large-t2i + command: python -m pyserini.search.faiss --topics atomic-v0.2.1-text-validation --index atomic-v0.2.${model}.image.large --encoded-queries atomic-v0.2.1-text-${model}-validation --hits 1000 --batch-size 256 --threads 32 --output $output + models: + - model: ViT-L-14.laion2b_s32b_b82k + scores: + - MRR@10: 0.0542 + R@10: 0.0924 + R@1000: 0.3597 + - name: large-i2t + command: python -m pyserini.search.faiss --topics atomic-v0.2-image-validation --index atomic-v0.2.1.${model}.text.large --encoded-queries atomic-v0.2-image-${model}-validation --hits 1000 --batch-size 256 --threads 32 --output $output + models: + - model: ViT-L-14.laion2b_s32b_b82k + scores: + - MRR@10: 0.0393 + R@10: 0.0764 + R@1000: 0.3343 \ No newline at end of file diff --git a/pyserini/2cr/atomic_html.template b/pyserini/2cr/atomic_html.template new file mode 100644 index 000000000..799a435ae --- /dev/null +++ b/pyserini/2cr/atomic_html.template @@ -0,0 +1,242 @@ + + + + + + + Pyserini Reproductions + + + + + + + + + + + + +
+
+
+
+

$title

+
+
+
+
+ + +
+ +

Key:

+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +$rows + + +
Small T2ISmall I2TBase T2IBase I2TLarge T2ILarge I2T
val queriesMRR@10R@10R@1000MRR@10R@10R@1000MRR@10R@10R@1000MRR@10R@10R@1000MRR@10R@10R@1000MRR@10R@10R@1000
+
+ +
+ + + + + + + + + + + + diff --git a/pyserini/2cr/atomic_html_row.template b/pyserini/2cr/atomic_html_row.template new file mode 100644 index 000000000..e49d3cfaf --- /dev/null +++ b/pyserini/2cr/atomic_html_row.template @@ -0,0 +1,141 @@ + + + +$model +$s1 +$s2 +$s3 + +$s4 +$s5 +$s6 + +$s7 +$s8 +$s9 + +$s10 +$s11 +$s12 + +$s13 +$s14 +$s15 + +$s16 +$s17 +$s18 + + + + +
+ + + + + + +
+
+Command to generate run: + +
+
$cmd1
+
+Evaluation commands: + +
+
${eval_cmd1}
+
+ +
+
+Command to generate run: + +
+
$cmd2
+
+Evaluation commands: + +
+
${eval_cmd2}
+
+ +
+
+Command to generate run: + +
+
$cmd3
+
+Evaluation commands: + +
+
${eval_cmd3}
+
+ +
+
+Command to generate run: + +
+
$cmd4
+
+Evaluation commands: + +
+
${eval_cmd4}
+
+ +
+
+Command to generate run: + +
+
$cmd5
+
+Evaluation commands: + +
+
${eval_cmd5}
+
+ +
+
+Command to generate run: + +
+
$cmd6
+
+Evaluation commands: + +
+
${eval_cmd6}
+
+ +
+
+ + +
+ \ No newline at end of file diff --git a/pyserini/encoded_query_info.py b/pyserini/encoded_query_info.py index 620b921c9..e3465786c 100644 --- a/pyserini/encoded_query_info.py +++ b/pyserini/encoded_query_info.py @@ -484,5 +484,25 @@ "size (bytes)": 67615770, "total_queries": 6980, "downloaded": False - }, + }, + "atomic-v0.2.1-text-ViT-L-14.laion2b_s32b_b82k-validation": { + "description": "AToMiC text v0.2.1 validation set encoded by ViT-L-14.laion2b_s32b_b82k.", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-atomic-v0.2.1-text-ViT-L-14.laion2b_s32b_b82k-validation-20231008-e371ed.tar.gz", + ], + "md5": "bc3f51d1ddd50dfaf3377497a692d475", + "size (bytes)": 58020647, + "total_queries": 17173, + "downloaded": False + }, + "atomic-v0.2-image-ViT-L-14.laion2b_s32b_b82k-validation": { + "description": "AToMiC image v0.2 validation set encoded by ViT-L-14.laion2b_s32b_b82k.", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-atomic-v0.2-image-ViT-L-14.laion2b_s32b_b82k-validation-20231008-e371ed.tar.gz", + ], + "md5": "3d45cbb7f39e842a03f936c78ff2620d", + "size (bytes)": 43983721, + "total_queries": 16131, + "downloaded": False + }, } diff --git a/pyserini/search/_base.py b/pyserini/search/_base.py index f20017f66..bc1a68c59 100644 --- a/pyserini/search/_base.py +++ b/pyserini/search/_base.py @@ -302,6 +302,10 @@ 'miracl-v1.0-zh-dev': JTopics.MIRACL_V10_ZH_DEV, 'miracl-v1.0-de-dev': JTopics.MIRACL_V10_DE_DEV, 'miracl-v1.0-yo-dev': JTopics.MIRACL_V10_YO_DEV, + + # AToMiC topics + 'atomic-v0.2.1-text-validation': JTopics.ATOMIC_V021_TEXT_VAL, + 'atomic-v0.2-image-validation': JTopics.ATOMIC_V021_IMAGE_VAL, } qrels_mapping = { @@ -445,6 +449,8 @@ 'miracl-v1.0-zh-dev': JQrels.MIRACL_V10_ZH_DEV, 'miracl-v1.0-de-dev': JQrels.MIRACL_V10_DE_DEV, 'miracl-v1.0-yo-dev': JQrels.MIRACL_V10_YO_DEV, + 'atomic.validation.t2i': JQrels.ATOMIC_VAL_T2I, + 'atomic.validation.i2t': 
JQrels.ATOMIC_VAL_I2T, }