In [2]:
import pandas as pd
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from sentence_transformers import SentenceTransformer, util
from collections import defaultdict

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
TOP_K = 20
FINAL_K = 5

df_collection = pd.read_pickle("subtask4b_collection_data.pkl")
df_query = pd.read_csv("subtask4b_query_tweets_dev.tsv", sep="\t")

df_collection["full_text"] = df_collection["title"].fillna('') + " " + df_collection["abstract"].fillna('')
doc_texts = df_collection["full_text"].tolist()
doc_uids = df_collection["cord_uid"].tolist()

queries = df_query["tweet_text"].tolist()
query_ids = df_query["post_id"].tolist()
true_labels = df_query["cord_uid"].tolist()

In [3]:
print("Encoding with SBERT for candidate retrieval")
sbert_model = SentenceTransformer("fine-tuned-multi-qa-MiniLM-L6-cos-v1")
doc_embeds = sbert_model.encode(doc_texts, convert_to_tensor=True, show_progress_bar=True)
query_embeds = sbert_model.encode(queries, convert_to_tensor=True, show_progress_bar=True)

topk_candidates = {}
for i, qvec in enumerate(query_embeds):
    scores = util.cos_sim(qvec, doc_embeds)[0]
    top_k = torch.topk(scores, k=TOP_K)
    indices = top_k.indices.tolist()
    topk_candidates[query_ids[i]] = [(doc_uids[j], doc_texts[j]) for j in indices]

Encoding with SBERT for candidate retrieval


Batches:   0%|          | 0/242 [00:00<?, ?it/s]

Batches:   0%|          | 0/44 [00:00<?, ?it/s]

In [7]:
#rerankers to compare
reranker_models = {
    "cross-msmarco": "cross-encoder/ms-marco-MiniLM-L6-v2",
    #"modernbert-base": "answerdotai/ModernBERT-base",
    #"tinybert-stsb": "cross-encoder/stsb-TinyBERT-L4",
    #"bge-reranker-v2": "BAAI/bge-reranker-v2-m3",
}

def mrr_at_k(predictions, k=5):
    total = 0.0
    for pred in predictions:
        if pred["true"] in pred["preds"]:
            rank = pred["preds"].index(pred["true"]) + 1
            total += 1 / rank
    return total / len(predictions)

#evaluate reranker
results_summary = {}

for model_name, model_path in reranker_models.items():
    print(f"\nEvaluating Reranker: {model_name}")

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForSequenceClassification.from_pretrained(model_path).to(DEVICE)

        predictions = []

        for i, qid in enumerate(tqdm(query_ids)):
            query = queries[i]
            candidates = topk_candidates[qid]
            inputs = tokenizer(
                [query] * len(candidates),
                [text for _, text in candidates],
                padding=True,
                truncation=True,
                return_tensors='pt'
            ).to(DEVICE)

            with torch.no_grad():
                logits = model(**inputs).logits.squeeze().cpu().tolist()

            ranked = sorted(zip(candidates, logits), key=lambda x: x[1], reverse=True)
            top_preds = [doc_id for (doc_id, _), _ in ranked[:FINAL_K]]

            predictions.append({
                "post_id": qid,
                "true": true_labels[i],
                "preds": top_preds
            })

        mrr = mrr_at_k(predictions, k=FINAL_K)

        df_preds = pd.DataFrame([
            {"post_id": pred["post_id"], "preds": pred["preds"]}
            for pred in predictions
        ])

        df_preds.to_csv(f"{model_name}_predictions.tsv", sep="\t", index=False)
        results_summary[model_name] = round(mrr, 4)
        print(f"{model_name} MRR@5: {mrr:.4f}")

    except Exception as e:
        print(f"❌ Error with {model_name}: {e}")

print("\nFinal Reranker MRR@5 Scores:")
print("=" * 35)
for model, score in results_summary.items():
    print(f"{model:<20} MRR@5: {score}")


Evaluating Reranker: cross-msmarco


100%|███████████████████████████████████████| 1400/1400 [49:12<00:00,  2.11s/it]

cross-msmarco MRR@5: 0.6401

Final Reranker MRR@5 Scores:
cross-msmarco        MRR@5: 0.6401





In [8]:
#rerankers to compare
reranker_models = {
    #"cross-msmarco": "cross-encoder/ms-marco-MiniLM-L6-v2",
    "modernbert-base": "answerdotai/ModernBERT-base",
    #"tinybert-stsb": "cross-encoder/stsb-TinyBERT-L4",
    #"bge-reranker-v2": "BAAI/bge-reranker-v2-m3",
}

def mrr_at_k(predictions, k=5):
    total = 0.0
    for pred in predictions:
        if pred["true"] in pred["preds"]:
            rank = pred["preds"].index(pred["true"]) + 1
            total += 1 / rank
    return total / len(predictions)

#evaluate reranker
#results_summary = {}

for model_name, model_path in reranker_models.items():
    print(f"\nEvaluating Reranker: {model_name}")

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForSequenceClassification.from_pretrained(model_path).to(DEVICE)

        predictions = []

        for i, qid in enumerate(tqdm(query_ids)):
            query = queries[i]
            candidates = topk_candidates[qid]
            inputs = tokenizer(
                [query] * len(candidates),
                [text for _, text in candidates],
                padding=True,
                truncation=True,
                return_tensors='pt'
            ).to(DEVICE)

            with torch.no_grad():
                logits = model(**inputs).logits.squeeze().cpu().tolist()

            ranked = sorted(zip(candidates, logits), key=lambda x: x[1], reverse=True)
            top_preds = [doc_id for (doc_id, _), _ in ranked[:FINAL_K]]

            predictions.append({
                "post_id": qid,
                "true": true_labels[i],
                "preds": top_preds
            })

        mrr = mrr_at_k(predictions, k=FINAL_K)

        df_preds = pd.DataFrame([
            {"post_id": pred["post_id"], "preds": pred["preds"]}
            for pred in predictions
        ])

        df_preds.to_csv(f"{model_name}_predictions.tsv", sep="\t", index=False)
        results_summary[model_name] = round(mrr, 4)
        print(f"{model_name} MRR@5: {mrr:.4f}")

    except Exception as e:
        print(f"❌ Error with {model_name}: {e}")

print("\nFinal Reranker MRR@5 Scores:")
print("=" * 35)
for model, score in results_summary.items():
    print(f"{model:<20} MRR@5: {score}")


Evaluating Reranker: modernbert-base


tokenizer_config.json:   0%|          | 0.00/20.8k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.13M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

model.safetensors:   0%|          | 0.00/599M [00:00<?, ?B/s]

Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|████████████████████████████████████| 1400/1400 [10:30:03<00:00, 27.00s/it]

modernbert-base MRR@5: 0.0978

Final Reranker MRR@5 Scores:
cross-msmarco        MRR@5: 0.6401
modernbert-base      MRR@5: 0.0978



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Process ForkProcess-11:
TOKENIZERS_PARALLELISM=(true | false)
Process ForkProcess-12:
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Process ForkProcess-9:
Process ForkProcess-2:
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Process ForkProces

In [9]:
#rerankers to compare
reranker_models = {
    #"cross-msmarco": "cross-encoder/ms-marco-MiniLM-L6-v2",
    #"modernbert-base": "answerdotai/ModernBERT-base",
    "tinybert-stsb": "cross-encoder/stsb-TinyBERT-L4",
    #"bge-reranker-v2": "BAAI/bge-reranker-v2-m3",
}

def mrr_at_k(predictions, k=5):
    total = 0.0
    for pred in predictions:
        if pred["true"] in pred["preds"]:
            rank = pred["preds"].index(pred["true"]) + 1
            total += 1 / rank
    return total / len(predictions)

#evaluate reranker
#results_summary = {}

for model_name, model_path in reranker_models.items():
    print(f"\nEvaluating Reranker: {model_name}")

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForSequenceClassification.from_pretrained(model_path).to(DEVICE)

        predictions = []

        for i, qid in enumerate(tqdm(query_ids)):
            query = queries[i]
            candidates = topk_candidates[qid]
            inputs = tokenizer(
                [query] * len(candidates),
                [text for _, text in candidates],
                padding=True,
                truncation=True,
                return_tensors='pt'
            ).to(DEVICE)

            with torch.no_grad():
                logits = model(**inputs).logits.squeeze().cpu().tolist()

            ranked = sorted(zip(candidates, logits), key=lambda x: x[1], reverse=True)
            top_preds = [doc_id for (doc_id, _), _ in ranked[:FINAL_K]]

            predictions.append({
                "post_id": qid,
                "true": true_labels[i],
                "preds": top_preds
            })

        mrr = mrr_at_k(predictions, k=FINAL_K)

        df_preds = pd.DataFrame([
            {"post_id": pred["post_id"], "preds": pred["preds"]}
            for pred in predictions
        ])

        df_preds.to_csv(f"{model_name}_predictions.tsv", sep="\t", index=False)
        results_summary[model_name] = round(mrr, 4)
        print(f"{model_name} MRR@5: {mrr:.4f}")

    except Exception as e:
        print(f"❌ Error with {model_name}: {e}")

print("\nFinal Reranker MRR@5 Scores:")
print("=" * 35)
for model, score in results_summary.items():
    print(f"{model:<20} MRR@5: {score}")


Evaluating Reranker: tinybert-stsb


tokenizer_config.json:   0%|          | 0.00/1.33k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/647 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/57.4M [00:00<?, ?B/s]

100%|███████████████████████████████████████| 1400/1400 [30:46<00:00,  1.32s/it]

tinybert-stsb MRR@5: 0.1683

Final Reranker MRR@5 Scores:
cross-msmarco        MRR@5: 0.6401
modernbert-base      MRR@5: 0.0978
tinybert-stsb        MRR@5: 0.1683





In [11]:
#rerankers to compare
reranker_models = {
    #"cross-msmarco": "cross-encoder/ms-marco-MiniLM-L6-v2",
    #"modernbert-base": "answerdotai/ModernBERT-base",
    #"tinybert-stsb": "cross-encoder/stsb-TinyBERT-L4",
    #"bge-reranker-v2": "BAAI/bge-reranker-v2-m3",
    "nli-deberta" : "cross-encoder/nli-deberta-v3-base"
}

def mrr_at_k(predictions, k=5):
    total = 0.0
    for pred in predictions:
        if pred["true"] in pred["preds"]:
            rank = pred["preds"].index(pred["true"]) + 1
            total += 1 / rank
    return total / len(predictions)

#evaluate reranker
#results_summary = {}

for model_name, model_path in reranker_models.items():
    print(f"\nEvaluating Reranker: {model_name}")

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForSequenceClassification.from_pretrained(model_path).to(DEVICE)

        predictions = []

        for i, qid in enumerate(tqdm(query_ids)):
            query = queries[i]
            candidates = topk_candidates[qid]
            inputs = tokenizer(
                [query] * len(candidates),
                [text for _, text in candidates],
                padding=True,
                truncation=True,
                return_tensors='pt'
            ).to(DEVICE)

            with torch.no_grad():
                logits = model(**inputs).logits.squeeze().cpu().tolist()

            ranked = sorted(zip(candidates, logits), key=lambda x: x[1], reverse=True)
            top_preds = [doc_id for (doc_id, _), _ in ranked[:FINAL_K]]

            predictions.append({
                "post_id": qid,
                "true": true_labels[i],
                "preds": top_preds
            })

        mrr = mrr_at_k(predictions, k=FINAL_K)

        df_preds = pd.DataFrame([
            {"post_id": pred["post_id"], "preds": pred["preds"]}
            for pred in predictions
        ])

        df_preds.to_csv(f"{model_name}_predictions.tsv", sep="\t", index=False)
        results_summary[model_name] = round(mrr, 4)
        print(f"{model_name} MRR@5: {mrr:.4f}")

    except Exception as e:
        print(f"❌ Error with {model_name}: {e}")

print("\nFinal Reranker MRR@5 Scores:")
print("=" * 35)
for model, score in results_summary.items():
    print(f"{model:<20} MRR@5: {score}")


Evaluating Reranker: nli-deberta


tokenizer_config.json:   0%|          | 0.00/1.35k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/8.66M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/738M [00:00<?, ?B/s]

100%|█████████████████████████████████████| 1400/1400 [6:17:50<00:00, 16.19s/it]

nli-deberta MRR@5: 0.0731

Final Reranker MRR@5 Scores:
cross-msmarco        MRR@5: 0.6401
modernbert-base      MRR@5: 0.0978
tinybert-stsb        MRR@5: 0.1683
nli-deberta          MRR@5: 0.0731



