In [7]:
import pandas as pd
import pyterrier as pt
import numpy as np
import os
from repro_eval.Evaluator import RpdEvaluator
from repro_eval.util import arp, arp_scores
import pytrec_eval
import yaml
if not pt.started():
    pt.init(boot_packages=["com.github.terrierteam:terrier-prf:-SNAPSHOT"])

# Time Fuse
Fuse the new run with one old run by boosting old (known) documents up and new documents down.

In [64]:
def time_fuse(run_recent, run_old, _lambda=0.5):
    qid_ranking_groups = run_old.groupby('qid')
    qid_ranking_dict = {qid: list(ranking['docno']) for qid, ranking in qid_ranking_groups}
    
    def weigh(row):
        if not qid_ranking_dict.get(row['qid']):
            # These topics will be boosted down, an empty set is returned 
            print("Could not find", row['qid'])
            
        if row['docno'] in qid_ranking_dict.get(row['qid'], []):
            return row['score'] * _lambda ** 2
        else:
            return row['score'] * (1-_lambda) ** 2           
    reranking = run_recent.copy()
    
    # min max normalization per topic
    reranking['score'] = reranking.groupby('qid')['score'].transform(lambda x : x / x.max())
    
    # weight if in old ranking
    reranking['score'] = reranking.progress_apply(weigh, axis=1)
    reranking = reranking.sort_values(['qid','score'], ascending=False).groupby('qid').head(1000)
    reranking['rank'] = reranking.groupby('qid')['score'].rank(ascending=False).astype(int)
    return reranking

# Lambda sweep

In [65]:
base_path = "data"
runs_path = "results/trec"
reranked_path = "results/fuse_time"

run_new_path = "CIR_BM25_D-t3_T-t3"
run_old_path = "CIR_BM25_D-t2_T-t3_extended"

with open("data/LongEval/metadata.yml", "r") as yamlfile:
    config = yaml.load(yamlfile, Loader=yaml.FullLoader)

In [78]:
for l in np.logspace(-10, -1, num=10):
    print(l)

1e-10
1e-09
1e-08
1e-07
1e-06
1e-05
0.0001
0.001
0.01
0.1


In [84]:
for l in range(1, 11):
    print(l/100)

0.01
0.02
0.03
0.04
0.05
0.06
0.07
0.08
0.09
0.1


In [89]:
run_new = pt.io.read_results(os.path.join(base_path, runs_path, run_new_path))
run_old = pt.io.read_results(os.path.join(base_path, runs_path, run_old_path))

with open(os.path.join(base_path, config["subcollections"]["t3"]["qrels"]["test"]), "r") as f_qrels:
    qrels = pytrec_eval.parse_qrel(f_qrels)
evaluator = pytrec_eval.RelevanceEvaluator(qrels, pytrec_eval.supported_measures)

results = {}
# for l in np.logspace(-4, -6, num=10):
# for l in np.logspace(1e-3, 1e-12 num=10):  # sweep from 1e-3 to 1e-12
# for l in np.logspace(-10, -1, num=10):
# for l in range(1, 11):
#     l =  l/100
for l in [0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009]:
    _lambda = 0.6 + l
    run_reranked = time_fuse(run_new, run_old, _lambda=_lambda)
    results[_lambda] = {}
    
    # write results
    run_name = f'CIR_BM25_D-t3_T-t3_rr-t2-{l}'
    run_reranked_path = os.path.join(base_path, reranked_path, run_name)
    pt.io.write_results(run_reranked, run_reranked_path, format='trec', run_name=run_name)

    # evaluate
    rpd_eval = RpdEvaluator(run_b_orig_path=os.path.join(base_path, runs_path, run_new_path), run_b_rep_path=run_reranked_path)
    
    correlations = rpd_eval.ktau_union().get('baseline')
    correlation_scores = [x for x in list(correlations.values()) if ~np.isnan(x)]
    avg_tau = sum(correlation_scores) / len(correlation_scores)
    results[_lambda]["tau"] = avg_tau
    
    with open(run_reranked_path) as run_reranked:
        run = pytrec_eval.parse_run(run_reranked)
        scores = evaluator.evaluate(run)
        results[_lambda]["arp"] = arp_scores(scores)


 42%|████▏     | 247900/585414 [00:02<00:02, 122700.51it/s]

Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713


100%|██████████| 585414/585414 [00:05<00:00, 116434.33it/s]
 42%|████▏     | 247979/585414 [00:02<00:02, 122008.31it/s]

Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713


100%|██████████| 585414/585414 [00:05<00:00, 115827.35it/s]
 42%|████▏     | 248407/585414 [00:02<00:02, 123277.50it/s]

Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713


100%|██████████| 585414/585414 [00:05<00:00, 116710.14it/s]
 42%|████▏     | 247781/585414 [00:02<00:02, 122709.43it/s]

Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713


100%|██████████| 585414/585414 [00:05<00:00, 116542.81it/s]
 42%|████▏     | 247447/585414 [00:02<00:02, 121831.12it/s]

Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713


100%|██████████| 585414/585414 [00:05<00:00, 115840.96it/s]
 42%|████▏     | 247024/585414 [00:02<00:02, 122155.31it/s]

Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713


100%|██████████| 585414/585414 [00:05<00:00, 115875.17it/s]
 43%|████▎     | 249339/585414 [00:02<00:02, 123832.79it/s]

Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713


100%|██████████| 585414/585414 [00:04<00:00, 117172.58it/s]
 43%|████▎     | 251273/585414 [00:02<00:02, 124366.58it/s]

Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713


100%|██████████| 585414/585414 [00:04<00:00, 117675.01it/s]
 43%|████▎     | 250602/585414 [00:02<00:02, 124099.68it/s]

Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713
Could not find q012351539607713


100%|██████████| 585414/585414 [00:04<00:00, 117525.35it/s]


In [91]:
for l in results.keys():
    print("|", " | ".join([
        str(l), 
        str(round(results[l]["tau"], 4)), 
        str(round(results[l]["arp"]["P_10"], 4)), 
        str(round(results[l]["arp"]["bpref"], 4)), 
        str(round(results[l]["arp"]["ndcg"], 4))
        ]), "|")

| 0.601 | 0.0133 | 0.1156 | 0.4196 | 0.2839 |
| 0.602 | 0.0131 | 0.1156 | 0.4196 | 0.2836 |
| 0.603 | 0.0129 | 0.1156 | 0.4195 | 0.2833 |
| 0.604 | 0.013 | 0.1156 | 0.4194 | 0.2831 |
| 0.605 | 0.0123 | 0.1154 | 0.4193 | 0.2828 |
| 0.606 | 0.0126 | 0.1152 | 0.4193 | 0.2826 |
| 0.607 | 0.0129 | 0.1152 | 0.4192 | 0.2823 |
| 0.608 | 0.0128 | 0.1149 | 0.4192 | 0.2821 |
| 0.609 | 0.0129 | 0.1149 | 0.4192 | 0.2819 |


# Filter Fuse

In [234]:
import numpy as np
from repro_eval.Evaluator import RpdEvaluator
from ranx import Run, fuse

In [235]:
def filter_and_fuse(run_recent, old_runs: list):
    qid_ranking_groups = run_recent.groupby('qid')
    qid_ranking_dict_recent = {qid: pd.Series(ranking['score'].values, ranking['docno']).to_dict() for qid, ranking in qid_ranking_groups}
    
    runs = [Run.from_dict(qid_ranking_dict_recent)]
    
    for run_old in old_runs:
        qid_ranking_groups = run_old.groupby('qid')
        qid_ranking_dict_old = {qid: pd.Series(ranking['score'].values, ranking['docno']).to_dict() for qid, ranking in qid_ranking_groups}
        for qid, ranking in qid_ranking_dict_old.items():
            docs_recent = qid_ranking_dict_recent.get(qid).keys()
            qid_ranking_dict_old[qid] = {docid: score for docid, score in ranking.items() if docid in docs_recent}
        runs.append(Run.from_dict(qid_ranking_dict_old))
    
    combined_run = fuse(runs = runs, method = "rrf")

    return combined_run

In [256]:
base_path = "data"
runs_path = "results/trec"
reranked_path = "results/filter_fuse"

run_new_path = "CIR_BM25_D-t3_T-t3"
run_old_path = "CIR_BM25_D-t2_T-t3_extended"

with open("data/LongEval/metadata.yml", "r") as yamlfile:
    config = yaml.load(yamlfile, Loader=yaml.FullLoader)

In [257]:
run_new = pt.io.read_results(os.path.join(base_path, runs_path, run_new_path))

# Load history of runs
def load_history_runs(history, sub_collection):
    history_index = ["D-"+i for i in history]
    old_runs = []
    for name in os.listdir(os.path.join(base_path, runs_path)):
        if sub_collection in name and name.endswith("extended"):
            for i in history_index:
                if i in name:
                    run = pt.io.read_results(os.path.join(base_path, runs_path, name))
                    old_runs.append(run)
    return old_runs


# find core qids
def core_topics(run_new, old_runs):
    topic_sets = []
    for i in old_runs:
        topic_sets.append(set(i["qid"]))
    topic_sets.append(set(run_new["qid"]))

    core_topics = set.intersection(*topic_sets)
    print("Found known documents for:", len(core_topics), "of", len(run_new["qid"].unique()), "topics")
    return core_topics

In [258]:
history = ["t2"]
old_runs = load_history_runs(history, "t3")
core_topics = core_topics(run_new, old_runs)

missing_topics = set(run_new["qid"]) - set(core_topics)

Found known documents for: 597 of 598 topics


In [239]:
old_runs_cleaned = []
for run in old_runs:
    old_runs_cleaned.append(run[run["qid"].isin(core_topics)])
old_runs = old_runs_cleaned

In [240]:
run_new_cleaned = run_new[run_new["qid"].isin(core_topics)]

In [241]:
run_reranked = filter_and_fuse(run_new_cleaned, old_runs)

In [242]:
run_name = f'CIR_BM25_D-t3_T-t3_rr-ff{"".join(history)}'
run_reranked_path = os.path.join(base_path, reranked_path, run_name)
run_reranked.save(run_reranked_path,  kind='trec')

In [243]:
# add missing topic back to run
missing_topic_ranking = run_new[run_new["qid"].isin(missing_topics)]

In [244]:
reranked_run = pd.read_csv(run_reranked_path, sep=" ", names=["qid", "Q0", "docno", "rank", "score", "name"])


In [245]:
missing_topic_ranking["Q0"] = "Q0"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  missing_topic_ranking["Q0"] = "Q0"


In [246]:
pd.concat([reranked_run, missing_topic_ranking], ignore_index=True).to_csv(run_reranked_path, sep=" ", header=None, index=None)

In [247]:
rpd_eval = RpdEvaluator(run_b_orig_path=os.path.join(base_path, runs_path, run_new_path), run_b_rep_path=run_reranked_path)

correlations = rpd_eval.ktau_union().get('baseline')
correlation_scores = [x for x in list(correlations.values()) if ~np.isnan(x)]

avg_tau =  sum(correlation_scores) / len(correlation_scores)

with open(run_reranked_path) as run_reranked:
    run = pytrec_eval.parse_run(run_reranked)
    scores = evaluator.evaluate(run)
    print( "|", 
          str(avg_tau), "|",
          ", ".join(history), "|",
        str(round(arp_scores(scores)["P_10"], 4)), "|",
        str(round(arp_scores(scores)["bpref"], 4)), "|",
        str(round(arp_scores(scores)["ndcg"], 4)), "|"
        )

| 0.00873296943962716 | t2 | 0.1117 | 0.4263 | 0.305 |


In [233]:
rpd_eval = RpdEvaluator(run_b_orig_path=os.path.join(base_path, runs_path, run_new_path), run_b_rep_path=run_reranked_path)

correlations = rpd_eval.ktau_union().get('baseline')
correlation_scores = [x for x in list(correlations.values()) if ~np.isnan(x)]

avg_tau =  sum(correlation_scores) / len(correlation_scores)

with open(run_reranked_path) as run_reranked:
    run = pytrec_eval.parse_run(run_reranked)
    scores = evaluator.evaluate(run)
    print( "|", 
          str(avg_tau), "|",
          ", ".join(history), "|",
        str(round(arp_scores(scores)["P_10"], 4)), "|",
        str(round(arp_scores(scores)["bpref"], 4)), "|",
        str(round(arp_scores(scores)["ndcg"], 4)), "|"
        )

| 0.005752406303693579 | t2, t1 | 0.0987 | 0.4163 | 0.2825 |


In [221]:
rpd_eval = RpdEvaluator(run_b_orig_path=os.path.join(base_path, runs_path, run_new_path), run_b_rep_path=run_reranked_path)

correlations = rpd_eval.ktau_union().get('baseline')
correlation_scores = [x for x in list(correlations.values()) if ~np.isnan(x)]

avg_tau =  sum(correlation_scores) / len(correlation_scores)

with open(run_reranked_path) as run_reranked:
    run = pytrec_eval.parse_run(run_reranked)
    scores = evaluator.evaluate(run)
    print( "|", 
          str(avg_tau), "|",
          ", ".join(history), "|",
        str(round(arp_scores(scores)["P_10"], 4)), "|",
        str(round(arp_scores(scores)["bpref"], 4)), "|",
        str(round(arp_scores(scores)["ndcg"], 4)), "|"
        )

| 0.00580605559250212 | t2, t1, t0 | 0.1007 | 0.4178 | 0.2758 |
