In [None]:
%load_ext autoreload
%autoreload 2

from time import time
import pandas as pd
import numpy as np
import os
from collections import Counter, defaultdict
import pickle

In [None]:
import pyterrier as pt

# Guard the initialisation so this cell can be re-run in a live kernel:
# PyTerrier releases that require pt.init() refuse a second call and point
# to pt.started() as the check to perform first.
if not pt.started():
    pt.init()

In [None]:
# Relevance judgements: space-separated file without a header row.
qrel_path = "../data/CAST_qrels/qrels-docs.2019.txt"
qrels_df = pd.read_csv(qrel_path, sep=" ", header=None)
# Column 3 (named "label" below) is cast to an integer dtype.
qrels_df[[3]] = qrels_df[[3]].astype(int)
# Column 1 is not used; the remaining columns 0, 2, 3 become qid, docno, label.
qrels_df = qrels_df.drop(columns=[1])
qrels_df.columns = ["qid", "docno", "label"]
qrels = qrels_df

In [None]:
# Manual utterances: tab-separated file without a header row, read as (qid, query).
topics_path = "../data/CAST-2019/test_manual_utterance.tsv"  # manual

topics_df = pd.read_csv(topics_path, sep="\t", header=None)
topics_df.columns = ["qid", "query"]
topics = topics_df
topics.head()

# STAR embeddings

In [None]:
# Lift the column limit so wide frames are rendered with every column visible.
pd.options.display.max_columns = None
# pd.options.display.float_format = '${:,.8f}'.format

In [None]:
# Cache cut-offs to evaluate; each value is spliced into the ranking file names as "top<k>".
topk = [1000, 2000, 5000, 10000]
# Rankings keyed by cut-off k, populated by the loading loops further down.
result_with_cache_df_dict = dict()
result_first_topic_df_dict = dict()

In [None]:
# Baseline: STAR ranking without cache (tab-separated, no header row).
results_path = "../data/star-ranking/CAST-manual-queries-star-L2-ranking-top1000.tsv"
results_df = pd.read_csv(results_path, sep="\t", header=None)
# Column 3 is filled with a synthetic score that decreases as the rank in column 2 grows.
results_df[3] = 1000 - results_df[2]
# Result frames handed to PyTerrier must carry "qid", "docno", "score" and "rank" columns.
results_df.columns = ["qid", "docno", "rank", "score"]
# Keep only the rows whose rank is below 201.
baseline = results_df[results_df["rank"] < 201]
results_df = baseline
baseline.head(210)

In [None]:
# RESULTS FIRST QUERY
# One ranking file per cache cut-off k; each run is trimmed to ranks below 201.
for k in topk:
    print(k)
    results_path = (
        "../data/star-ranking/CAST-manual-queries-star-L2-ranking-top1000-cache-top"
        f"{k}-first-utt_new.tsv"
    )
    results_df = pd.read_csv(results_path, sep="\t", header=None)
    # Column 3 becomes a synthetic score that decreases as the rank in column 2 grows.
    results_df[3] = 1000 - results_df[2]
    # Result frames handed to PyTerrier must carry "qid", "docno", "score" and "rank" columns.
    results_df.columns = ["qid", "docno", "rank", "score"]
    results_df = results_df[results_df["rank"] < 201]
    result_first_topic_df_dict[k] = results_df

In [None]:
# RESULTS with CACHE
# One ranking file per cache cut-off k; each run is trimmed to ranks below 201.
for k in topk:
    print(k)
    results_path = (
        "../data/star-ranking/CAST-manual-queries-star-L2-ranking-top1000-cache-top"
        f"{k}-with-update.tsv"
    )
    results_df = pd.read_csv(results_path, sep="\t", header=None)
    # Column 3 becomes a synthetic score that decreases as the rank in column 2 grows.
    results_df[3] = 1000 - results_df[2]
    # Result frames handed to PyTerrier must carry "qid", "docno", "score" and "rank" columns.
    results_df.columns = ["qid", "docno", "rank", "score"]
    results_df = results_df[results_df["rank"] < 201]
    result_with_cache_df_dict[k] = results_df

In [None]:
%%time
# Compare the baseline ranking against every "cache with update" run.
# The labels are generated from the dictionary keys, in the same iteration order
# as the runs themselves, so a change to the cut-offs can never leave a run
# paired with the wrong hand-written name.
experiments = [baseline, *result_with_cache_df_dict.values()]
names = ["Full-retrieval"] + [f"Cache-with-update-{k}" for k in result_with_cache_df_dict]
print(len(experiments))
# baseline=0 designates the first system (the full retrieval run) as the reference system.
pt.Experiment(experiments, topics, qrels,
              names=names,
              eval_metrics=["map_cut_200", "recip_rank", "recall_200", "P_3", "P_1", "ndcg_cut_3"],
              baseline=0)

In [None]:
%%time
# Compare the baseline ranking against every "cache from first query" run.
# The labels are generated from the dictionary keys, in the same iteration order
# as the runs themselves, so a change to the cut-offs can never leave a run
# paired with the wrong hand-written name.
experiments = [baseline, *result_first_topic_df_dict.values()]
names = ["Full-retrieval"] + [f"Cache-first-query-{k}" for k in result_first_topic_df_dict]
print(len(experiments))
# baseline=0 designates the first system (the full retrieval run) as the reference system.
pt.Experiment(experiments, topics, qrels,
              names=names,
              eval_metrics=["map_cut_200", "recip_rank", "recall_200", "P_3", "P_1", "ndcg_cut_3"],
              baseline=0)

In [None]:
%%time
# Per-query evaluation of a single first-utterance cache run.
# The cut-off is named once and used for both the lookup and the label, so the
# reported system name always matches the run that was actually evaluated
# (the label used to read "10000" while the 5000 run was selected).
# NOTE(review): if the 10000 run was the intended one, change cache_k instead.
cache_k = 5000
res_df = result_first_topic_df_dict[cache_k]
res_per_query = pt.Experiment([res_df], topics, qrels, names=[f"STARfirstutt{cache_k}"],
              eval_metrics=["map", "recip_rank", "recall_200", "P_3", "P_1", "ndcg_cut_3"], perquery=True)
# Show a small sample of the per-query table as the cell output.
res_per_query.head()

In [None]:
# # res_per_query = res_per_query.dropna()
# NDCG3 = res_per_query[res_per_query["measure"]=='ndcg_cut_3']
# len(list(NDCG3))
# %store NDCG3
# NDCG3

In [None]:
# res_per_query[res_per_query["qid"]=='50_8']

# ADORE(STAR) embeddings

In [None]:
# Cache cut-offs to evaluate; each value is spliced into the ranking file names as "top<k>".
topk = [1000, 2000, 5000, 10000]
# Start from empty dictionaries so only rankings loaded after this point are held in them.
result_with_cache_df_dict = dict()
result_first_topic_df_dict = dict()

In [None]:
# Baseline: ADORE(STAR) ranking without cache (tab-separated, no header row).
results_path = "../data/adore-star-ranking/CAST-manual-queries-adore-star-L2-ranking-top1000.tsv"
results_df = pd.read_csv(results_path, sep="\t", header=None)
# Column 3 is filled with a synthetic score that decreases as the rank in column 2 grows.
results_df[3] = 1000 - results_df[2]
# Result frames handed to PyTerrier must carry "qid", "docno", "score" and "rank" columns.
results_df.columns = ["qid", "docno", "rank", "score"]
# Keep only the rows whose rank is below 201.
baseline = results_df[results_df["rank"] < 201]
results_df = baseline
baseline.head(210)

In [None]:
# RESULTS FIRST QUERY
# One ranking file per cache cut-off k; each run is trimmed to ranks below 201.
for k in topk:
    print(k)
    results_path = (
        "../data/adore-star-ranking/CAST-manual-queries-adore-star-L2-ranking-top1000-cache-top"
        f"{k}-first-utt.tsv"
    )
    results_df = pd.read_csv(results_path, sep="\t", header=None)
    # Column 3 becomes a synthetic score that decreases as the rank in column 2 grows.
    results_df[3] = 1000 - results_df[2]
    # Result frames handed to PyTerrier must carry "qid", "docno", "score" and "rank" columns.
    results_df.columns = ["qid", "docno", "rank", "score"]
    results_df = results_df[results_df["rank"] < 201]
    result_first_topic_df_dict[k] = results_df

In [None]:
%%time
# Compare the baseline ranking against every "cache from first query" run.
# The labels are generated from the dictionary keys, in the same iteration order
# as the runs themselves, so a change to the cut-offs can never leave a run
# paired with the wrong hand-written name.
experiments = [baseline, *result_first_topic_df_dict.values()]
names = ["Full-retrieval"] + [f"Cache-first-query-{k}" for k in result_first_topic_df_dict]
print(len(experiments))
# baseline=0 designates the first system (the full retrieval run) as the reference system.
pt.Experiment(experiments, topics, qrels,
              names=names,
              eval_metrics=["map_cut_200", "recip_rank", "recall_200", "P_3", "P_1", "ndcg_cut_3"],
              baseline=0)

In [None]:
# RESULTS with CACHE
# One ranking file per cache cut-off k; each run is trimmed to ranks below 201.
for k in topk:
    print(k)
    results_path = (
        "../data/adore-star-ranking/CAST-manual-queries-adore-star-L2-ranking-top1000-cache-top"
        f"{k}-with-update.tsv"
    )
    results_df = pd.read_csv(results_path, sep="\t", header=None)
    # Column 3 becomes a synthetic score that decreases as the rank in column 2 grows.
    results_df[3] = 1000 - results_df[2]
    # Result frames handed to PyTerrier must carry "qid", "docno", "score" and "rank" columns.
    results_df.columns = ["qid", "docno", "rank", "score"]
    results_df = results_df[results_df["rank"] < 201]
    result_with_cache_df_dict[k] = results_df

In [None]:
%%time
# Compare the baseline ranking against every "cache with update" run.
# The labels are generated from the dictionary keys, in the same iteration order
# as the runs themselves, so a change to the cut-offs can never leave a run
# paired with the wrong hand-written name.
experiments = [baseline, *result_with_cache_df_dict.values()]
names = ["Full-retrieval"] + [f"Cache-with-update-{k}" for k in result_with_cache_df_dict]
print(len(experiments))
# baseline=0 designates the first system (the full retrieval run) as the reference system.
pt.Experiment(experiments, topics, qrels,
              names=names,
              eval_metrics=["map_cut_200", "recip_rank", "recall_200", "P_3", "P_1", "ndcg_cut_3"],
              baseline=0)