In [3]:
# # ----------------------------------------------------------
# # Information Retrieval Evaluation Exercise
# # Python version (equivalent to the provided R script)
# # ----------------------------------------------------------


# import os
# import pandas as pd
# import numpy as np
# from scipy.stats import pearsonr, ttest_rel


# #--Run this cell only if you’re using Google Colab; otherwise, skip it.---
# from google.colab import drive
# drive.mount('/content/drive')


# # ---------- CONFIGURATION ----------
# QRELS_FILE = "/content/drive/MyDrive/TRECDATA/qrels.trec8.adhoc"    # path to qrels file
# RUNS_FOLDER = "/content/drive/MyDrive/TRECDATA/"       # folder containing ~15 system runs
# TOP_K = 10


# # ---------- STEP 1: LOAD QRELS ----------
# def load_qrels(filepath):
#     """Load TREC qrels file."""
#     qrels = pd.read_csv(filepath, sep=r"\s+", header=None, names=["topic", "iter", "docid", "relevance"])
#     return qrels


# qrels = load_qrels(QRELS_FILE)


# # ---------- STEP 2: LOAD SYSTEM RUN ----------
# def load_run(set20):
#     """Load one system run in standard TREC 6-column format."""
#     run = pd.read_csv(set20, sep=r"\s+", header=None,
#                       names=["topic", "Q0", "docid", "rank", "score", "system"])
#     return run


# # ---------- STEP 3: COMPUTE PRECISION@K ----------
# def precision_at_k(run, qrels, k=10):
#     """Compute Precision@k per topic."""
#     topics = run["topic"].unique()
#     results = []


#     for t in topics:
#         docs = run[run["topic"] == t].sort_values("rank").head(k)["docid"].tolist()
#         relevant_docs = qrels[(qrels["topic"] == t) & (qrels["relevance"] > 0)]["docid"].tolist()
#         precision = len(set(docs) & set(relevant_docs)) / k
#         results.append({"topic": t, "Pk": precision})
#     return pd.DataFrame(results)


# # ---------- STEP 4: LOOP THROUGH FILES INSIDE set20 ----------
# all_scores = pd.DataFrame()


# SET_FOLDER = os.path.join(RUNS_FOLDER, "set20")
# files = os.listdir(SET_FOLDER)


# print("Files in set20:", files)


# for f in files:
#     run_path = os.path.join(SET_FOLDER, f)


#     # skip directories and non-run files
#     if os.path.isdir(run_path):
#         continue
#     if not f.startswith("input."):
#         print(f"⚠️ Skipping non-run file: {f}")
#         continue


#     print(f"📂 Evaluating: {f}")
#     run = load_run(run_path)


#     p10_df = precision_at_k(run, qrels, TOP_K)
#     p10_df["system"] = f 
    
#     all_scores = pd.concat([all_scores, p10_df], ignore_index=True)


# # ---------- STEP 5: COMPUTE AVERAGE P@10 ----------
# avg_scores = all_scores.groupby("system")["Pk"].mean().reset_index(name="avg_P10")
# avg_scores = avg_scores.sort_values("avg_P10", ascending=False)


# print("=== Average Precision@10 for set20 ===")
# print(avg_scores.to_string(index=False))






In [4]:
# ----------------------------------------------------------
# Information Retrieval Evaluation Exercise
# Python version (equivalent to the provided R script)
# ----------------------------------------------------------


import os
import pandas as pd
import numpy as np
from scipy.stats import pearsonr, ttest_rel


In [5]:
# ---------- CONFIGURATION ----------
# All paths are relative to the notebook's working directory.
QRELS_FILE = "qrels.trec8.adhoc"    # path to the qrels file read by load_qrels
RUNS_FOLDER = "TRECDATA"       # parent folder; the run files evaluated below live in its "set20" subfolder
TOP_K = 5  # rank cutoff k handed to precision_at_k for every run


# ---------- STEP 1: LOAD QRELS ----------
def load_qrels(filepath):
    """Read a TREC qrels file into a DataFrame.

    The file is parsed as whitespace-separated text with no header row.

    Args:
        filepath: Location of the qrels file (anything ``pd.read_csv`` accepts).

    Returns:
        A DataFrame whose columns are labelled ``topic``, ``iter``, ``docid``
        and ``relevance``, in that order.
    """
    column_names = ["topic", "iter", "docid", "relevance"]
    return pd.read_csv(filepath, sep=r"\s+", header=None, names=column_names)


# Load the judgments once; the same frame is passed to precision_at_k for every run.
qrels = load_qrels(QRELS_FILE)


In [6]:
# ---------- STEP 2: LOAD SYSTEM RUN ----------
def load_run(set20):
    """Read a single system run stored in the standard TREC 6-column layout.

    The file is parsed as whitespace-separated text with no header row.

    Args:
        set20: Path of the run file to read. Despite its name, the caller
            passes the path of one file, not of the folder.

    Returns:
        A DataFrame whose columns are labelled ``topic``, ``Q0``, ``docid``,
        ``rank``, ``score`` and ``system``, in that order.
    """
    trec_columns = ["topic", "Q0", "docid", "rank", "score", "system"]
    return pd.read_csv(set20, sep=r"\s+", header=None, names=trec_columns)


# ---------- STEP 3: COMPUTE PRECISION@K ----------
def precision_at_k(run, qrels, k=10):
    """Compute Precision@k per topic.

    For every topic present in ``run``, the ``k`` rows with the smallest
    ``rank`` are taken and the number of distinct retrieved documents that are
    judged relevant (``relevance > 0`` in ``qrels``) is divided by ``k``.
    The denominator is always ``k``, even when a topic has fewer than ``k``
    retrieved rows.

    Args:
        run: DataFrame with at least ``topic``, ``docid`` and ``rank`` columns.
        qrels: DataFrame with at least ``topic``, ``docid`` and ``relevance``
            columns.
        k: Rank cutoff; must be a positive number.

    Returns:
        A DataFrame with one row per topic of ``run`` (in order of first
        appearance) and the columns ``topic`` and ``Pk``. The columns are
        present even when ``run`` has no rows.

    Raises:
        ValueError: If ``k`` is not positive.
    """
    if k <= 0:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # Build the relevant-document sets once instead of re-filtering the whole
    # qrels frame for every topic.
    judged_relevant = qrels[qrels["relevance"] > 0]
    relevant_by_topic = {
        topic: set(doc_ids)
        for topic, doc_ids in judged_relevant.groupby("topic")["docid"]
    }
    no_relevant = set()

    results = []
    # sort=False keeps topics in order of first appearance, like .unique().
    for t, topic_rows in run.groupby("topic", sort=False):
        docs = topic_rows.sort_values("rank").head(k)["docid"]
        hits = len(set(docs) & relevant_by_topic.get(t, no_relevant))
        results.append({"topic": t, "Pk": hits / k})

    # Explicit columns keep the schema stable when no topic was evaluated.
    return pd.DataFrame(results, columns=["topic", "Pk"])


# ---------- STEP 4: LOOP THROUGH FILES INSIDE set20 ----------
SET_FOLDER = os.path.join(RUNS_FOLDER, "set20")
files = os.listdir(SET_FOLDER)


print("Files in set20:", files)


# Per-run score frames are collected here and concatenated once after the
# loop; growing a DataFrame with pd.concat on every iteration copies all
# previous rows each time.
score_frames = []

for f in files:
    run_path = os.path.join(SET_FOLDER, f)

    # skip directories and non-run files
    if os.path.isdir(run_path):
        continue
    if not f.startswith("input."):
        print(f"⚠️ Skipping non-run file: {f}")
        continue

    print(f"📂 Evaluating: {f}")
    run = load_run(run_path)

    # Per-topic precision at the configured cutoff, tagged with the run's
    # file name so the systems can be told apart after concatenation.
    p10_df = precision_at_k(run, qrels, TOP_K)
    p10_df["system"] = f
    score_frames.append(p10_df)

if score_frames:
    all_scores = pd.concat(score_frames, ignore_index=True)
else:
    # No run file found: keep the expected columns so later group-bys on
    # "system" / "Pk" do not fail with a KeyError.
    all_scores = pd.DataFrame(columns=["topic", "Pk", "system"])

Files in set20: ['input.acsys8alo.acsys8alo', 'input.apl8n.apl8n', 'input.CL99SDopt1.CL99SDopt1', 'input.fub99tf.fub99tf', 'input.ibmg99c.ibmg99c', 'input.INQ601.INQ601', 'input.kdd8ps16.kdd8ps16', 'input.mds08a5.mds08a5', 'input.nttd8ale.nttd8ale', 'input.ric8tpx.ric8tpx', 'input.surffal2.surffal2', 'input.unc8al42.unc8al42']
📂 Evaluating: input.acsys8alo.acsys8alo
📂 Evaluating: input.apl8n.apl8n
📂 Evaluating: input.CL99SDopt1.CL99SDopt1
📂 Evaluating: input.fub99tf.fub99tf
📂 Evaluating: input.ibmg99c.ibmg99c
📂 Evaluating: input.INQ601.INQ601
📂 Evaluating: input.kdd8ps16.kdd8ps16
📂 Evaluating: input.mds08a5.mds08a5
📂 Evaluating: input.nttd8ale.nttd8ale
📂 Evaluating: input.ric8tpx.ric8tpx
📂 Evaluating: input.surffal2.surffal2
📂 Evaluating: input.unc8al42.unc8al42


In [7]:
# ---------- STEP 5: COMPUTE AVERAGE P@K ----------
# Mean of the per-topic precision for each system, best system first.
# The stored column keeps the name "avg_P10" in case other cells reference it,
# although the cutoff actually used is TOP_K.
avg_scores = all_scores.groupby("system")["Pk"].mean().reset_index(name="avg_P10")
avg_scores = avg_scores.sort_values("avg_P10", ascending=False)


# Label the printed report with the real cutoff instead of a hard-coded 10.
print(f"=== Average Precision@{TOP_K} for set20 ===")
print(avg_scores.rename(columns={"avg_P10": f"avg_P{TOP_K}"}).to_string(index=False))

=== Average Precision@10 for set20 ===
                     system  avg_P10
input.CL99SDopt1.CL99SDopt1    0.800
      input.fub99tf.fub99tf    0.604
  input.acsys8alo.acsys8alo    0.552
    input.nttd8ale.nttd8ale    0.544
      input.ric8tpx.ric8tpx    0.500
          input.apl8n.apl8n    0.480
        input.INQ601.INQ601    0.464
      input.mds08a5.mds08a5    0.436
      input.ibmg99c.ibmg99c    0.344
    input.unc8al42.unc8al42    0.328
    input.kdd8ps16.kdd8ps16    0.312
    input.surffal2.surffal2    0.012
