In [None]:
import json
import os

import numpy as np
import pandas as pd
from tqdm.std import tqdm

In [None]:
# Run configuration. The string values appear to be placeholders that must be
# replaced before running — TODO confirm the real paths/names for your setup.

# Path of the tab-separated file loaded with pd.read_csv(..., sep="\t").
input_file = "Passage_Reranking_Input/model's output after processing by 'process_to_eval'"
# Model identifier; it becomes part of the results file name ndcg_{model_name}.json.
model_name = "model name"
# Despite the name, this is used as the output *directory*: the results file
# ndcg_{model_name}.json is written inside it.
output_file_name = "Passage_Reranking_Output/"

In [None]:
# Load the tab-separated model output. The evaluation code in this notebook
# reads the columns 'qid', 'rank' and 'relevance_score' from this frame.
df = pd.read_csv(
    input_file,
    delimiter="\t",  # `delimiter` is pandas' alias for `sep`
)

In [None]:
# Bare expression: shows the loaded frame as the cell output for a quick visual check.
df

In [None]:
def dcg_at_k(relevance_scores, k):
    """Return the Discounted Cumulative Gain (DCG) of the top-k scores.

    Args:
        relevance_scores: Indexable sequence of relevance grades, already in
            ranked order (index 0 is the top-ranked item).
        k: Cutoff; only the first ``min(k, len(relevance_scores))`` entries
            contribute. A cutoff of zero or less yields 0.0.

    Returns:
        The sum over positions ``i`` (zero-based) of
        ``(2**rel_i - 1) / log2(i + 2)``.
    """
    cutoff = min(k, len(relevance_scores))
    total = 0.0
    for position in range(cutoff):
        gain = 2 ** relevance_scores[position] - 1
        # The +2 offset makes the top position's discount log2(2) == 1
        # instead of log2(1) == 0, which would divide by zero.
        discount = np.log2(position + 2)
        total += gain / discount
    return total

In [None]:
def ndcg_at_k(relevance_scores, k):
    """Return the Normalized Discounted Cumulative Gain (nDCG) at rank k.

    The DCG of ``relevance_scores`` in the given order is divided by the DCG
    of the same scores sorted from highest to lowest (the ideal ordering of
    the supplied list).

    Args:
        relevance_scores: Relevance grades in ranked order.
        k: Rank cutoff passed through to ``dcg_at_k``.

    Returns:
        ``DCG@k / IDCG@k``, or 0.0 when the ideal DCG is not positive
        (for example when the list is empty or every gain is zero).
    """
    achieved = dcg_at_k(relevance_scores, k)

    # Best achievable DCG: the same scores, highest first.
    best_ordering = sorted(relevance_scores, reverse=True)
    ideal = dcg_at_k(best_ordering, k)

    if ideal > 0:
        return achieved / ideal
    return 0.0

In [None]:
def final_ndcg_result(df, k):
    """Compute the mean nDCG@k over all queries in ``df``.

    ``df`` is grouped by its ``qid`` column. Within each group the rows are
    ordered by ``rank`` and their ``relevance_score`` values are scored with
    ``ndcg_at_k``. Every query carries equal weight in the average.

    Args:
        df: DataFrame with ``qid``, ``rank`` and ``relevance_score`` columns.
        k: Rank cutoff for nDCG.

    Returns:
        A dict with the label ``'Overall (Simple Average)'`` under ``'qid'``
        and the mean nDCG rounded to 5 decimal places under ``'nDCG'``.
    """
    per_query = [
        {
            'qid': query_id,
            # Scores must be in ranked order before they are handed to nDCG.
            'nDCG': ndcg_at_k(
                rows.sort_values(by='rank')['relevance_score'].tolist(),
                k,
            ),
        }
        for query_id, rows in df.groupby('qid')
    ]

    average = pd.DataFrame(per_query)['nDCG'].mean()

    return {
        'qid': 'Overall (Simple Average)',
        'nDCG': round(average, 5),
    }

In [None]:
# Rank cutoffs at which nDCG@k is reported.
K = [5, 10, 15, 20, 30, 60]

In [None]:
# Evaluate the query-averaged nDCG at every cutoff in K, collecting one
# single-key dict ({"nDCG@<k>": value}) per cutoff.
ndcg_result = []
for k in tqdm(K):
    overall_row = final_ndcg_result(df, k)
    metric_label = f"nDCG@{k}"
    ndcg_result.append({metric_label: overall_row.get("nDCG")})

In [None]:
# Bare expression: shows the list of {"nDCG@k": value} entries as the cell output.
ndcg_result

In [None]:
# Persist the per-cutoff nDCG scores as JSON, one file per model.
# os.path.join avoids the doubled separator the f-string produced when
# output_file_name already ends with "/".
ndcg_path = os.path.join(output_file_name, f"ndcg_{model_name}.json")
# Create the target directory first so open() does not fail with
# FileNotFoundError on a fresh checkout, or when model_name itself contains a
# path separator (e.g. "org/model"). Falls back to "." when the path has no
# directory component.
os.makedirs(os.path.dirname(ndcg_path) or ".", exist_ok=True)
with open(ndcg_path, "w") as f:
    json.dump(ndcg_result, f, indent=4)