In [None]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
from tqdm.std import tqdm

In [None]:
# Run configuration. The values below look like placeholders to be edited before running.
# Folder for the ERR report; the save cell writes "<output_file_name>/err_<model_name>.json".
output_file_name = "Passage_Reranking_Output/"
# Label embedded in the report file name.
model_name = "model name"
# Ranking file to evaluate; it is read as tab-separated text by the load cell.
input_file = "Passage_Reranking_Input/model's output after processing by 'process_to_eval'"

In [None]:
# Load the tab-separated ranking file. Later cells read its 'qid', 'rank'
# and 'relevance_score' columns.
df = pd.read_csv(filepath_or_buffer=input_file, sep="\t")

In [None]:
# Bare expression: the notebook renders the loaded frame as this cell's output.
df

In [None]:
# Rank cutoffs k at which ERR@k is reported.
K = [5, 10, 15, 20, 30, 60]

In [None]:
def compute_err(relevance_scores, k, max_grade=4):
    """Compute the Expected Reciprocal Rank (ERR@k) for one ranked list.

    Each graded relevance ``g`` is mapped to a satisfaction probability
    ``R = (2**g - 1) / (2**max_grade - 1)`` (clamped to [0, 1]). ERR is the
    sum over ranks ``r`` of ``(1 / r) * P(not satisfied before r) * R_r``.

    Args:
        relevance_scores: Indexable sequence of relevance grades, ordered from
            the top-ranked item downwards.
        k: Cutoff; only the first ``k`` items are scored. A cutoff <= 0 or an
            empty list yields 0.0.
        max_grade: Highest possible relevance grade. Defaults to 4, which gives
            the normalizer 2**4 - 1 = 15.

    Returns:
        The ERR@k value as a float.

    Raises:
        ValueError: If ``max_grade`` is not positive (the normalizer would be
            zero or negative).
    """
    if max_grade <= 0:
        raise ValueError("max_grade must be positive")

    # Gain of a perfectly relevant item; used to normalize gains into [0, 1].
    max_gain = 2**max_grade - 1

    err = 0.0
    prob_not_satisfied = 1.0

    # Never look past the cutoff or the end of the list.
    depth = min(k, len(relevance_scores))
    for r in range(1, depth + 1):
        g = relevance_scores[r - 1]

        # Probability that the user is satisfied by the item at rank r.
        R_r = (2**g - 1) / max_gain

        # Clamp so that out-of-range grades cannot produce an invalid probability.
        R_r = max(0, min(R_r, 1))

        # Contribution of rank r: the user reached it unsatisfied and stops here.
        err += (1 / r) * prob_not_satisfied * R_r

        # Probability that the user continues past rank r.
        prob_not_satisfied *= (1 - R_r)

    return err

In [None]:
def final_result(df, k):
    """Average ERR@k over all queries in a ranking frame.

    Rows are grouped by 'qid'; within each group the 'relevance_score' values
    are ordered by 'rank' and scored with ``compute_err`` (defined in another
    cell of this notebook). The per-query scores are then averaged with equal
    weight per query.

    Args:
        df: DataFrame with 'qid', 'rank' and 'relevance_score' columns.
        k: Rank cutoff forwarded to ``compute_err``.

    Returns:
        A dict ``{'qid': 'Overall (Simple Average)', 'ERR': <mean>}`` where the
        mean is rounded to 5 decimals. If ``df`` contains no query groups the
        mean is NaN instead of raising ``KeyError``.
    """
    per_query_err = []
    for _, group in df.groupby('qid'):
        # Relevance grades in ranked order for this query.
        relevance_scores = group.sort_values(by='rank')['relevance_score'].tolist()
        per_query_err.append(compute_err(relevance_scores, k=k))

    # A float Series keeps pandas' mean semantics and yields NaN for an empty
    # input, where building a DataFrame from an empty list has no 'ERR' column.
    mean_err = pd.Series(per_query_err, dtype=float).mean()

    return {
        'qid': 'Overall (Simple Average)',
        'ERR': round(mean_err, 5),
    }

In [None]:
# Evaluate the mean ERR at every cutoff in K, collecting one
# single-key record {"err@<k>": value} per cutoff.
err_result = []
for k in tqdm(K):
    mean_err_at_k = final_result(df, k).get("ERR")
    err_result.append({f"err@{k}": mean_err_at_k})

In [None]:
# Bare expression: the notebook renders the collected per-cutoff records as this cell's output.
err_result

In [None]:
# Save the per-cutoff ERR records as indented JSON.
report_path = Path(f"{output_file_name}/err_{model_name}.json")
# Opening a file for writing does not create missing folders, so create the
# parent first (this also covers a model_name that contains "/").
report_path.parent.mkdir(parents=True, exist_ok=True)
with report_path.open("w", encoding="utf-8") as f:
    json.dump(err_result, f, indent=4)