## Compute NDCG Scores

In [1]:
import csv

from models.rank_eval import get_ndcg

## Write functions to read the two CSV files: scored-results.csv and question-results.csv

In [2]:
# Function to read the first CSV file with "Question," "ResultId," and "Score" columns
def read_true_csv(file_path):
    """Load graded relevance judgments from a CSV file.

    The file is expected to start with a header row naming the "Question",
    "ResultId" and "Score" columns.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A dict mapping each question string to a list of
        ``{'id': <ResultId>, 'score': <Score as float>}`` dicts, in file
        order. Rows without a question or a result id are skipped. A missing
        or blank score is read as 0.0.

    Raises:
        ValueError: If a non-blank score cannot be parsed as a float.
    """
    data_dict = {}
    # newline='' hands newline handling to the csv module, which is what the
    # csv documentation asks for so quoted fields with line breaks parse right.
    with open(file_path, 'r', newline='') as file:
        for row in csv.DictReader(file):
            question = row.get('Question') or ''
            result_id = row.get('ResultId') or ''
            if not (question and result_id):
                continue  # Incomplete row: nothing to attach a score to
            # DictReader yields None for fields missing from a short row and
            # '' for empty cells; float() rejects both, so fall back to 0.
            raw_score = (row.get('Score') or '').strip()
            score = float(raw_score) if raw_score else 0.0
            data_dict.setdefault(question, []).append({'id': result_id, 'score': score})
    return data_dict

# Function to read the second CSV file with "Question," "ResultId," and "ResultRank" columns
def read_predicted_csv(file_path):
    """Load ranked search results from a CSV file.

    The file is expected to start with a header row naming the "Question",
    "ResultId" and "ResultRank" columns.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A dict mapping each question string to a list of
        ``{'id': <ResultId>, 'score': <ResultRank as int>}`` dicts, in file
        order. Note that the rank is stored under the 'score' key. Rows
        without a question or a result id are skipped. A missing or blank
        rank is read as 0.

    Raises:
        ValueError: If a non-blank rank cannot be parsed as an int.
    """
    data_dict = {}
    # newline='' hands newline handling to the csv module, which is what the
    # csv documentation asks for so quoted fields with line breaks parse right.
    with open(file_path, 'r', newline='') as file:
        for row in csv.DictReader(file):
            question = row.get('Question') or ''
            result_id = row.get('ResultId') or ''
            if not (question and result_id):
                continue  # Incomplete row: nothing to attach a rank to
            # DictReader yields None for fields missing from a short row and
            # '' for empty cells; int() rejects both, so fall back to 0.
            raw_rank = (row.get('ResultRank') or '').strip()
            result_rank = int(raw_rank) if raw_rank else 0
            data_dict.setdefault(question, []).append({'id': result_id, 'score': result_rank})
    return data_dict



## Write a function to compute the NDCG scores

In [31]:
# Function to convert ranks 1..20 to scores 20..1
# get_ndcg needs the top-ranked item (rank=1) to have the highest score (20) 
# and the bottom-ranked item (rank=20) to have the lowest score(1)
def ranks_to_scores(ranks, max_rank=20):
    """Turn 1-based ranks into scores where a better rank gets a higher score.

    Args:
        ranks: Iterable of dicts with an 'id' key and a 'score' key, where
            'score' holds the rank of the item.
        max_rank: Largest rank expected. Rank 1 maps to ``max_rank`` and rank
            ``max_rank`` maps to 1. Defaults to 20, which reproduces the
            original ``21 - rank`` conversion.

    Returns:
        A new list of ``{'id': ..., 'score': max_rank + 1 - rank}`` dicts in
        the same order as the input. The input dicts are not modified.
    """
    ceiling = max_rank + 1
    return [{'id': entry['id'], 'score': ceiling - entry['score']} for entry in ranks]

In [32]:
# Function to calculate NDCG scores
def calculate_ndcg(true_results, predicted_results, k_values):
    """Compute NDCG at every cut-off in ``k_values`` for each judged question.

    Questions whose judgment list holds exactly one document are not scored;
    they are collected and reported on stdout instead.

    Args:
        true_results: Dict of question -> list of {'id', 'score'} judgments.
        predicted_results: Dict of question -> list of {'id', 'score'} entries
            whose 'score' is a rank. A question absent from this dict is
            treated as having an empty prediction list.
        k_values: Cut-offs, each passed to get_ndcg in turn.

    Returns:
        Dict of question -> list of get_ndcg results, ordered like k_values.
    """
    scores_by_question = {}
    lone_document_cases = []  # (question, judgment, first prediction or None)

    for query, judged in true_results.items():
        retrieved = predicted_results.get(query, [])

        if len(judged) != 1:
            # The ranks are converted afresh for each cut-off, so every
            # get_ndcg call receives its own list.
            per_cutoff = []
            for k in k_values:
                per_cutoff.append(get_ndcg(judged, ranks_to_scores(retrieved), k))
            scores_by_question[query] = per_cutoff
            continue

        first_prediction = retrieved[0] if retrieved else None
        lone_document_cases.append((query, judged[0], first_prediction))

    # Report the questions that were left out of the scoring
    if lone_document_cases:
        print("Questions with Only One Document:")
    for query, judgment, prediction in lone_document_cases:
        print(f"Question: {query}")
        print(f"True Result: {judgment}")
        print(f"Predicted Result: {prediction}")
        print()

    return scores_by_question

## Call the functions to read the csv files and to compute NDCG scores

In [33]:
# Input locations, relative to the notebook's working directory
true_results_file = '../references/scored-results.csv'
predicted_results_file = '../data/exports/question-results.csv'

# Load the relevance judgments and the ranked search results
true_results = read_true_csv(true_results_file)
predicted_results = read_predicted_csv(predicted_results_file)

# Score every question at each cut-off; each score list follows the order of k_values
k_values = [5, 3, 10]
ndcg_scores = calculate_ndcg(true_results, predicted_results, k_values)

# Show the per-question scores
print("NDCG Scores:")
for question, scores in ndcg_scores.items():
    print(f"Question: {question}, NDCG Scores: {scores}")

NDCG Scores:
Question: Who is Oliver Cowdery?, NDCG Scores: [0.09637771110074016, 0.0, 0.326803922706866]
Question: Who wrote the Book of Mormon?, NDCG Scores: [0.15101961822780524, 0.0, 0.2741708125981703]
Question: How do the Bible and Book of Mormon work together?, NDCG Scores: [0.3451913422468693, 0.29608191096586517, 0.5174228122293323]
Question: Where is the Hill Cumorah?, NDCG Scores: [0.2340771373576302, 0.09869397032195507, 0.45282232161847336]
Question: What archaeological proof is there for the Book of Mormon?, NDCG Scores: [0.482881526673006, 0.3393978490514598, 0.6346891887826417]
Question: Where did we get the Book of Abraham?, NDCG Scores: [0.0, 0.0, 0.0]
Question: How long did it take to translate the Book of Mormon?, NDCG Scores: [0.6843515475204854, 0.7653606369886217, 0.7139291466664222]
Question: How was the Book of Mormon translated?, NDCG Scores: [0.7336401894092817, 0.6161648742095498, 0.687725895933025]
Question: What was the Book of Commandments?, NDCG Scores: 

In [34]:
# Number of questions that received NDCG scores; calculate_ndcg leaves out
# questions whose judgment list has exactly one document.
len(ndcg_scores)

90

## Average NDCG scores

In [35]:
# Average the per-question scores separately for each cut-off.
# zip(*...) regroups the per-question score lists into one tuple per cut-off,
# in the same order as k_values.
per_cutoff_columns = zip(*ndcg_scores.values())
avg_ndcg_scores = {}
for cutoff, column in zip(k_values, per_cutoff_columns):
    avg_ndcg_scores[cutoff] = sum(column) / len(column)

print("\nAverage NDCG Scores:")
for k, avg_score in avg_ndcg_scores.items():
    print(f"At k={k}: {avg_score}")


Average NDCG Scores:
At k=5: 0.48690831259697737
At k=3: 0.47149845304054544
At k=10: 0.5475981121403847


## Look at an individual question

In [51]:
# Question to inspect and the cut-off to evaluate it at
q = "Why do you serve missions?"
k = 5

ndcg = get_ndcg(true_results[q], ranks_to_scores(predicted_results[q]), k)
print(f"NDCG={ndcg}")

# Judged score of whatever the search returned at ranks 1..k.
# A rank with no result, or a result with no judgment, is shown as 0.
print(f"Scores of the top {k} search results")
scores = {}
for id_score in true_results[q]:
    scores[id_score['id']] = id_score['score']
rankings = {}
for id_score in predicted_results[q]:
    rankings[id_score['score']] = id_score['id']  # 'score' holds the rank here
for ix in range(1, k+1):
    _id = rankings.get(ix, 0)
    print(ix, scores.get(_id, 0), _id)

# Judged results from best to worst, with a separator wherever the score
# drops. The listing stops at the first score drop at or beyond position k,
# so a group of tied scores is always printed in full.
print("Ideal top search results")
sorted_scores = sorted(true_results[q], key=lambda x: x['score'], reverse=True)
curr_score = 3
for ix, id_score in enumerate(sorted_scores):
    if id_score['score'] < curr_score:
        if ix >= k:
            break
        print('-----')
    print(ix+1, id_score['score'], id_score['id'])
    curr_score = id_score['score']

NDCG=0.5781119907985042
Scores of the top 5 search results
1 1.0 7fb6ff9c23fe2401fc301bbe13054babaa7db09d116face6b3f88965ea65c334
2 3.0 5714a015089a55c3b941aa1b79553bfbb11154f935fbac955e0469789f4dd0fb
3 1.0 3b52341e94eb71dafff9f6518f992c8b48217c6b048b75c97c8b1e04e95fef45
4 2.0 3789ca3f53c0e1c0a682f7cee5db1f389cd31b3ab9321bfead2166f3d16470ef
5 1.0 a66ff3ec432f9f5d3a10094b6470869c35327b2add39fa70224130663168b4d4
Ideal top search results
1 3.0 5714a015089a55c3b941aa1b79553bfbb11154f935fbac955e0469789f4dd0fb
2 3.0 0947d33fb826ad1bf3a5b999d60a85e286a666de617569bf0f51e84006e5c922
3 3.0 41c63c695d10cd390ed58db1d58a0422d015c462e799f647749c52cefc6ebcec
-----
4 2.0 3789ca3f53c0e1c0a682f7cee5db1f389cd31b3ab9321bfead2166f3d16470ef
5 2.0 c873981632bad88c58c334bc89af2dfc0278246d451c5d7c92343aa5dcab4a5e
6 2.0 2036aa60f0dfe035a2da9336b5fe9c5accc51d46963851f6241de4b46dff4db6
7 2.0 ffd4994b33c72bee01b2f550782b42d510a03dd57d8855973913bfd1d95fb6de
