In [10]:
import pandas as pd
from io import StringIO

# The sample file is GBK-encoded, so it is opened explicitly with that codec
# and pandas parses directly from the open text handle. This avoids buffering
# the whole file into an intermediate string and keeps `df_question` bound to
# a single type (a DataFrame) for the whole notebook.
with open('../data/question_sample.csv', 'r', encoding='gbk') as f:
    df_question = pd.read_csv(f)

# Preview the first ten questions as plain text.
print(df_question.head(10))

                                               input  \
0  What are the key differences between stocks an...   
1  What factors should I consider when creating a...   
2  What is the role of inflation in determining i...   
3  How do I assess the risk and return of differe...   
4  What is the significance of credit scores,  an...   
5  How does a mortgage work, and what are the dif...   
6  What are the benefits and risks of investing i...   
7  How can I effectively plan for retirement and ...   
8  What are exchange-traded funds (ETFs), and how...   
9  How does compound interest work, and why is it...   

                                         instruction  
0  Provide a clear comparison between stocks and ...  
1  List and explain the essential factors to cons...  
2  Explain how inflation impacts interest rates, ...  
3  Outline the steps for assessing the risk and r...  
4  Explain what credit scores are, why they matte...  
5  Describe the basics of how mortgages work, inc... 

In [11]:
# One dict per row of the question table, keyed by column name.
ground_truth = df_question.to_dict('records')
# Show the first record as a quick sanity check of the structure.
ground_truth[0]

{'input': 'What are the key differences between stocks and bonds?',
 'instruction': 'Provide a clear comparison between stocks and bonds, highlighting differences in ownership, risk, return, and how they function as investment vehicles. Use simple, easy-to-understand language.'}

In [None]:
import rag

def evaluate_all(test_data):
    """Run every test question through the RAG pipeline and score the answers.

    Each question is passed to ``rag.rag_with_evaluation``; the relevance label
    found in its result decides whether the generated answer counts as a hit.
    A progress line is printed for every query.

    Args:
        test_data: Either a pandas DataFrame with an ``input`` column, or a
            sequence of records (such as the output of
            ``DataFrame.to_dict(orient='records')``) whose items can be
            indexed with ``'input'``.

    Returns:
        A ``(mrr, hit_rate)`` tuple. Only one answer is generated per query,
        so the reciprocal rank is 1 for a relevant answer and 0 otherwise,
        which makes both values the same fraction of relevant answers.
        ``(0.0, 0.0)`` is returned when ``test_data`` is empty.
    """
    total_questions = len(test_data)
    if total_questions == 0:
        # Nothing to score; return zeros instead of dividing by zero below.
        return 0.0, 0.0

    # A DataFrame yields (index, row) pairs through iterrows(); a plain list of
    # records has no such method, so its items are numbered by position.
    if hasattr(test_data, "iterrows"):
        rows = test_data.iterrows()
    else:
        rows = enumerate(test_data)

    total_rr = 0
    total_hits = 0

    for i, row in rows:
        query = row['input']
        result = rag.rag_with_evaluation(query)

        evaluated = result['evaluated_result']
        relevance = evaluated['relevance']

        # Only one answer is generated, so RR (and the hit flag) is 1 when the
        # answer is at least partly relevant and 0 otherwise.
        rr = 1 if relevance in ("RELEVANT", "PARTLY_RELEVANT") else 0
        total_rr += rr
        total_hits += rr

        print(f"Query {i+1}: Answer={evaluated['answer']}, Relevance={relevance}, RR={rr}, Hit={1 if rr else 0}, Time Taken={result['response_time']}s")

    final_mrr = total_rr / total_questions
    final_hit_rate = total_hits / total_questions

    return final_mrr, final_hit_rate

In [None]:
# Pass the DataFrame itself: evaluate_all iterates a DataFrame through
# .iterrows(), which the plain list of record dicts in `ground_truth` does not
# provide.
final_mrr, final_hit_rate = evaluate_all(df_question)
print(final_mrr)
print(final_hit_rate)