# Evaluations
Runs through each stage of the RAG agent and evaluates the performance of each stage.

In [1]:
import os
import sys
from pathlib import Path

# Make the project root (the parent of the current working directory) importable
project_root = Path.cwd().parent
sys.path.append(str(project_root))

LIMIT = 100  # number of questions to evaluate
results = dict()

# Checkpoint file rewritten after each stage, and the path for the metrics table
eval_data_dir = os.path.join('..', 'eval_data')
tmp_output_path = os.path.join(eval_data_dir, 'limit_qa_data.json')
results_path = os.path.join(eval_data_dir, 'results.csv')

## Load the data and prep the dataset

In [2]:
import json

# Directory holding the ConvFinQA json files, relative to this notebook
data_dir = os.path.join('..', 'ConvFinQA/data/')

# Load train.json; the context manager closes the file handle as soon as the
# data is parsed instead of leaving it open until garbage collection.
with open(os.path.join(data_dir, 'train.json')) as f:
    train_data = json.load(f)

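Not every record stores its question under a single `qa` key (some use `qa_`-prefixed keys instead), so we collect the question/answer pairs from whichever of these keys a record has; records with none of them contribute nothing.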

In [3]:
# Flatten every QA entry of every record into one list of
# {id, question, answer} dicts.
qa_data = [
    dict(
        id=record['id'],
        question=record[key]['question'],
        answer=record[key]['answer'],
    )
    for record in train_data
    for key in record  # iterating a dict yields its keys in order
    if key == 'qa' or key.startswith('qa_')  # account for multiple qa keys
]

In [4]:
# Preview the first five flattened QA records
qa_data[:5]

[{'id': 'Single_JKHY/2009/page_28.pdf-3',
  'question': 'what was the percentage change in the net cash from operating activities from 2008 to 2009',
  'answer': '14.1%'},
 {'id': 'Single_RSG/2008/page_114.pdf-2',
  'question': 'what was the percent of the growth in the revenues from 2007 to 2008',
  'answer': '1.3%'},
 {'id': 'Single_AAPL/2002/page_23.pdf-1',
  'question': 'what was the percentage change in net sales from 2000 to 2001?',
  'answer': '-32%'},
 {'id': 'Single_UPS/2009/page_33.pdf-2',
  'question': 'what was the difference in percentage cumulative return on investment for united parcel service inc . compared to the s&p 500 index for the five year period ended 12/31/09?',
  'answer': '-26.16%'},
 {'id': 'Double_UPS/2009/page_33.pdf',
  'question': 'what is the roi of an investment in ups in 2004 and sold in 2006?',
  'answer': '-8.9%'}]

In [5]:
# Only take the first LIMIT questions.
# Slicing copies the list but not the dicts inside it, so the per-question
# fields added in later cells are also visible through qa_data.
limit_qa_data = qa_data[:LIMIT]

In [6]:
# Checkpoint the selected questions to the temporary output file
with open(tmp_output_path, mode='w') as checkpoint_file:
    json.dump(limit_qa_data, checkpoint_file, indent=4)

## Retrieval
First let's take a look at the retrieval stage.

For this I will look at Recall and Mean Reciprocal Rank (MRR). 

Precision is less relevant here as we always retrieve K documents and there's only 1 relevant document for each question.

In [7]:
from src.nodes import retriever_node

In [8]:
# Run the retriever (metadata filtering off) for every question and record
# whether, and at which rank, a document with the question's id was returned.
for data in limit_qa_data:
    retriever_input = {'question': data['question'], 'metadata_filtering': False}
    data['retrieved_docs'] = retriever_node(retriever_input)['retrieved_docs']

    retrieved_docs_ids = [doc['id'] for doc in data['retrieved_docs']]
    try:
        # list.index is 0-based while ranks are 1-based
        rank = retrieved_docs_ids.index(data['id']) + 1
    except ValueError:
        # the matching document is not among the retrieved ones
        data['retrieved'] = 0
        data['reciprocal_rank'] = 0
    else:
        data['retrieved'] = 1
        data['reciprocal_rank'] = 1.0 / rank

In [9]:
# Calculate metrics
# Questions for which the matching document was among the retrieved ones
retrieved_data = [d for d in limit_qa_data if d['retrieved'] == 1]
num_retrieved = len(retrieved_data)
num_questions = len(limit_qa_data)

# Each average is guarded so that an empty evaluation set, or a run in which
# nothing was retrieved, reports 0.0 instead of raising ZeroDivisionError.
retrieval_recall = (
    sum(data['retrieved'] for data in limit_qa_data) / num_questions
    if num_questions else 0.0
)
retrieval_mrr = (
    sum(data['reciprocal_rank'] for data in limit_qa_data) / num_questions
    if num_questions else 0.0
)
# MRR restricted to the questions whose document was actually retrieved
retrieval_mrr_retrieved = (
    sum(data['reciprocal_rank'] for data in retrieved_data) / num_retrieved
    if num_retrieved else 0.0
)

print(f"Overall Retrieval Recall: {retrieval_recall:.3f}")
print(f"Overall Retrieval MRR: {retrieval_mrr:.3f}")
print(f"Successful Retrieval MRR: {retrieval_mrr_retrieved:.3f}")

Overall Retrieval Recall: 0.620
Overall Retrieval MRR: 0.268
Successful Retrieval MRR: 0.432


In [10]:
# Record the retrieval-stage metrics in the shared results dict
results.update({
    'overall_retrieval_recall': retrieval_recall,
    'overall_retrieval_mrr': retrieval_mrr,
    'successful_retrieval_mrr': retrieval_mrr_retrieved,
})

In [11]:
# Checkpoint the per-question data, now including the retrieval fields
with open(tmp_output_path, 'w') as out_file:
    json.dump(obj=limit_qa_data, fp=out_file, indent=4)

### Metadata Filtering Experiment
Let's run the same experiment with metadata filtering enabled.

In [None]:
# Repeat the retrieval run with metadata filtering switched on, storing the
# outcome under separate 'mf_'-prefixed keys so both runs can be compared.
for data in limit_qa_data:
    mf_input = {'question': data['question'], 'metadata_filtering': True}
    data['mf_retrieved_docs'] = retriever_node(mf_input)['retrieved_docs']

    mf_retrieved_docs_ids = [doc['id'] for doc in data['mf_retrieved_docs']]
    found = data['id'] in mf_retrieved_docs_ids
    data['mf_retrieved'] = 1 if found else 0
    if not found:
        data['mf_reciprocal_rank'] = 0
        continue

    # list.index is 0-based while ranks are 1-based
    rank = mf_retrieved_docs_ids.index(data['id']) + 1
    data['mf_reciprocal_rank'] = 1.0 / rank

In [13]:
# Calculate metrics
# Questions for which the matching document was retrieved with metadata filtering on
mf_retrieved_data = [d for d in limit_qa_data if d['mf_retrieved'] == 1]
num_mf_retrieved = len(mf_retrieved_data)
num_questions = len(limit_qa_data)

# Each average is guarded so that an empty evaluation set, or a run in which
# nothing was retrieved, reports 0.0 instead of raising ZeroDivisionError.
mf_retrieval_recall = (
    sum(data['mf_retrieved'] for data in limit_qa_data) / num_questions
    if num_questions else 0.0
)
mf_retrieval_mrr = (
    sum(data['mf_reciprocal_rank'] for data in limit_qa_data) / num_questions
    if num_questions else 0.0
)
# MRR restricted to the questions whose document was actually retrieved
mf_retrieval_mrr_retrieved = (
    sum(data['mf_reciprocal_rank'] for data in mf_retrieved_data) / num_mf_retrieved
    if num_mf_retrieved else 0.0
)

print(f"Overall MF Retrieval Recall: {mf_retrieval_recall:.3f}")
print(f"Overall MF Retrieval MRR: {mf_retrieval_mrr:.3f}")
print(f"Successful MF Retrieval MRR: {mf_retrieval_mrr_retrieved:.3f}")

Overall MF Retrieval Recall: 0.580
Overall MF Retrieval MRR: 0.312
Successful MF Retrieval MRR: 0.538


Metadata filtering had little effect on retrieval performance: MRR increased marginally, but recall was reduced. Metadata filtering will therefore not be used.

## Reranking
Now let's analyse the reranking.

This will also use Recall and MRR.

In [14]:
from src.nodes import reranker_node

import time
from tqdm import tqdm

In [None]:
# Pause between reranker calls, in seconds.
# Using the free tier Cohere API you get 10 requests per minute, i.e. one
# request every 6 seconds. Set this to 0 if you have a paid tier.
RERANK_SLEEP_SECONDS = 6

# Rerank each question's retrieved documents and record whether, and at which
# rank, a document with the question's id is present in the reranked list.
for data in tqdm(limit_qa_data, desc="Reranking Documents"):
    input_dict = {'question': data['question'], 'retrieved_docs': data['retrieved_docs']}
    reranked_docs = reranker_node(input_dict)['reranked_docs']
    data['reranked_docs'] = reranked_docs

    reranked_docs_ids = [doc['id'] for doc in reranked_docs]
    if data['id'] in reranked_docs_ids:
        data['reranked'] = 1
        rank = reranked_docs_ids.index(data['id']) + 1  # +1 because index starts at 0
        data['reranked_reciprocal_rank'] = 1.0 / rank
    else:
        data['reranked'] = 0
        data['reranked_reciprocal_rank'] = 0

    # Throttle to stay within the API rate limit (skipped when set to 0)
    if RERANK_SLEEP_SECONDS:
        time.sleep(RERANK_SLEEP_SECONDS)

In [16]:
# Calculate metrics
# Questions whose matching document is present in the reranked list
reranked_data = [d for d in limit_qa_data if d['reranked'] == 1]
num_reranked = len(reranked_data)
num_questions = len(limit_qa_data)

# Each average is guarded so that an empty evaluation set, or a stage with no
# successes, reports 0.0 instead of raising ZeroDivisionError.
reranking_recall = (
    sum(data['reranked'] for data in limit_qa_data) / num_questions
    if num_questions else 0.0
)
reranking_mrr = (
    sum(data['reranked_reciprocal_rank'] for data in limit_qa_data) / num_questions
    if num_questions else 0.0
)
# Recall measured only over questions the retrieval stage got right
# (retrieved_data / num_retrieved come from the retrieval metrics cell)
reranked_recall_retrieved = (
    sum(data['reranked'] for data in retrieved_data) / num_retrieved
    if num_retrieved else 0.0
)
# MRR measured only over questions whose document is in the reranked list
reranking_mrr_retrieved = (
    sum(data['reranked_reciprocal_rank'] for data in reranked_data) / num_reranked
    if num_reranked else 0.0
)

print(f"Overall Reranking Recall: {reranking_recall:.3f}")
print(f"Overall Reranking MRR: {reranking_mrr:.3f}")
print(f"Successful Reranking Recall: {reranked_recall_retrieved:.3f}")
print(f"Successful Reranking MRR: {reranking_mrr_retrieved:.3f}")

Overall Reranking Recall: 0.540
Overall Reranking MRR: 0.427
Successful Reranking Recall: 0.871
Successful Reranking MRR: 0.790


In [17]:
# Record the reranking-stage metrics in the shared results dict
results.update({
    'overall_reranking_recall': reranking_recall,
    'overall_reranking_mrr': reranking_mrr,
    'successful_reranking_recall': reranked_recall_retrieved,
    'successful_reranking_mrr': reranking_mrr_retrieved,
})

In [18]:
# Checkpoint the per-question data, now including the reranking fields
with open(tmp_output_path, mode='w') as rerank_checkpoint:
    json.dump(limit_qa_data, fp=rerank_checkpoint, indent=4)

## LLM Correctness
In other RAG systems, where the answer is expected as a paragraph or may draw on multiple sources, LLM faithfulness can be a good metric. In our case with ConvFinQA, the answer is expected to be a single number, so a binary correct/incorrect metric is more appropriate.

In [20]:
# Reload the per-question data from the checkpoint file; the context manager
# closes the file handle as soon as the data is parsed.
with open(tmp_output_path) as f:
    limit_qa_data = json.load(f)

In [21]:
from src.nodes import answer_question_node

import re
import math

In [22]:
def clean_number(text):
    """
    Extract and normalize the first number in an answer, removing symbols and units.

    Percent signs, dollar signs and magnitude words/letters are stripped
    without rescaling the value (e.g. "$3 million" -> 3.0), and commas between
    digits are dropped (e.g. "25,000" -> 25000.0).

    Args:
        text: The answer to clean. ``None`` yields ``None``; an ``int`` or
            ``float`` is returned as a ``float``; anything else is treated as text.

    Returns:
        ``'yes'`` or ``'no'`` for yes/no answers (surrounding whitespace and a
        trailing ``.``/``!`` are ignored), otherwise the first number found as
        a ``float``, or ``None`` if the text contains no number.
    """
    # Be tolerant of missing or already-numeric answers instead of raising
    if text is None:
        return None
    if isinstance(text, (int, float)) and not isinstance(text, bool):
        return float(text)

    # Remove common units and words
    text = str(text).lower()
    text = re.sub(r'%|percent|million|thousand|billion|m|k|b|\$', '', text)

    # handle yes/no answers present in the data; ignore padding and trailing
    # punctuation so that e.g. "Yes." is still recognised
    bare = text.strip().rstrip('.!')
    if bare in ('yes', 'no'):
        return bare

    # Remove commas from numbers (e.g., "25,000" -> "25000")
    text = re.sub(r'(\d),(\d)', r'\1\2', text)

    # Find numbers (including decimals and negatives)
    numbers = re.findall(r'-?\d*\.?\d+', text)

    if numbers:
        return float(numbers[0])  # Return first number found
    return None

In [23]:
def _answers_match(predicted, expected):
    """
    Decide whether a cleaned RAG answer agrees with the cleaned reference answer.

    Args:
        predicted: Cleaned RAG answer (float, 'yes'/'no', or None).
        expected: Cleaned reference answer (float, 'yes'/'no', or None).

    Returns:
        True if the two answers are considered the same, False otherwise.
    """
    # A missing value on either side is never a match; without this guard two
    # unparseable answers (None == None) would be scored as correct.
    if predicted is None or expected is None:
        return False
    # accounts for 'yes' and 'no' answers and direct matches
    if predicted == expected:
        return True
    if isinstance(predicted, float) and isinstance(expected, float):
        # The reference answer may be given with no decimal places (e.g. 17%)
        # while the RAG answer is more precise (e.g. 17.1% or 16.9%), so accept
        # either the truncated or the rounded RAG value. Signs are ignored to
        # account for percentage change questions.
        if expected.is_integer():
            return abs(expected) in (abs(int(predicted)), abs(round(predicted)))
        # standard case: 0.01 relative tolerance seems appropriate for this
        # data to account for LLM rounding errors
        return math.isclose(abs(predicted), abs(expected), rel_tol=0.01, abs_tol=0.05)
    return False


# Wrap the list (not the enumerate object) so tqdm knows the total and can
# show a real progress bar instead of a bare iteration counter.
for i, data in enumerate(tqdm(limit_qa_data, desc="Answering Questions")):
    input_dict = {'question': data['question'], 'reranked_docs': data['reranked_docs'], 'chat': False}
    raw_rag_answer = answer_question_node(input_dict)['answer']
    data['raw_rag_answer'] = raw_rag_answer

    # clean the answers for comparison
    cleaned_original_answer = clean_number(data['answer'])
    cleaned_rag_answer = clean_number(raw_rag_answer)
    data['cleaned_rag_answer'] = cleaned_rag_answer

    # now do the comparison
    correct = _answers_match(cleaned_rag_answer, cleaned_original_answer)
    data['correct'] = 1.0 if correct else 0.0

    print("-"*70)
    print(f"Document {i+1}: {data['id']}")
    print(f"Question {i+1}: {data['question']}")
    print(f"Original Answer: {data['answer']}")
    print(f"RAG Answer: {raw_rag_answer}")
    print(f"Cleaned Original Answer: {cleaned_original_answer}")
    print(f"Cleaned RAG Answer: {cleaned_rag_answer}")
    print(f"Correct: {correct}")
    print("-"*70)
    print("\n")

correct_count = sum(data['correct'] for data in limit_qa_data)
# Guard against an empty evaluation set
accuracy = correct_count / len(limit_qa_data) if limit_qa_data else 0.0
print(f"Correct Count: {correct_count}")
print(f"Accuracy: {accuracy:.3f}")

Answering Questions: 0it [00:00, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Answering Questions: 1it [00:01,  1.52s/it]

----------------------------------------------------------------------
Document 1: Single_JKHY/2009/page_28.pdf-3
Question 1: what was the percentage change in the net cash from operating activities from 2008 to 2009
Original Answer: 14.1%
RAG Answer: 14.2%
Cleaned Original Answer: 14.1
Cleaned RAG Answer: 14.2
Correct: True
----------------------------------------------------------------------




Answering Questions: 2it [00:03,  1.73s/it]

----------------------------------------------------------------------
Document 2: Single_RSG/2008/page_114.pdf-2
Question 2: what was the percent of the growth in the revenues from 2007 to 2008
Original Answer: 1.3%
RAG Answer: 16.0%
Cleaned Original Answer: 1.3
Cleaned RAG Answer: 16.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 3it [00:05,  1.84s/it]

----------------------------------------------------------------------
Document 3: Single_AAPL/2002/page_23.pdf-1
Question 3: what was the percentage change in net sales from 2000 to 2001?
Original Answer: -32%
RAG Answer: 8%
Cleaned Original Answer: -32.0
Cleaned RAG Answer: 8.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 4it [00:07,  1.82s/it]

----------------------------------------------------------------------
Document 4: Single_UPS/2009/page_33.pdf-2
Question 4: what was the difference in percentage cumulative return on investment for united parcel service inc . compared to the s&p 500 index for the five year period ended 12/31/09?
Original Answer: -26.16%
RAG Answer: -26.16%
Cleaned Original Answer: -26.16
Cleaned RAG Answer: -26.16
Correct: True
----------------------------------------------------------------------




Answering Questions: 5it [00:09,  1.90s/it]

----------------------------------------------------------------------
Document 5: Double_UPS/2009/page_33.pdf
Question 5: what is the roi of an investment in ups in 2004 and sold in 2006?
Original Answer: -8.9%
RAG Answer: -8.94%
Cleaned Original Answer: -8.9
Cleaned RAG Answer: -8.94
Correct: True
----------------------------------------------------------------------




Answering Questions: 6it [00:11,  2.13s/it]

----------------------------------------------------------------------
Document 6: Double_UPS/2009/page_33.pdf
Question 6: what was the difference in percentage cumulative return on investment for united parcel service inc . compared to the s&p 500 index for the five year period ended 12/31/09?
Original Answer: -26.16%
RAG Answer: -26.16%
Cleaned Original Answer: -26.16
Cleaned RAG Answer: -26.16
Correct: True
----------------------------------------------------------------------




Answering Questions: 7it [00:13,  2.06s/it]

----------------------------------------------------------------------
Document 7: Single_CE/2010/page_134.pdf-2
Question 7: what portion of the total shares subject to outstanding awards is under the 2009 global incentive plan?
Original Answer: 70.1%
RAG Answer: 2530454 shares
Cleaned Original Answer: 70.1
Cleaned RAG Answer: 2530454.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 8it [00:15,  2.08s/it]

----------------------------------------------------------------------
Document 8: Single_JPM/2013/page_104.pdf-2
Question 8: what was the percentage increase in litigation reserves in 2012?
Original Answer: 15.6%
RAG Answer: 8%
Cleaned Original Answer: 15.6
Cleaned RAG Answer: 8.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 9it [00:17,  2.01s/it]

----------------------------------------------------------------------
Document 9: Double_MAS/2012/page_92.pdf
Question 9: what was the percent of the change in the company 2019s warranty liability from 2011 to 2012
Original Answer: 15.7%
RAG Answer: 15.69%
Cleaned Original Answer: 15.7
Cleaned RAG Answer: 15.69
Correct: True
----------------------------------------------------------------------




Answering Questions: 10it [00:19,  1.83s/it]

----------------------------------------------------------------------
Document 10: Double_MAS/2012/page_92.pdf
Question 10: what was the percentage change in the company's warranty liability from 2011 to 2012?
Original Answer: 16%
RAG Answer: 15.69%
Cleaned Original Answer: 16.0
Cleaned RAG Answer: 15.69
Correct: True
----------------------------------------------------------------------




Answering Questions: 11it [00:21,  2.01s/it]

----------------------------------------------------------------------
Document 11: Single_HIG/2004/page_122.pdf-2
Question 11: what portion of total obligations are due within the next 3 years?
Original Answer: 22.99%
RAG Answer: $14,432 million
Cleaned Original Answer: 22.99
Cleaned RAG Answer: 14432.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 12it [00:23,  2.05s/it]

----------------------------------------------------------------------
Document 12: Single_SLG/2013/page_133.pdf-4
Question 12: for the years ended december 31 , 2013 , 2012 and 2011 , what was the total in millions capitalized to assets associated with compensation expense related to long-term compensation plans , restricted stock and stock options?\\n
Original Answer: 12
RAG Answer: $4.5 million in 2013, $4.1 million in 2012, and $3.4 million in 2011
Cleaned Original Answer: 12.0
Cleaned RAG Answer: 4.5
Correct: False
----------------------------------------------------------------------




Answering Questions: 13it [00:28,  2.92s/it]

----------------------------------------------------------------------
Document 13: Double_AES/2016/page_98.pdf
Question 13: what was the change in millions between 2014 and 2015 of foreign currency transaction gains ( losses ) for aes corporation?
Original Answer: 3
RAG Answer: $3 million
Cleaned Original Answer: 3.0
Cleaned RAG Answer: 3.0
Correct: True
----------------------------------------------------------------------




Answering Questions: 14it [00:36,  4.47s/it]

----------------------------------------------------------------------
Document 14: Double_AES/2016/page_98.pdf
Question 14: what was the change in millions between 2015 and 2016 of foreign currency transaction gains ( losses ) for aes corporation?
Original Answer: -19
RAG Answer: $19 million
Cleaned Original Answer: -19.0
Cleaned RAG Answer: 19.0
Correct: True
----------------------------------------------------------------------




Answering Questions: 15it [00:42,  4.76s/it]

----------------------------------------------------------------------
Document 15: Single_ETR/2008/page_336.pdf-3
Question 15: what is the percent change in net revenue between 2007 and 2008?
Original Answer: 2.4%
RAG Answer: -3.22%
Cleaned Original Answer: 2.4
Cleaned RAG Answer: -3.22
Correct: False
----------------------------------------------------------------------




Answering Questions: 16it [00:48,  5.41s/it]

----------------------------------------------------------------------
Document 16: Single_BKR/2017/page_105.pdf-2
Question 16: what portion of the authorized shares of class b common stock is outstanding as of december 31 , 2017?
Original Answer: 56.6%
RAG Answer: 56.56%
Cleaned Original Answer: 56.6
Cleaned RAG Answer: 56.56
Correct: True
----------------------------------------------------------------------




Answering Questions: 17it [00:56,  5.96s/it]

----------------------------------------------------------------------
Document 17: Double_AES/2011/page_230.pdf
Question 17: as of december 31 , 2011 , what is the total in billions available under the committed credit facilities?
Original Answer: 2.6
RAG Answer: $3 billion
Cleaned Original Answer: 2.6
Cleaned RAG Answer: 3.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 18it [01:04,  6.58s/it]

----------------------------------------------------------------------
Document 18: Double_AES/2011/page_230.pdf
Question 18: what percentage of total non-recourse debt as of december 31 , 2011 is due in 2014?
Original Answer: 11%
RAG Answer: 10.55%
Cleaned Original Answer: 11.0
Cleaned RAG Answer: 10.55
Correct: True
----------------------------------------------------------------------




Answering Questions: 19it [01:06,  5.21s/it]

----------------------------------------------------------------------
Document 19: Single_JPM/2016/page_73.pdf-4
Question 19: what was the 5 year return of the kbw bank index?
Original Answer: 158.82%
RAG Answer: 131%
Cleaned Original Answer: 158.82
Cleaned RAG Answer: 131.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 20it [01:15,  6.34s/it]

----------------------------------------------------------------------
Document 20: Double_ADBE/2018/page_66.pdf
Question 20: what is the average yearly amortization expense related to trademarks?
Original Answer: 11.1%
RAG Answer: $25.14 million
Cleaned Original Answer: 11.1
Cleaned RAG Answer: 25.14
Correct: False
----------------------------------------------------------------------




Answering Questions: 21it [01:20,  6.01s/it]

----------------------------------------------------------------------
Document 21: Double_ADBE/2018/page_66.pdf
Question 21: what is the average yearly amortization expense related to purchased technology?
Original Answer: 16.7%
RAG Answer: $65 million
Cleaned Original Answer: 16.7
Cleaned RAG Answer: 65.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 22it [01:29,  7.03s/it]

----------------------------------------------------------------------
Document 22: Double_PNC/2014/page_99.pdf
Question 22: in 2014 what was the percent of the change associated with total net charge-offs
Original Answer: 14.7%
RAG Answer: 50.7%
Cleaned Original Answer: 14.7
Cleaned RAG Answer: 50.7
Correct: False
----------------------------------------------------------------------




Answering Questions: 23it [01:38,  7.62s/it]

----------------------------------------------------------------------
Document 23: Double_PNC/2014/page_99.pdf
Question 23: what was the ratio of the provision for credit losses in 2014 compared 2013 .
Original Answer: .43
RAG Answer: 0.424
Cleaned Original Answer: 0.43
Cleaned RAG Answer: 0.424
Correct: True
----------------------------------------------------------------------




Answering Questions: 24it [01:46,  7.65s/it]

----------------------------------------------------------------------
Document 24: Single_BLK/2017/page_35.pdf-4
Question 24: what is the percentage change in the balance of asset allocation from 2016 to 2017?
Original Answer: 11.2%
RAG Answer: 98.57%
Cleaned Original Answer: 11.2
Cleaned RAG Answer: 98.57
Correct: False
----------------------------------------------------------------------




Answering Questions: 25it [01:51,  6.69s/it]

----------------------------------------------------------------------
Document 25: Single_HII/2015/page_120.pdf-4
Question 25: between 2015 and 2013 what was the average compensation expense related to the issuing of the stock award in millions
Original Answer: 40.3
RAG Answer: $40.33 million
Cleaned Original Answer: 40.3
Cleaned RAG Answer: 40.33
Correct: True
----------------------------------------------------------------------




Answering Questions: 26it [02:02,  8.20s/it]

----------------------------------------------------------------------
Document 26: Single_AMT/2002/page_104.pdf-2
Question 26: what is the total expected payments for principal of long- term debt , including capital leases in the next 36 months?
Original Answer: 594840
RAG Answer: $1,747 million
Cleaned Original Answer: 594840.0
Cleaned RAG Answer: 1747.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 27it [02:05,  6.70s/it]

----------------------------------------------------------------------
Document 27: Single_VRTX/2005/page_103.pdf-1
Question 27: what is the percent change in net loss on disposal of assets between 2004 and 2005?
Original Answer: 700%
RAG Answer: 700%
Cleaned Original Answer: 700.0
Cleaned RAG Answer: 700.0
Correct: True
----------------------------------------------------------------------




Answering Questions: 28it [02:08,  5.44s/it]

----------------------------------------------------------------------
Document 28: Single_STT/2013/page_54.pdf-4
Question 28: how much higher are the returns of the s&p 500 in the same period ( 2008-2013 ) ? as a percentage .
Original Answer: 30%
RAG Answer: 61%
Cleaned Original Answer: 30.0
Cleaned RAG Answer: 61.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 29it [02:18,  6.85s/it]

----------------------------------------------------------------------
Document 29: Single_GS/2014/page_62.pdf-2
Question 29: what is the percentage change in standardized rwas in 2014?
Original Answer: -1.2%
RAG Answer: -1.2%
Cleaned Original Answer: -1.2
Cleaned RAG Answer: -1.2
Correct: True
----------------------------------------------------------------------




Answering Questions: 30it [02:24,  6.64s/it]

----------------------------------------------------------------------
Document 30: Double_GPN/2018/page_85.pdf
Question 30: how much did the annual payments increase from 2019 to 2024 and beyond?
Original Answer: 350824 thousand
RAG Answer: $102.7 million
Cleaned Original Answer: 350824.0
Cleaned RAG Answer: 102.7
Correct: False
----------------------------------------------------------------------




Answering Questions: 31it [02:29,  6.02s/it]

----------------------------------------------------------------------
Document 31: Double_GPN/2018/page_85.pdf
Question 31: what is the yearly interest expense incurred from term a loan , ( in millions ) ?
Original Answer: 60.3
RAG Answer: $26.4 million
Cleaned Original Answer: 60.3
Cleaned RAG Answer: 26.4
Correct: False
----------------------------------------------------------------------




Answering Questions: 32it [02:31,  4.74s/it]

----------------------------------------------------------------------
Document 32: Double_EW/2016/page_36.pdf
Question 32: what was the percentage cumulative total return for edwards lifesciences for the five years ended 2016?
Original Answer: 165%
RAG Answer: 165.06%
Cleaned Original Answer: 165.0
Cleaned RAG Answer: 165.06
Correct: True
----------------------------------------------------------------------




Answering Questions: 33it [02:42,  6.67s/it]

----------------------------------------------------------------------
Document 33: Double_EW/2016/page_36.pdf
Question 33: what was the difference in cumulative percentage returns between edwards lifesciences and the s&p 500 for the five years ended 2016?
Original Answer: 67%
RAG Answer: 66.88%
Cleaned Original Answer: 67.0
Cleaned RAG Answer: 66.88
Correct: True
----------------------------------------------------------------------




Answering Questions: 34it [02:43,  5.14s/it]

----------------------------------------------------------------------
Document 34: Single_ADI/2009/page_90.pdf-3
Question 34: what percentage did the balance increase from 2007 to 2009?
Original Answer: 83.6%
RAG Answer: 118.6%
Cleaned Original Answer: 83.6
Cleaned RAG Answer: 118.6
Correct: False
----------------------------------------------------------------------




Answering Questions: 35it [02:49,  5.29s/it]

----------------------------------------------------------------------
Document 35: Single_AON/2007/page_185.pdf-4
Question 35: what is the percentual decrease observed in the future minimum rental payments during 2008 and 2009?
Original Answer: 13.25%
RAG Answer: -62.5%
Cleaned Original Answer: 13.25
Cleaned RAG Answer: -62.5
Correct: False
----------------------------------------------------------------------




Answering Questions: 36it [03:03,  7.89s/it]

----------------------------------------------------------------------
Document 36: Single_ZBH/2002/page_46.pdf-3
Question 36: what was the percentage change of total debt from 2001 to 2002?
Original Answer: -57%
RAG Answer: -56.94%
Cleaned Original Answer: -57.0
Cleaned RAG Answer: -56.94
Correct: True
----------------------------------------------------------------------




Answering Questions: 37it [03:08,  7.17s/it]

----------------------------------------------------------------------
Document 37: Single_HII/2015/page_120.pdf-1
Question 37: what is the total revenue for the fiscal year of 2015?
Original Answer: 7020
RAG Answer: $7,426 million
Cleaned Original Answer: 7020.0
Cleaned RAG Answer: 7426.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 38it [03:14,  6.74s/it]

----------------------------------------------------------------------
Document 38: Single_AES/2002/page_117.pdf-2
Question 38: what was the percentage change in revenues for investments in 50% ( 50 % ) or less owned investments accounted for using the equity method between 2000 and 2001?
Original Answer: -2%
RAG Answer: -1.5%
Cleaned Original Answer: -2.0
Cleaned RAG Answer: -1.5
Correct: True
----------------------------------------------------------------------




Answering Questions: 39it [03:21,  6.82s/it]

----------------------------------------------------------------------
Document 39: Single_ALXN/2016/page_89.pdf-1
Question 39: how many square feet are owned by the company?
Original Answer: 377000
RAG Answer: 5,889,000 square feet
Cleaned Original Answer: 377000.0
Cleaned RAG Answer: 5889000.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 40it [03:30,  7.54s/it]

----------------------------------------------------------------------
Document 40: Single_AON/2018/page_41.pdf-3
Question 40: considering the years 2017 and 2018 , what is the percentual increase observed in capital expenditures used for continuing operations?
Original Answer: 31.14%
RAG Answer: 29.41%
Cleaned Original Answer: 31.14
Cleaned RAG Answer: 29.41
Correct: False
----------------------------------------------------------------------




Answering Questions: 41it [03:38,  7.54s/it]

----------------------------------------------------------------------
Document 41: Single_RSG/2012/page_145.pdf-1
Question 41: what was the percentage decline in the total restricted cash and marketable securities from 2011 to 2012
Original Answer: -13.4%
RAG Answer: 13.4%
Cleaned Original Answer: -13.4
Cleaned RAG Answer: 13.4
Correct: True
----------------------------------------------------------------------




Answering Questions: 42it [03:47,  8.04s/it]

----------------------------------------------------------------------
Document 42: Double_CB/2008/page_144.pdf
Question 42: what was the ratio of the pre-tax impact on equity of hypothetical 10 percent strengthening of the u.s . dollar in 2007 to 2008
Original Answer: 1.79
RAG Answer: 0.56
Cleaned Original Answer: 1.79
Cleaned RAG Answer: 0.56
Correct: False
----------------------------------------------------------------------




Answering Questions: 43it [03:55,  7.93s/it]

----------------------------------------------------------------------
Document 43: Double_CB/2008/page_144.pdf
Question 43: what is percentage change in fair value of net assets denominated in foreign currencies from 2007 to 2008?
Original Answer: -31.7%
RAG Answer: -31.73%
Cleaned Original Answer: -31.7
Cleaned RAG Answer: -31.73
Correct: True
----------------------------------------------------------------------




Answering Questions: 44it [04:02,  7.72s/it]

----------------------------------------------------------------------
Document 44: Single_STT/2011/page_94.pdf-3
Question 44: what was the percent change in the value of commercial paper outstanding between 2010 and 2011?
Original Answer: 18%
RAG Answer: -100%
Cleaned Original Answer: 18.0
Cleaned RAG Answer: -100.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 45it [04:06,  6.63s/it]

----------------------------------------------------------------------
Document 45: Single_L/2016/page_62.pdf-1
Question 45: what was the growth rate of the s&p 500 index from 2011 to 2016
Original Answer: 98.2%
RAG Answer: 98.1%
Cleaned Original Answer: 98.2
Cleaned RAG Answer: 98.1
Correct: True
----------------------------------------------------------------------




Answering Questions: 46it [04:16,  7.58s/it]

----------------------------------------------------------------------
Document 46: Single_JPM/2009/page_206.pdf-3
Question 46: what was the firm's average sum of contractual principal , interest and fees in 2008 and 2009?
Original Answer: $ 110774.5 million
RAG Answer: $110,774.5 million
Cleaned Original Answer: 110774.5
Cleaned RAG Answer: 110774.5
Correct: True
----------------------------------------------------------------------




Answering Questions: 47it [04:23,  7.41s/it]

----------------------------------------------------------------------
Document 47: Single_GS/2013/page_47.pdf-2
Question 47: what is the percentage change in staff number in 2013?
Original Answer: 1.5%
RAG Answer: 8.57%
Cleaned Original Answer: 1.5
Cleaned RAG Answer: 8.57
Correct: False
----------------------------------------------------------------------




Answering Questions: 48it [04:29,  7.08s/it]

----------------------------------------------------------------------
Document 48: Single_WRK/2019/page_49.pdf-1
Question 48: in 2019 what was the net change in cash in millions
Original Answer: -489.2
RAG Answer: Not provided in the documents.
Cleaned Original Answer: -489.2
Cleaned RAG Answer: None
Correct: False
----------------------------------------------------------------------




Answering Questions: 49it [04:35,  6.73s/it]

----------------------------------------------------------------------
Document 49: Single_LMT/2014/page_31.pdf-2
Question 49: what is the growth rate in the average price of the purchased shares from october to november 2014?
Original Answer: 6.1%
RAG Answer: 7.19%
Cleaned Original Answer: 6.1
Cleaned RAG Answer: 7.19
Correct: False
----------------------------------------------------------------------




Answering Questions: 50it [04:39,  5.87s/it]

----------------------------------------------------------------------
Document 50: Double_NCLH/2017/page_57.pdf
Question 50: what will be the balance of long-term debt after 1 year assuming that everything is paid as planned and no additional debt is raised?
Original Answer: 5805209
RAG Answer: $5,156.7 million
Cleaned Original Answer: 5805209.0
Cleaned RAG Answer: 5156.7
Correct: False
----------------------------------------------------------------------




Answering Questions: 51it [04:49,  7.06s/it]

----------------------------------------------------------------------
Document 51: Double_NCLH/2017/page_57.pdf
Question 51: what portion of the expected payments within the next 12 months is allocated to the repayment of long-term debt?
Original Answer: 31.7%
RAG Answer: $1,125 million
Cleaned Original Answer: 31.7
Cleaned RAG Answer: 1125.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 52it [04:55,  6.71s/it]

----------------------------------------------------------------------
Document 52: Double_WRK/2018/page_53.pdf
Question 52: in 2018 , what percent of the net cash from operations is retained after financing and investing activities?
Original Answer: 15.16%
RAG Answer: 100%
Cleaned Original Answer: 15.16
Cleaned RAG Answer: 100.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 53it [05:03,  7.08s/it]

----------------------------------------------------------------------
Document 53: Double_WRK/2018/page_53.pdf
Question 53: as of september 30 , 2018 , what was the percent of the total debt that was current .
Original Answer: 11.55%
RAG Answer: 11.54%
Cleaned Original Answer: 11.55
Cleaned RAG Answer: 11.54
Correct: True
----------------------------------------------------------------------




Answering Questions: 54it [05:05,  5.57s/it]

----------------------------------------------------------------------
Document 54: Single_ORLY/2015/page_28.pdf-4
Question 54: what is the roi of an investment in the o'reilly automotive inc . from 2010 to 2011?
Original Answer: 32%
RAG Answer: 32%
Cleaned Original Answer: 32.0
Cleaned RAG Answer: 32.0
Correct: True
----------------------------------------------------------------------




Answering Questions: 55it [05:13,  6.39s/it]

----------------------------------------------------------------------
Document 55: Single_SWKS/2010/page_105.pdf-2
Question 55: what is the percent increase in cash and cash equivalents from year 2009 to 2010?
Original Answer: 24.4%
RAG Answer: 24.4%
Cleaned Original Answer: 24.4
Cleaned RAG Answer: 24.4
Correct: True
----------------------------------------------------------------------




Answering Questions: 56it [05:22,  7.08s/it]

----------------------------------------------------------------------
Document 56: Single_MRO/2004/page_57.pdf-4
Question 56: by what percent did effects of foreign operations decrease from 2002 to 2004?
Original Answer: -76.8%
RAG Answer: 76.79%
Cleaned Original Answer: -76.8
Cleaned RAG Answer: 76.79
Correct: True
----------------------------------------------------------------------




Answering Questions: 57it [05:28,  6.90s/it]

----------------------------------------------------------------------
Document 57: Single_ETR/2011/page_22.pdf-3
Question 57: what was the percentage change of the net revenue in 2010
Original Answer: 7.61%
RAG Answer: 24.2%
Cleaned Original Answer: 7.61
Cleaned RAG Answer: 24.2
Correct: False
----------------------------------------------------------------------




Answering Questions: 58it [05:37,  7.38s/it]

----------------------------------------------------------------------
Document 58: Single_IP/2007/page_19.pdf-2
Question 58: what was the percentage change in industry segment operating profits from 2006 to 2007?
Original Answer: 17%
RAG Answer: 16.8%
Cleaned Original Answer: 17.0
Cleaned RAG Answer: 16.8
Correct: True
----------------------------------------------------------------------




Answering Questions: 59it [05:47,  8.24s/it]

----------------------------------------------------------------------
Document 59: Single_RCL/2006/page_37.pdf-2
Question 59: what was the percentage increase in the cash and cash equivalents from 2005 to 2006
Original Answer: -16.6%
RAG Answer: 81.25%
Cleaned Original Answer: -16.6
Cleaned RAG Answer: 81.25
Correct: False
----------------------------------------------------------------------




Answering Questions: 60it [05:53,  7.67s/it]

----------------------------------------------------------------------
Document 60: Single_MRO/2008/page_135.pdf-1
Question 60: by how much did asset retirement obligations decrease from 2007 to 2008?
Original Answer: -14.9%
RAG Answer: $169 million
Cleaned Original Answer: -14.9
Cleaned RAG Answer: 169.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 61it [06:01,  7.83s/it]

----------------------------------------------------------------------
Document 61: Single_IP/2009/page_36.pdf-1
Question 61: north american printing papers net sales where what percent of total printing paper sales in 2009?
Original Answer: 49%
RAG Answer: 49.3%
Cleaned Original Answer: 49.0
Cleaned RAG Answer: 49.3
Correct: True
----------------------------------------------------------------------




Answering Questions: 62it [06:10,  8.16s/it]

----------------------------------------------------------------------
Document 62: Double_RCL/2013/page_18.pdf
Question 62: how many of the total global cruise guests are not from north america or europe?
Original Answer: 3085000
RAG Answer: 3,180,000
Cleaned Original Answer: 3085000.0
Cleaned RAG Answer: 3180000.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 63it [06:22,  9.06s/it]

----------------------------------------------------------------------
Document 63: Double_RCL/2013/page_18.pdf
Question 63: in 2013 , what percentage of global berths came from royal caribbean?
Original Answer: 22.86%
RAG Answer: 22.86%
Cleaned Original Answer: 22.86
Cleaned RAG Answer: 22.86
Correct: True
----------------------------------------------------------------------




Answering Questions: 64it [06:23,  6.76s/it]

----------------------------------------------------------------------
Document 64: Single_AAPL/2010/page_42.pdf-2
Question 64: by how much did total other income and expense decrease from 2008 to 2009?
Original Answer: 47.4%
RAG Answer: $294 million
Cleaned Original Answer: 47.4
Cleaned RAG Answer: 294.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 65it [06:34,  7.89s/it]

----------------------------------------------------------------------
Document 65: Double_ADBE/2009/page_98.pdf
Question 65: what is the growth rate in the other assets from 2008 to 2009?
Original Answer: -11.7%
RAG Answer: -0.3%
Cleaned Original Answer: -11.7
Cleaned RAG Answer: -0.3
Correct: False
----------------------------------------------------------------------




Answering Questions: 66it [06:41,  7.90s/it]

----------------------------------------------------------------------
Document 66: Double_ADBE/2009/page_98.pdf
Question 66: what portion of the prepaid rent is used during 2009?
Original Answer: -1281
RAG Answer: $1,281,000
Cleaned Original Answer: -1281.0
Cleaned RAG Answer: 1281000.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 67it [06:47,  7.25s/it]

----------------------------------------------------------------------
Document 67: Single_GPN/2017/page_91.pdf-2
Question 67: what was the percentage chaning in the total fair value of restricted stock and performance awards vested from 2016 to 2017?
Original Answer: 69%
RAG Answer: 68.5%
Cleaned Original Answer: 69.0
Cleaned RAG Answer: 68.5
Correct: False
----------------------------------------------------------------------




Answering Questions: 68it [06:53,  6.68s/it]

----------------------------------------------------------------------
Document 68: Single_RE/2013/page_40.pdf-1
Question 68: what are the total pre-tax catastrophe losses in the last three years?
Original Answer: 1905.4
RAG Answer: $1,827.6 million
Cleaned Original Answer: 1905.4
Cleaned RAG Answer: 1827.6
Correct: False
----------------------------------------------------------------------




Answering Questions: 69it [07:01,  7.25s/it]

----------------------------------------------------------------------
Document 69: Single_IP/2012/page_57.pdf-2
Question 69: what percentage where north american consumer packaging net sales of total consumer packaging sales in 2011?
Original Answer: 67%
RAG Answer: 67.4%
Cleaned Original Answer: 67.0
Cleaned RAG Answer: 67.4
Correct: True
----------------------------------------------------------------------




Answering Questions: 70it [07:11,  7.95s/it]

----------------------------------------------------------------------
Document 70: Single_ILMN/2006/page_86.pdf-3
Question 70: what was the percent of the growth in the revenues from 2005 to 2006
Original Answer: 45.3%
RAG Answer: 53.2%
Cleaned Original Answer: 45.3
Cleaned RAG Answer: 53.2
Correct: False
----------------------------------------------------------------------




Answering Questions: 71it [07:17,  7.57s/it]

----------------------------------------------------------------------
Document 71: Double_NKE/2015/page_37.pdf
Question 71: what percent of the total for 2017 was due to endorsement contracts?
Original Answer: 58%
RAG Answer: 58.4%
Cleaned Original Answer: 58.0
Cleaned RAG Answer: 58.4
Correct: True
----------------------------------------------------------------------




Answering Questions: 72it [07:25,  7.56s/it]

----------------------------------------------------------------------
Document 72: Double_NKE/2015/page_37.pdf
Question 72: what percent of the total for all years was due to contributions form the year 2020?
Original Answer: 5%
RAG Answer: 4.51%
Cleaned Original Answer: 5.0
Cleaned RAG Answer: 4.51
Correct: True
----------------------------------------------------------------------




Answering Questions: 73it [07:34,  8.00s/it]

----------------------------------------------------------------------
Document 73: Single_LKQ/2018/page_102.pdf-2
Question 73: what was the percentage change in rental expenses from 2017 to 2018?
Original Answer: 21%
RAG Answer: The percentage change in rental expenses from 2017 to 2018 is not explicitly mentioned in the provided documents.
Cleaned Original Answer: 21.0
Cleaned RAG Answer: 2017.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 74it [07:39,  6.98s/it]

----------------------------------------------------------------------
Document 74: Single_DRE/2007/page_39.pdf-4
Question 74: what was the percent of the growth in the recurring tenant improvements from 2006 to 2007
Original Answer: 8.1%
RAG Answer: 8.1%
Cleaned Original Answer: 8.1
Cleaned RAG Answer: 8.1
Correct: True
----------------------------------------------------------------------




Answering Questions: 75it [07:43,  6.25s/it]

----------------------------------------------------------------------
Document 75: Single_DISCA/2012/page_54.pdf-4
Question 75: what was the percentage cumulative total shareholder return on disca common stock from september 18 , 2008 to december 31 , 2012?
Original Answer: 359.67%
RAG Answer: 359.67%
Cleaned Original Answer: 359.67
Cleaned RAG Answer: 359.67
Correct: True
----------------------------------------------------------------------




Answering Questions: 76it [07:51,  6.85s/it]

----------------------------------------------------------------------
Document 76: Single_MMM/2007/page_38.pdf-2
Question 76: what was the percentage change in the net cash used in investing activities from 2006 to 2007
Original Answer: -6.4%
RAG Answer: 200.5%
Cleaned Original Answer: -6.4
Cleaned RAG Answer: 200.5
Correct: False
----------------------------------------------------------------------




Answering Questions: 77it [07:59,  6.98s/it]

----------------------------------------------------------------------
Document 77: Single_D/2002/page_39.pdf-2
Question 77: if the 2003 growth rate is the same as 2002 , what would 2003 gas transmission throughput be in bcf?\\n
Original Answer: 645
RAG Answer: 641 bcf
Cleaned Original Answer: 645.0
Cleaned RAG Answer: 641.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 78it [08:04,  6.49s/it]

----------------------------------------------------------------------
Document 78: Double_TSCO/2017/page_73.pdf
Question 78: what percent of the 2017 end goodwill balance is the goodwill from the acquisition?
Original Answer: 89.14%
RAG Answer: 2.16%
Cleaned Original Answer: 89.14
Cleaned RAG Answer: 2.16
Correct: False
----------------------------------------------------------------------




Answering Questions: 79it [08:11,  6.66s/it]

----------------------------------------------------------------------
Document 79: Double_TSCO/2017/page_73.pdf
Question 79: what percent did the company's goodwill balance increase between the between the beginning of 2016 and the end of 2017?
Original Answer: 808.5%
RAG Answer: 1.0%
Cleaned Original Answer: 808.5
Cleaned RAG Answer: 1.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 80it [08:20,  7.26s/it]

----------------------------------------------------------------------
Document 80: Single_ILMN/2006/page_86.pdf-1
Question 80: what was the percentage change in revenues between 2005 and 2006?
Original Answer: 45%
RAG Answer: 53.2%
Cleaned Original Answer: 45.0
Cleaned RAG Answer: 53.2
Correct: False
----------------------------------------------------------------------




Answering Questions: 81it [08:27,  7.31s/it]

----------------------------------------------------------------------
Document 81: Single_MRO/2011/page_39.pdf-1
Question 81: by how much did total revenues increase from 2010 to 2011?
Original Answer: 25.4%
RAG Answer: $408,757
Cleaned Original Answer: 25.4
Cleaned RAG Answer: 408757.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 82it [08:34,  7.04s/it]

----------------------------------------------------------------------
Document 82: Single_LMT/2018/page_29.pdf-1
Question 82: what is the percentage change in total dividends paid per share from 2017 to 2018?
Original Answer: 9.9%
RAG Answer: 27.5%
Cleaned Original Answer: 9.9
Cleaned RAG Answer: 27.5
Correct: False
----------------------------------------------------------------------




Answering Questions: 83it [08:42,  7.37s/it]

----------------------------------------------------------------------
Document 83: Double_GPN/2007/page_97.pdf
Question 83: what is the total number of approved securities by the security holders?
Original Answer: 12950000
RAG Answer: 4,292,668
Cleaned Original Answer: 12950000.0
Cleaned RAG Answer: 4292668.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 84it [08:47,  6.67s/it]

----------------------------------------------------------------------
Document 84: Double_GPN/2007/page_97.pdf
Question 84: what portion of the approved securities is issued?
Original Answer: 39.9%
RAG Answer: 1,424,356
Cleaned Original Answer: 39.9
Cleaned RAG Answer: 1424356.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 85it [08:52,  6.41s/it]

----------------------------------------------------------------------
Document 85: Single_CE/2017/page_20.pdf-2
Question 85: what was the percentage change in the research and development costs from 2015 to 2016
Original Answer: -34.4%
RAG Answer: -1.42%
Cleaned Original Answer: -34.4
Cleaned RAG Answer: -1.42
Correct: False
----------------------------------------------------------------------




Answering Questions: 86it [09:00,  6.72s/it]

----------------------------------------------------------------------
Document 86: Single_RE/2012/page_31.pdf-3
Question 86: what is the percentage change in net reserves from 2011 to 2012?
Original Answer: -11.3%
RAG Answer: -3.2%
Cleaned Original Answer: -11.3
Cleaned RAG Answer: -3.2
Correct: False
----------------------------------------------------------------------




Answering Questions: 87it [09:08,  7.18s/it]

----------------------------------------------------------------------
Document 87: Single_UPS/2012/page_51.pdf-1
Question 87: what is the growth rate in the net income from 2011 to 2012?
Original Answer: -78.8%
RAG Answer: -3.23%
Cleaned Original Answer: -78.8
Cleaned RAG Answer: -3.23
Correct: False
----------------------------------------------------------------------




Answering Questions: 88it [09:18,  7.86s/it]

----------------------------------------------------------------------
Document 88: Single_MO/2017/page_10.pdf-4
Question 88: what is the percentage change in the weight of smokeless products in operating income from 2015 to 2016?
Original Answer: 2.3%
RAG Answer: 2.34%
Cleaned Original Answer: 2.3
Cleaned RAG Answer: 2.34
Correct: True
----------------------------------------------------------------------




Answering Questions: 89it [09:26,  8.04s/it]

----------------------------------------------------------------------
Document 89: Double_CMCSA/2004/page_30.pdf
Question 89: what percentage of total cable segment capital expenditures in 2004 where due to upgrading of cable systems?
Original Answer: 25%
RAG Answer: 24.9%
Cleaned Original Answer: 25.0
Cleaned RAG Answer: 24.9
Correct: True
----------------------------------------------------------------------




Answering Questions: 90it [09:33,  7.77s/it]

----------------------------------------------------------------------
Document 90: Double_CMCSA/2004/page_30.pdf
Question 90: what percentage of total cable segment capital expenditures in 2005 where due to upgrading of cable systems?
Original Answer: 7%
RAG Answer: 6.67%
Cleaned Original Answer: 7.0
Cleaned RAG Answer: 6.67
Correct: True
----------------------------------------------------------------------




Answering Questions: 91it [09:42,  8.13s/it]

----------------------------------------------------------------------
Document 91: Single_STT/2014/page_69.pdf-2
Question 91: what is the percentage change in the average total short-duration advances from 2013 to 2014?
Original Answer: 3.1%
RAG Answer: 3.15%
Cleaned Original Answer: 3.1
Cleaned RAG Answer: 3.15
Correct: True
----------------------------------------------------------------------




Answering Questions: 92it [09:48,  7.38s/it]

----------------------------------------------------------------------
Document 92: Single_MRO/2009/page_127.pdf-1
Question 92: by what percentage did asset retirement obligations increase from 2008 to 2009?
Original Answer: 14.2%
RAG Answer: 14.2%
Cleaned Original Answer: 14.2
Cleaned RAG Answer: 14.2
Correct: True
----------------------------------------------------------------------




Answering Questions: 93it [09:54,  7.13s/it]

----------------------------------------------------------------------
Document 93: Double_AMT/2010/page_111.pdf
Question 93: at december 31 , 2010 what was the percent of the total net operating loss carry forwards set to expire between 2021 and 2025
Original Answer: 64.6%
RAG Answer: 65.1%
Cleaned Original Answer: 64.6
Cleaned RAG Answer: 65.1
Correct: True
----------------------------------------------------------------------




Answering Questions: 94it [10:00,  6.74s/it]

----------------------------------------------------------------------
Document 94: Double_AMT/2010/page_111.pdf
Question 94: what is the total net operating loss carryforwards?
Original Answer: 2484034
RAG Answer: $1,468.4 million
Cleaned Original Answer: 2484034.0
Cleaned RAG Answer: 1468.4
Correct: False
----------------------------------------------------------------------




Answering Questions: 95it [10:08,  7.14s/it]

----------------------------------------------------------------------
Document 95: Double_UAA/2017/page_52.pdf
Question 95: what percentage of operating income was the emea segment in 2016?
Original Answer: 3%
RAG Answer: 2.73%
Cleaned Original Answer: 3.0
Cleaned RAG Answer: 2.73
Correct: True
----------------------------------------------------------------------




Answering Questions: 96it [10:16,  7.28s/it]

----------------------------------------------------------------------
Document 96: Double_UAA/2017/page_52.pdf
Question 96: what portion of total operating income is generated by north america segment in 2016?
Original Answer: 97.8%
RAG Answer: $408,424
Cleaned Original Answer: 97.8
Cleaned RAG Answer: 408424.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 97it [10:24,  7.39s/it]

----------------------------------------------------------------------
Document 97: Single_IP/2007/page_30.pdf-2
Question 97: what percent of printing papers sales in 2006 was from north american printing papers net sales?
Original Answer: 66%
RAG Answer: 65.67%
Cleaned Original Answer: 66.0
Cleaned RAG Answer: 65.67
Correct: True
----------------------------------------------------------------------




Answering Questions: 98it [10:28,  6.43s/it]

----------------------------------------------------------------------
Document 98: Single_AAL/2015/page_51.pdf-3
Question 98: by how much did american airlines group inc . common stock out preform the s&p 500 index over the 4 year period?
Original Answer: 48%
RAG Answer: 62%
Cleaned Original Answer: 48.0
Cleaned RAG Answer: 62.0
Correct: False
----------------------------------------------------------------------




Answering Questions: 99it [10:34,  6.32s/it]

----------------------------------------------------------------------
Document 99: Double_LKQ/2009/page_77.pdf
Question 99: what was the percentage change in rental expense for operating leases from 2007 to 2008?
Original Answer: 84%
RAG Answer: 84.21%
Cleaned Original Answer: 84.0
Cleaned RAG Answer: 84.21
Correct: True
----------------------------------------------------------------------




Answering Questions: 100it [10:39,  6.39s/it]

----------------------------------------------------------------------
Document 100: Double_LKQ/2009/page_77.pdf
Question 100: what was the percentage change in rental expense for operating leases from 2008 to 2009?
Original Answer: 17%
RAG Answer: 16.73%
Cleaned Original Answer: 17.0
Cleaned RAG Answer: 16.73
Correct: True
----------------------------------------------------------------------


Correct Count: 44.0
Accuracy: 0.440





In [24]:
# Persist limit_qa_data as 4-space-indented JSON at the temporary per-stage output path.
with open(tmp_output_path, mode='w') as stage_file:
    stage_file.write(json.dumps(limit_qa_data, indent=4))

In [25]:
# Record the accuracy value in the shared results dict under the 'accuracy' key.
results.update({'accuracy': accuracy})

In [26]:
import pandas as pd

# Flatten the results dict into (metric name, value) rows, build a two-column
# table from them, and write it to results_path as CSV without the index column.
metric_rows = list(results.items())
results_df = pd.DataFrame(metric_rows, columns=['Metric', 'Value'])

results_df.to_csv(results_path, index=False)

In [28]:
# Render the metrics summary table as rich notebook output.
# NOTE(review): the saved output for this cell shows an 'Unnamed: 0' column, which a
# frame built directly from `results` would not have, and execution count 27 is
# missing — presumably results_df was re-read from a CSV in a since-removed cell.
# Confirm with Restart Kernel -> Run All.
display(results_df)

Unnamed: 0,Metric,Value
0,overall_retrieval_recall,0.62
1,overall_retrieval_mrr,0.267619
2,successful_retrieval_mrr,0.431644
3,overall_reranking_recall,0.54
4,overall_reranking_mrr,0.426667
5,successful_reranking_recall,0.870968
6,successful_reranking_mrr,0.790123
7,accuracy,0.44
