In [7]:
from dotenv import load_dotenv
import os
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.callbacks import get_openai_callback
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.embeddings import GPT4AllEmbeddings
import glob
from tqdm import tqdm
import pickle
import json
import numpy as np

# load qa dataset
# Reading JSON data
json_filename = "QA_dataset.json"
with open(json_filename, 'r') as file:
    data_loaded = json.load(file)

correctness = []
for item in tqdm(data_loaded):
    
    try:
        pdf = item["filename"]
        ques= [item["question_1"] , item["question_2"], item["question_3"]]
        anss = [item["answer_1"], item["answer_2"], item["answer_3"]]
    except:
        print("Key error, please check. ", item)
        continue
    
    pdf_reader = PdfReader(pdf)
    text = ""
    for page in pdf_reader.pages:
        text += page.extract_text()


    os.environ["OPENAI_API_KEY"] = # ENTER_HERE

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )  

    chunks = text_splitter.split_text(text)

    # embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
    embeddings = GPT4AllEmbeddings()
    save_faiss_index = False
    index_filename = "faiss_index.gpt4all"
    if save_faiss_index:
        if not os.path.exists(index_filename):
            knowledge_base = FAISS.from_texts(chunks, embeddings)
            knowledge_base.save_local(index_filename)
        else:
            knowledge_base = FAISS.load_local(index_filename, embeddings, allow_dangerous_deserialization=True)
    else:
        knowledge_base = FAISS.from_texts(chunks, embeddings)
        
    for query, answer in zip(ques, anss):
        docs = knowledge_base.similarity_search(query)

        llm = OpenAI()
        chain = load_qa_chain(llm, chain_type="stuff")
        response = chain.run(input_documents=docs, question=query)

        print(f"Query: {query}")
        print(f"Ans: {answer}")
        print(f"Res: {response}")
        
        verifier_prompt = """## Role: Answer verifier

## Goal
You can judge whether the answer is correct or not. 

## Rule
- If the key information predicted answer is same as the ground truth answer, then the answer is correct.


## Output format
{
"reason": "fill the reason why the predicted answer is wrong (False) or correct (True).", 
"answer": True or False
}

""" + f"""

The question is: {query}
Ground truth is: {answer}
Predicted answer is: {response}
"""
        # try:
        verified_output = llm.invoke(verifier_prompt)
        print("verified_output: ", verified_output)
        verified_bool = verified_output.split('"answer": ')[-1]
        if "True" in verified_bool:
            verified_bool = 1
        else:
            verified_bool = 0
        # except:
        #     verified_bool = 0
        
        print(f"Correct or not: {verified_bool}")
        correctness.append(verified_bool)
        
print(correctness)
print(np.mean(correctness))

  0%|          | 0/15 [00:00<?, ?it/s]

Query: What was Amazon's total revenue in 2023?
Ans: $575B
Res:  Amazon's total revenue in 2023 was $575 billion.
verified_output:  
{
"reason": "The predicted answer is the same as the ground truth answer, which is $575B. Therefore, the answer is correct.", 
"answer": True
}
Correct or not: 1
Query: By what percentage did Amazon's AWS revenue increase in 2023?
Ans: 13%
Res:  13%
verified_output:  
{
"reason": "The predicted answer is same as the ground truth answer.", 
"answer": True
}
Correct or not: 1
Query: How much did Amazon's operating income improve in 2023 from the previous year?
Ans: 201%
Res:  Operating income in 2023 improved 201% Y oY from $12.2B to $36.9B. This is a dramatic improvement of $24.7B.


  7%|▋         | 1/15 [00:38<09:03, 38.82s/it]

verified_output:  {
"reason": "The answer is correct because it accurately states the percentage improvement and the specific numbers for both the previous year and 2023. The language used is also consistent with the question, making it a valid answer.", 
"answer": True
}
Correct or not: 1
Key error, please check.  {'question_1': 'When was Huawei founded?', 'question_2': 'How much revenue did Huawei generate in 2023?', 'question_3': 'How many employees does Huawei have in 2023?', 'filename': './docs\\annual_report_2023_en.pdf', 'first_5_page_texts': 'Huawei Investment & Holding Co., Ltd.\n2023 ANNUAL REPORTWho is Huawei?\nFounded in 1987, Huawei is a leading \nglobal provider of information and \ncommunications technology (ICT) \ninfrastructure and smart devices. We \nhave approximately 207,000 employees \nand we operate in over 170 countries \nand regions, serving more than three \nbillion people around the world. We are \ncommitted to bringing digital to every \nperson, home and orga

Created a chunk of size 2612, which is longer than the specified 1000
Created a chunk of size 1801, which is longer than the specified 1000


Query: What was Apple Inc.'s net income in the twelve months ended September 30, 2023?
Ans: $96,995 million
Res:  Apple Inc.'s net income in the twelve months ended September 30, 2023 was /dollarsign 96,995 million.
verified_output:  
{
"reason": "The predicted answer correctly identifies the key information (net income) and provides the exact amount ($96,995 million) as the ground truth answer. Therefore, the answer is correct.",
"answer": True
}
Correct or not: 1
Query: How much did Apple Inc. spend on repurchases of common stock in the twelve months ended September 30, 2023?
Ans: $77,550 million
Res:  $77,550 million.
verified_output:  
{
"reason": "The predicted answer is exactly the same as the ground truth answer, indicating that it is correct.", 
"answer": True
}
Correct or not: 1
Query: What was the total net sales for Apple Inc. in the twelve months ended September 30, 2023?
Ans: $383,285 million
Res:  The total net sales for Apple Inc. in the twelve months ended September 30,

 20%|██        | 3/15 [00:47<02:38, 13.17s/it]

verified_output:  {
"reason": "The predicted answer is the same as the ground truth answer.", 
"answer": True
}
Correct or not: 1
Query: What was Huawei's revenue in 2021?
Ans: CNY636.8 billion
Res:  Huawei's revenue in 2021 was CNY636.8 billion.
verified_output:  
{
"reason": "The predicted answer is the same as the ground truth answer, so it is correct.", 
"answer": True
}
Correct or not: 1
Query: How much did Huawei invest in R&D in 2021?
Ans: CNY142.7 billion
Res:  In 2021, Huawei invested CNY142.7 billion in R&D, representing 22.4% of the company's total revenue.
verified_output:  {
"reason": "The key information predicted answer (CNY142.7 billion) is same as the ground truth answer, then the answer is correct.", 
"answer": True
}
Correct or not: 1
Query: What was Huawei's net profit margin in 2021?
Ans: 17.9%
Res:  Huawei's net profit margin in 2021 was 17.9%.


 27%|██▋       | 4/15 [01:44<05:13, 28.49s/it]

verified_output:  
{
"reason": "The predicted answer is the same as the ground truth answer, therefore it is correct.", 
"answer": True
}
Correct or not: 1
Query: When was Huawei founded?
Ans: 1987
Res:  Huawei was founded over 30 years ago, but the exact year is not specified in the given context. It is mentioned that in 2022, Huawei had been committed to pushing the boundaries of ICT for more than 30 years, so the company was likely founded sometime in the late 1980s or early 1990s.
verified_output:  
{
"reason": "The predicted answer is not an exact match to the ground truth answer. While it is mentioned that Huawei has been pushing the boundaries of ICT for over 30 years, the exact year of its founding is not specified in the given context. Therefore, the predicted answer cannot be confirmed as correct.", 
"answer": False
}
Correct or not: 0
Query: How many employees does Huawei have?
Ans: 207,000
Res:  As of December 31, 2022, Huawei had 114,000+ employees, representing 55.4% of t

 33%|███▎      | 5/15 [02:43<06:24, 38.41s/it]

verified_output:  {
"reason": "The predicted answer is correct because the key information is same as the ground truth answer.", 
"answer": True
}
Correct or not: 1


Created a chunk of size 1343, which is longer than the specified 1000
Created a chunk of size 1114, which is longer than the specified 1000
Created a chunk of size 1020, which is longer than the specified 1000


Query: What was the global comparable sales growth percentage for McDonald's in the last year?
Ans: 9%
Res:  I don't know. This information is not provided in the given context. We would need to refer to the specific financial reports from the company to find this information.
verified_output:  {
"reason": "This information is not provided in the given context. We would need to refer to the specific financial reports from the company to find this information.", 
"answer": False
}
Correct or not: 0
Query: How many active loyalty users did McDonald's have by the end of 2023?
Ans: 150 million
Res:  McDonald's had 150 million active loyalty users by the end of 2023.
verified_output:  
{
"reason": "The predicted answer is the same as the ground truth answer, so it is correct.", 
"answer": True
}
Correct or not: 1
Query: By the end of 2027, how many restaurants worldwide does McDonald's plan to have?
Ans: 50,000
Res:  McDonald's plans to expand its footprint to 50,000 restaurants worldwide b

 40%|████      | 6/15 [03:56<07:26, 49.58s/it]

verified_output:  
{
"reason": "The predicted answer is the same as the ground truth answer.", 
"answer": True
}
Correct or not: 1
Key error, please check.  {'question_1': "What is McDonald's global sales figure for 2021?", 'question_2': "By what year does McDonald's commit to achieving net-zero emissions across its global operations?", 'question_3': 'How many loyalty members were enrolled in the U.S. six months after the launch?', 'filename': './docs\\McDonald MCD 2021 Annual Report.pdf', 'first_5_page_texts': '22\n00\n1\nA year of resilience and great progress\nAnnual Report 22\nand translate it into meaningful experiences for  \nour customers, embodying our mission to  \nmake delicious, feel-good moments easy for  \neveryone. They bring our purpose to feed and foster  \ncommunities to life every second of the day. And  \nthey do this all with a grounding in both knowing  \ntheir communities and being good neighbors.  \nA little more than a year ago, I said that we  \nwere witnessing

 53%|█████▎    | 8/15 [05:44<06:02, 51.79s/it]

verified_output:  {
"reason": "The predicted answer includes all the key information from the ground truth answer, which is 'almost 50 million'. Therefore, the answer is correct.", 
"answer": True
}
Correct or not: 1
Query: What is the per share dividend amount announced by McDonald's?
Ans: $1.67
Res:  The per share dividend amount announced by McDonald's is $1.67.
verified_output:  
{
"reason": "The predicted answer is the exact same as the ground truth, so it is correct.",
"answer": True
}
Correct or not: 1
Query: When is the dividend payable date for McDonald's shareholders?
Ans: March 15, 2024
Res:  March 15, 2024
verified_output:  Output: 
{
"reason": "The predicted answer matches the ground truth answer.",
"answer": True
}
Correct or not: 1
Query: How many McDonald's locations are there worldwide?
Ans: over 40,000
Res:  Approximately 40,000 locations in over 100 countries.


 60%|██████    | 9/15 [05:51<04:02, 40.36s/it]

verified_output:  
{
"reason": "The predicted answer is not an exact match with the ground truth, but it still provides a correct estimate.", 
"answer": True
}
Correct or not: 1
Key error, please check.  {'question_1': 'What is the new sustainable finance target set by OCBC for 2025?', 'question_2': "By how much did OCBC's net profit increase in 2021 compared to 2020?", 'question_3': 'What is the Common Equity Tier 1 Capital Adequacy Ratio of OCBC in 2021?', 'filename': './docs\\OCBC-2021-annual-review-en.pdf', 'first_5_page_texts': "Annual Report 2021\nCreating a Sustainable World.\nIt’s all in our hands.\nOversea-Chinese Banking Corporation Limited  Annual Report 2021Creating a Sustainable World.\nIt's all in our hands.\nFeatured on the cover this year is the \nlandscape of/uni00A0Guilin, located in Guangxi Province, China. The/uni00A0area surrounding Guilin has some of the most/uni00A0beautiful karst/uni00A0mountains in the world. Its  Li (Lijiang) River is a sight to behold, and/un

 73%|███████▎  | 11/15 [07:11<02:40, 40.19s/it]

verified_output:  
Output: 
{
"reason": "The predicted answer is correct because it directly states the increase in the dividend per share from the previous year for 2022, which is 28%.", 
"answer": True
}
Correct or not: 1
Query: What was Nestlé's total group sales in CHF for 2023?
Ans: 93.0 billion
Res:  I don't know.
verified_output:  
{
    "reason": "The predicted answer does not provide a numerical value and therefore cannot be compared to the ground truth answer.",
    "answer": False
}
Correct or not: 0
Query: What is the proposed dividend per share in CHF for 2023?
Ans: 3.00
Res:  The proposed dividend per share in CHF for 2023 is CHF 3.00.
verified_output:  {
"reason": "The predicted answer is correct as it directly states the dividend per share in CHF for 2023 as CHF 3.00.", 
"answer": True
}
Correct or not: 1
Query: How much did Nestlé reduce its GHG emissions by 2023 from 2018 levels?
Ans: 13.58%
Res:  Nestlé reduced its GHG emissions by approximately 13.58% from 2018 leve

 80%|████████  | 12/15 [07:31<01:46, 35.55s/it]

verified_output:  
{
"reason": "The predicted answer accurately states the percentage (13.58%) and the time period (2023) in which Nestlé reduced its GHG emissions, which matches the ground truth answer. Therefore, the answer is correct.", 
"answer": True
} 
Correct or not: 1


Created a chunk of size 2396, which is longer than the specified 1000
Created a chunk of size 2396, which is longer than the specified 1000
Created a chunk of size 2673, which is longer than the specified 1000
Created a chunk of size 1430, which is longer than the specified 1000
Created a chunk of size 1009, which is longer than the specified 1000
Created a chunk of size 1529, which is longer than the specified 1000
Created a chunk of size 1130, which is longer than the specified 1000
Created a chunk of size 1433, which is longer than the specified 1000
Created a chunk of size 1468, which is longer than the specified 1000
Created a chunk of size 1119, which is longer than the specified 1000
Created a chunk of size 1542, which is longer than the specified 1000
Created a chunk of size 1201, which is longer than the specified 1000
Created a chunk of size 1483, which is longer than the specified 1000
Created a chunk of size 1595, which is longer than the specified 1000
Created a chunk of s

Query: What is the date of the annual report for Shell plc?
Ans: December 31, 2022
Res:  The annual report for Shell plc is for the year ended December 31, 2022.
verified_output:  
{
"reason": "The predicted answer accurately provides the requested information (the date of the annual report) and matches the ground truth answer. Therefore, the answer is correct.", 
"answer": True
}
Correct or not: 1
Query: What accounting standards have the Consolidated Financial Statements of Shell plc and its subsidiaries been prepared in accordance with?
Ans: International Financial Reporting Standards (IFRS) as issued by the International Accounting Standards Board (IASB)
Res:  The Consolidated Financial Statements have been prepared in accordance with UK-adopted international accounting standards and with the requirements of the UK Companies Act 2006.
verified_output:  {
"reason": "The answer is not correct because it doesn't provide the name of the accounting standards.", 
"answer": False
}
Correc

 87%|████████▋ | 13/15 [10:27<02:20, 70.43s/it]

verified_output:  {
    "reason": "",
    "answer": true
}

Correct or not: 0


Created a chunk of size 1430, which is longer than the specified 1000
Created a chunk of size 1032, which is longer than the specified 1000
Created a chunk of size 1086, which is longer than the specified 1000
Created a chunk of size 1110, which is longer than the specified 1000
Created a chunk of size 1985, which is longer than the specified 1000
Created a chunk of size 1476, which is longer than the specified 1000
Created a chunk of size 1104, which is longer than the specified 1000
Created a chunk of size 1311, which is longer than the specified 1000
Created a chunk of size 1214, which is longer than the specified 1000
Created a chunk of size 2296, which is longer than the specified 1000
Created a chunk of size 1203, which is longer than the specified 1000
Created a chunk of size 1042, which is longer than the specified 1000
Created a chunk of size 2132, which is longer than the specified 1000
Created a chunk of size 1747, which is longer than the specified 1000
Created a chunk of s

Query: What year is the Shell plc Annual Report for?
Ans: 2023
Res: 
The Shell plc Annual Report is for the year ended December 31, 2023.
verified_output:  
{
"reason": "The predicted answer matches the ground truth answer of 2023, therefore it is correct.",
"answer": True
}
Correct or not: 1
Query: By what percentage did Shell reduce its total combined Scope 1 and 2 greenhouse gas emissions from 2016 to 2023?
Ans: 31%
Res: 

Shell reduced its total combined Scope 1 and 2 greenhouse gas emissions by approximately 20% from 2016 to 2023.
verified_output:  
{
"reason": "The predicted answer is wrong because it states a reduction of 20%, while the ground truth shows a reduction of 31%.", 
"answer": False
}
Correct or not: 0
Query: Who was the Chair of Shell plc mentioned in the 2023 Annual Report?
Ans: Sir Andrew Mackenzie
Res:  Sir Andrew Mackenzie


 93%|█████████▎| 14/15 [13:24<01:38, 98.29s/it]

verified_output:  
{
"reason": "The predicted answer is the same as the ground truth, so the answer is correct.", 
"answer": True
}
Correct or not: 1


Created a chunk of size 2512, which is longer than the specified 1000
Created a chunk of size 1280, which is longer than the specified 1000
Created a chunk of size 1591, which is longer than the specified 1000
Created a chunk of size 1031, which is longer than the specified 1000
Created a chunk of size 1256, which is longer than the specified 1000
Created a chunk of size 1077, which is longer than the specified 1000


Query: When did SoftBank Group Corp. start preparing its consolidated financial statements in accordance with the International Financial Reporting Standards (IFRS)?
Ans: From the three-month period ended June 30, 2013
Res: 

The Company has prepared the consolidated financial statements in accordance with the International Financial Reporting Standards (IFRS) from the three-month period ended June 30, 2013.
verified_output:  
{
"reason": "The predicted answer is the same as the ground truth answer, so it is correct.", 
"answer": True
}
Correct or not: 1
Query: What is SoftBank Group Corp.'s (SBG) mission as stated in the 2023 annual report?
Ans: Information Revolution—Happiness for everyone
Res:  The 2023 annual report does not state a specific mission for SBG.
verified_output:  {
"reason": "The predicted answer does not match the ground truth answer.", 
"answer": False
}
Correct or not: 0
Query: What historical figure and organization does SBG draw inspiration from, as mentioned in t

100%|██████████| 15/15 [14:17<00:00, 57.19s/it]

verified_output:  {
"reason": "The predicted answer is the same as the ground truth.", 
"answer": True
}

Correct or not: 1
[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1]
0.75



