# In [None]:
from dotenv import load_dotenv
import os
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.callbacks import get_openai_callback
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.embeddings import GPT4AllEmbeddings
import glob
from tqdm import tqdm
import pickle
import json
import numpy as np
import concurrent.futures
import pandas as pd

# Load the QA dataset from its JSON file.
json_filename = "QA_dataset.json"
# Decode explicitly as UTF-8 (the standard JSON encoding) instead of relying
# on the platform's default text encoding, which differs between systems.
with open(json_filename, 'r', encoding="utf-8") as file:
    data_loaded = json.load(file)

from dotenv import load_dotenv, find_dotenv
# Locate the nearest .env file and load its variables into the environment.
# NOTE(review): presumably this supplies the OpenAI API credentials - confirm.
_ = load_dotenv(find_dotenv())  # read local .env file

def _extract_pdf_text(pdf):
    """Return the text of every page of *pdf*, concatenated in page order."""
    pdf_reader = PdfReader(pdf)
    # Defensive: treat a page that yields no text (None) as empty so the
    # concatenation cannot fail; join avoids quadratic string building.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def _build_verifier_prompt(query, answer, response):
    """Return the prompt asking the LLM to grade *response* against *answer*."""
    return """## Role: Answer verifier

## Goal
You can judge whether the answer is correct or not. 

## Rule
- If the key information predicted answer is same as the ground truth answer, then the answer is correct.


## Output format
{
"reason": "fill the reason why the predicted answer is wrong (False) or correct (True).", 
"answer": True or False
}

""" + f"""

The question is: {query}
Ground truth is: {answer}
Predicted answer is: {response}
"""


def _parse_verdict(verified_output):
    """Return 1 if the verifier output marks the answer correct, else 0."""
    import re  # local import so the block does not depend on file-level imports

    # Accept `"answer": True`, `"answer":true`, `"answer": "True"`, etc. The
    # last occurrence wins, mirroring the original "text after the final
    # marker" behaviour.
    verdicts = re.findall(
        r'"?answer"?\s*:\s*"?(true|false)\b',
        verified_output,
        flags=re.IGNORECASE,
    )
    if verdicts:
        return 1 if verdicts[-1].lower() == "true" else 0

    # Fallback: the original heuristic, kept for outputs the pattern misses.
    tail = verified_output.split('"answer": ')[-1]
    return 1 if "True" in tail else 0


def process(item):
    """Answer and grade the three questions attached to one dataset entry.

    The PDF named by ``item["filename"]`` is read, split into overlapping
    chunks and indexed in a FAISS store. For each question the most similar
    chunks are passed to an OpenAI "stuff" QA chain, and the same LLM is then
    asked to verify the response against the ground-truth answer.

    Args:
        item: Mapping with the keys ``filename``, ``question_1``..``question_3``
            and ``answer_1``..``answer_3``.

    Returns:
        A list of ``[pdf, query, answer, response, verified_bool]`` rows, one
        per question, where ``verified_bool`` is 1 or 0. An empty list is
        returned when the entry is malformed or the PDF yields no text.
    """
    try:
        pdf = item["filename"]
        ques = [item["question_1"], item["question_2"], item["question_3"]]
        anss = [item["answer_1"], item["answer_2"], item["answer_3"]]
    except (KeyError, TypeError):
        # Malformed entry (missing key or not a mapping): report and skip it.
        print("Key error, please check. ", item)
        return []

    text = _extract_pdf_text(pdf)

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=500,
        chunk_overlap=100,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)

    # embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
    embeddings = GPT4AllEmbeddings()
    save_faiss_index = False
    # NOTE(review): this path is identical for every PDF, so enabling
    # save_faiss_index would make all later PDFs reuse the first PDF's index.
    # Derive a per-PDF path before turning the cache on.
    index_filename = "faiss_index.gpt4all"
    if save_faiss_index and os.path.exists(index_filename):
        # NOTE(review): allow_dangerous_deserialization opts in to loading
        # pickled data; only load index files this project wrote itself.
        knowledge_base = FAISS.load_local(
            index_filename, embeddings, allow_dangerous_deserialization=True
        )
    else:
        if not chunks:
            # Nothing to index (e.g. a PDF with no extractable text): skip the
            # entry rather than let index construction abort the whole run.
            print("No extractable text, skipping. ", pdf)
            return []
        knowledge_base = FAISS.from_texts(chunks, embeddings)
        if save_faiss_index:
            knowledge_base.save_local(index_filename)

    # The LLM and the QA chain do not depend on the question; build them once.
    llm = OpenAI()
    chain = load_qa_chain(llm, chain_type="stuff")

    result = []
    for query, answer in zip(ques, anss):
        docs = knowledge_base.similarity_search(query)
        response = chain.run(input_documents=docs, question=query)

        print("======================")
        print(f"Query: {query}")
        print(f"Ans: {answer}")
        print(f"Res: {response}")

        verifier_prompt = _build_verifier_prompt(query, answer, response)
        verified_output = llm.invoke(verifier_prompt)
        print("verified_output: ", verified_output)
        verified_bool = _parse_verdict(verified_output)

        result.append([pdf, query, answer, response, verified_bool])

    return result

# Map `process` over the dataset entries using a pool of three worker threads;
# executor.map returns the per-entry results in input order.
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    results = list(executor.map(process, data_loaded))

# Flatten the per-entry row lists into one list of rows.
result = []
for x in results:
    result.extend(x)

# Persist every graded row as CSV (the output directory is created on demand).
os.makedirs("./results", exist_ok=True)
result_csv = "./results/qa_single_cs-1000.csv"
column_names = ["filename", "query", "answer", "response", "verified_bool"]
df = pd.DataFrame(result, columns=column_names)
df.to_csv(result_csv, index=False)

# Accuracy is the mean of the 0/1 verification flags.
correctness = df["verified_bool"].values
print(correctness)
print(np.mean(correctness))