## Simple Multi Document QA
- https://colab.research.google.com/drive/1mIO99-4QWgIKvjgAFj0vvEbQi5xgNCPk?usp=sharing#scrollTo=cNOqE4rcLyy-

In [1]:
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.indexes import VectorstoreIndexCreator
import os,sys
import getpass


- #### Load Multiple PDF Files with LangChain document loaders

In [2]:
# Folder that holds the source PDF reports (plain string: no f-string placeholders needed).
pdf_folder_path = '/root/workspace/data/DOCs/PDF'
# os.listdir returns entries in arbitrary order, so sort for a stable, reproducible listing.
sorted(os.listdir(pdf_folder_path))

['Belgium_2022.pdf',
 'Indonesia_2023.pdf',
 'Tanzania_2023.pdf',
 'UAE_2021.pdf',
 'USA_2022.pdf']

In [3]:
# Build one loader per PDF file found in pdf_folder_path.
# The names are sorted so that loaders[0] refers to the same file on every run
# (os.listdir order is arbitrary), and entries without a .pdf extension
# (e.g. hidden files or sub-folders) are skipped instead of being handed to the PDF loader.
pdf_file_names = sorted(
    fn for fn in os.listdir(pdf_folder_path) if fn.lower().endswith('.pdf')
)
loaders = [UnstructuredPDFLoader(os.path.join(pdf_folder_path, fn)) for fn in pdf_file_names]
loaders

[<langchain_community.document_loaders.pdf.UnstructuredPDFLoader at 0x7fcef81c70d0>,
 <langchain_community.document_loaders.pdf.UnstructuredPDFLoader at 0x7fcef06a0250>,
 <langchain_community.document_loaders.pdf.UnstructuredPDFLoader at 0x7fce31cc1fd0>,
 <langchain_community.document_loaders.pdf.UnstructuredPDFLoader at 0x7fcef81cec40>,
 <langchain_community.document_loaders.pdf.UnstructuredPDFLoader at 0x7fcef80a24f0>]

In [10]:
# Parse the first PDF as a quick sanity check that loading works.
doc_0 = loaders[0].load()
# Show only a short preview of each parsed document rather than echoing the full text,
# which would flood the notebook output.
PREVIEW_CHARS = 500
[doc.page_content[:PREVIEW_CHARS] for doc in doc_0]



- #### Vector Store

In [14]:
# Prompt for the OpenAI API key only when it is missing (or empty) in the environment,
# so re-running the notebook does not ask again or overwrite a key that is already configured.
# getpass keeps the key out of the notebook source and its saved output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass(prompt='OpenAI API Token:')

In [15]:
# Build the vector index over every loaded PDF in a single step.
# Per the original notes, the index creator takes care of chunking and embedding,
# and uses OpenAI embeddings behind the scenes (which is why the API key is needed).
index_creator = VectorstoreIndexCreator()
index = index_creator.from_loaders(loaders)
index

VectorStoreIndexWrapper(vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x7fce5c270f70>)

In [16]:
# Ask about suggested policies; the result is a dict with the question, the answer and the source files.
policy_question = 'what are the policy suggested when a downside or upside risks materialized?'
index.query_with_sources(policy_question)

{'question': 'what are the policy suggested when a downside or upside risks materialized?',
 'answer': ' The suggested policies when a downside or upside risk materializes are to carefully balance risks to inflation, output, and financial stability, to use fiscal policy in a counter-cyclical role, and to consider front loading fiscal stimulus related to structural reforms. \n',
 'sources': '/root/workspace/data/DOCs/PDF/Indonesia_2023.pdf, /root/workspace/data/DOCs/PDF/UAE_2021.pdf'}

In [17]:
# Ask which risks and spillovers the reports discuss; sources list the PDFs the answer drew on.
spillover_question = 'Which were the main risks and spillovers discussed?'
index.query_with_sources(spillover_question)


{'question': 'Which were the main risks and spillovers discussed?',
 'answer': ' The main risks and spillovers discussed were mitigating spillovers from the war in Ukraine and promoting sustainable and inclusive private sector-led growth. \n',
 'sources': '/root/workspace/data/DOCs/PDF/Tanzania_2023.pdf, /root/workspace/data/DOCs/PDF/Indonesia_2023.pdf, /root/workspace/data/DOCs/PDF/UAE_2021.pdf'}

In [18]:
# Ask for the overall risk level and how the balance of risks is described.
# The question text (including its trailing spaces) is kept exactly as originally submitted.
risk_balance_question = 'What is the overall level of risks and how is the balance of risks characterized?  '
index.query_with_sources(risk_balance_question)

{'question': 'What is the overall level of risks and how is the balance of risks characterized?  ',
 'answer': ' The overall level of risks is assessed as "low" and the balance of risks is characterized as "balanced". \n',
 'sources': '/root/workspace/data/DOCs/PDF/Indonesia_2023.pdf, /root/workspace/data/DOCs/PDF/UAE_2021.pdf, /root/workspace/data/DOCs/PDF/Belgium_2022.pdf, /root/workspace/data/DOCs/PDF/Tanzania_2023.pdf'}

In [19]:
# Ask for the sentiment of the bottom-line systemic-risk assessment.
sentiment_question = 'What is the sentiment of the bottom-line assessment on systemic risks?'
index.query_with_sources(sentiment_question)


{'question': 'What is the sentiment of the bottom-line assessment on systemic risks?',
 'answer': ' The sentiment of the bottom-line assessment on systemic risks is "low". \n',
 'sources': '/root/workspace/data/DOCs/PDF/Indonesia_2023.pdf, /root/workspace/data/DOCs/PDF/Belgium_2022.pdf, /root/workspace/data/DOCs/PDF/Indonesia_2023.pdf, /root/workspace/data/DOCs/PDF/Tanzania_2023.pdf'}

In [20]:
# Ask how the reports' risks compare with publications by the authorities and other IFIs.
comparison_question = 'How do the risks discussed in the reports compared with news analysis publications by the authorities and other IFIs?'
index.query_with_sources(comparison_question)


{'question': 'How do the risks discussed in the reports compared with news analysis publications by the authorities and other IFIs?',
 'answer': ' The risks discussed in the reports are similar to those discussed in news analysis publications by the authorities and other IFIs.\n',
 'sources': '/root/workspace/data/DOCs/PDF/UAE_2021.pdf, /root/workspace/data/DOCs/PDF/Indonesia_2023.pdf, /root/workspace/data/DOCs/PDF/Belgium_2022.pdf'}

###### References
- https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf
- https://python.langchain.com/docs/modules/data_connection/vectorstores/
- https://python.langchain.com/docs/modules/data_connection/retrievers/