# Q&A over PDF using vectorstores

In [1]:
from langchain.document_loaders import PyPDFLoader, UnstructuredFileLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

In [2]:
# Recipe for building the index: text is cut into chunks (chunk_size=1000,
# no overlap), embedded with OpenAIEmbeddings, and stored in a Chroma store.
index_creator = VectorstoreIndexCreator(
    text_splitter=CharacterTextSplitter(chunk_overlap=0, chunk_size=1000),
    embedding=OpenAIEmbeddings(),
    vectorstore_cls=Chroma,
)

In [3]:
# Read the PDF through the Unstructured loader ("fast" strategy, "elements"
# mode) and build the vector index from that single loader.
loader = UnstructuredFileLoader(
    "EV0320161ENN.pdf",
    mode="elements",
    strategy="fast",
)
index = index_creator.from_loaders([loader])

In [29]:
# Ask a free-form question; the last expression displays the answer string.
query = "what are the conditions to get an electric car loan in europe"
index.query(question=query)

' The conditions to get an electric car loan in Europe include a longer repayment period, a competitive interest rate, and no origination fees.'

In [11]:
# Ask a second question, this time requesting the sources alongside the answer
# (the result is a dict with 'question', 'answer' and 'sources' keys).
query = "tell me more about Portfolio Temperature Rating Approach"
index.query_with_sources(question=query)

{'question': 'tell me more about Portfolio Temperature Rating Approach',
 'answer': ' The Portfolio Temperature Rating Approach is a method used by banks to determine their current portfolio temperature and take actions to align them to long-term temperature goals by engaging with portfolio companies. Banks commit to having a portion of their clients set their own SBTi-approved targets such that the FI is on a linear path to 100% portfolio coverage by 2040. The approach involves measuring GHG emissions per investment and/or loan, calculating the share of borrowers’ and/or investees’ emissions that should be attributed to the bank, and dividing the sum of attributed emissions by the sum of attributed activity data of all investments and/or loans.\n',
 'sources': 'EV0320161ENN.pdf'}

# Similarity search with scores

In [21]:
# Build a standalone Chroma store straight from the PDF and run a scored
# similarity search for the current `query` (set in the previous query cell).
#
# NOTE: this cell rebinds `loader` and `index`. From here on `index` is a raw
# Chroma vector store rather than the object returned by
# `index_creator.from_loaders`, so re-run the index-building cell before
# going back to the `index.query(...)` / `index.query_with_sources(...)` cells.
loader = PyPDFLoader("EV0320161ENN.pdf")
pages = loader.load_and_split()

index = Chroma.from_documents(pages, OpenAIEmbeddings())

# Ask for the top 3 matches; each result is a (Document, score) tuple.
docs = index.similarity_search_with_score(query, k=3)

docs

Using embedded DuckDB without persistence: data will be transient
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer')).


[(Document(page_content='77  \nMonitoring  Monitor portfolio evolution and alignment through use of a specific \ndashboard  \nEach of these steps requires specific decisions to be taken. For instance, when setting \ntargets there are different approaches that can be followed. As illustrated in the Science -\nbased target initiative report162, potential approaches to be adopted by banks  include:  \ni. Sectoral Decarbonization Approach (SDA);  \nii. SBT Portfolio Coverage Approach;  \niii. Portfolio Temperature Rating Approach.  \nAn overview is provided in the summary below, with a deep dive on SDA approach for \ncalculating physical emissions intensity.  \n \nSource: SBTi and BlackRock FMA analysis  \nTransition risk and physical risk measurement exercises help banks assess the financial \nmateriality of climate -related risks, while pathway alignment or net zero approaches \ncapture the environmental and social materiality perspective. Transition risk and p hysical \nrisk exercises s