In [2]:
! pip install -q --user google-cloud-aiplatform google-cloud-discoveryengine langchain-google-vertexai langchain-google-community

In [1]:
# Newly installed packages are only visible to a fresh kernel, so ask the
# running kernel to shut itself down and come back up.
import IPython
import time

kernel_app = IPython.Application.instance()
kernel_app.kernel.do_shutdown(restart=True)

{'status': 'ok', 'restart': True}

In [1]:
import vertexai

# Google Cloud project and region handed to vertexai.init() below.
PROJECT_ID = "qwiklabs-gcp-02-b65effade7cd"
LOCATION = "us-east1"

# Point the Vertex AI SDK at that project/region.
vertexai.init(location=LOCATION, project=PROJECT_ID)

In [2]:
# Vertex AI Search data store to query, and the location it is addressed under.
DATA_STORE_ID = "qna-datastore-id"  # @param {type:"string"}
DATA_STORE_LOCATION = "global"  # @param {type:"string"}

# Model name passed to the chat LLM.
MODEL = "gemini-2.0-flash"  # @param {type:"string"}

# Fail fast when a required identifier is blank or still the template
# placeholder, instead of surfacing an opaque API error at the first query.
if (
    not PROJECT_ID
    or not DATA_STORE_ID
    or PROJECT_ID == "YOUR_PROJECT_ID"
    or DATA_STORE_ID == "YOUR_DATA_STORE_ID"
):
    raise ValueError(
        "Please set the PROJECT_ID, DATA_STORE_ID constants to reflect your environment."
    )

In [3]:
from langchain.chains import RetrievalQA
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate

from langchain_google_vertexai import ChatVertexAI
from langchain_google_community import VertexAISearchRetriever
from langchain_google_community import VertexAIMultiTurnSearchRetriever

In [4]:
# Chat model shared by every chain built in this notebook.
llm = ChatVertexAI(model_name=MODEL)

# Retriever over the data store configured above.
retriever = VertexAISearchRetriever(
    # Which data store to search.
    project_id=PROJECT_ID,
    data_store_id=DATA_STORE_ID,
    location_id=DATA_STORE_LOCATION,
    # How much to bring back per query.
    max_documents=10,
    get_extractive_answers=True,
    max_extractive_answer_count=5,
    max_extractive_segment_count=1,
)



In [5]:
search_query = "What was Alphabet's Revenue in Q2 2021?"  # @param {type:"string"}

# Build a retrieval QA chain over the retriever; the invoke() result is left
# as the last expression so the notebook displays it.
retrieval_qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=llm,
)
retrieval_qa.invoke(search_query)

{'query': "What was Alphabet's Revenue in Q2 2021?",
 'result': "Alphabet's revenue in Q2 2021 was $61.880 billion."}

In [None]:
# NOTE(review): this cell has no execution count and does the same work as the
# cell that follows it; it looks like a leftover copy.
retrieval_qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=llm,
    return_source_documents=True,
)

results = retrieval_qa.invoke(search_query)

# Answer framed by two rules of asterisks.
print("*" * 79, results["result"], "*" * 79, sep="\n")

# Each retrieved passage under its own dashed rule.
for doc in results["source_documents"]:
    print("-" * 79, doc.page_content, sep="\n")

In [6]:
# Same QA chain as before, but also asking for the documents the answer was
# built from so they can be printed underneath it.
retrieval_qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=llm,
    return_source_documents=True,
)

results = retrieval_qa.invoke(search_query)

# Answer framed by two rules of asterisks.
print("*" * 79, results["result"], "*" * 79, sep="\n")

# Each retrieved passage under its own dashed rule.
for doc in results["source_documents"]:
    print("-" * 79, doc.page_content, sep="\n")

*******************************************************************************
Alphabet's revenue in Q2 2021 was $61.880 billion.
*******************************************************************************
-------------------------------------------------------------------------------
As we sharpen our focus, we&#39;ll continue to invest responsibly in deep computer science for the long-term.” Ruth Porat, CFO of Alphabet and Google, said: “Our consistent investments to support long-term growth are reflected in our solid performance in the second quarter, with revenues of <b>$69.7 billion</b> in the quarter, up 13% versus last year or 16% on a constant currency basis.
-------------------------------------------------------------------------------
Alphabet Inc. CONSOLIDATED STATEMENTS OF INCOME (In millions, except per share amounts, unaudited) Quarter Ended June 30, Year To Date June 30, 2021 2022 2021 2022 Revenues $ 61880 $ 69685 $ 117194 $ 137696 Costs and expenses: Cost of reve

In [7]:
# Chain variant whose output carries a "sources" entry next to the answer.
retrieval_qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=llm,
)

# Left as the last expression so the notebook displays the returned dict.
retrieval_qa_with_sources.invoke(search_query, return_only_outputs=True)

{'answer': "Alphabet's revenue in Q2 2021 was $61.880 billion.\n",
 'sources': 'gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/2021Q2_alphabet_earnings_release.pdf5, gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/2021Q2_alphabet_earnings_release.pdf2'}

In [8]:
# Multi-turn retriever against the same data store as the single-turn one.
multi_turn_retriever = VertexAIMultiTurnSearchRetriever(
    project_id=PROJECT_ID,
    location_id=DATA_STORE_LOCATION,
    data_store_id=DATA_STORE_ID,
)

# Conversation history is kept under the "chat_history" key.
# NOTE(review): the recorded output echoes this line, which looks like a
# deprecation warning from the library - confirm before relying on it.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

conversational_retrieval = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=multi_turn_retriever,
    memory=memory,
)

# First turn of the conversation.
search_query = "What were alphabet revenues in 2022?"

result = conversational_retrieval.invoke(search_query)
print(result["answer"])

  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


Alphabet revenues in 2022 were $282,836 million.


In [9]:
# Second turn on the same chain: the question names neither the company nor
# the year, so a sensible answer depends on the earlier turn being remembered.
new_query = "What about costs and expenses?"
result = conversational_retrieval.invoke(new_query)
print(result["answer"])

Alphabet's total costs and expenses in 2022 were $207,994 million.



In [10]:
# Third turn on the same chain: "this" has to be resolved from the
# conversation so far.
new_query = "Is this more than in 2021?"

result = conversational_retrieval.invoke(new_query)
print(result["answer"])

Yes, Alphabet's total costs and expenses were $207,994 million in 2022, while in 2021 they were $178,923 million.



In [11]:
# Build a chain with default settings purely to look at the prompt it uses.
qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=llm,
    return_source_documents=True,
)

# Template text of the first message in the chain's prompt.
print(qa.combine_documents_chain.llm_chain.prompt.messages[0].prompt.template)

Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
{context}


In [12]:
# Custom prompt: answer strictly from the supplied context, in a single word.
prompt_template = """Use the context to answer the question at the end.
You must always use the context and context only to answer the question. Never try to make up an answer. If the context is empty or you do not know the answer, just say "I don't know".
The answer should consist of only 1 word and not a sentence.

Context: {context}

Question: {question}
Helpful Answer:
"""

# The template has two placeholders: {context} and {question}.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# QA chain that uses the custom prompt instead of the default one.
qa_chain = RetrievalQA.from_llm(
    llm=llm,
    retriever=retriever,
    prompt=prompt,
    return_source_documents=True,
)

In [13]:
# Show the template the chain is actually holding, to confirm the custom
# prompt was picked up.
print(qa_chain.combine_documents_chain.llm_chain.prompt.template)

Use the context to answer the question at the end.
You must always use the context and context only to answer the question. Never try to make up an answer. If the context is empty or you do not know the answer, just say "I don't know".
The answer should consist of only 1 word and not a sentence.

Context: {context}

Question: {question}
Helpful Answer:



In [14]:
# Yes/no style question for the one-word-answer chain.
search_query = "Were 2020 EMEA revenues higher than 2020 APAC revenues?"

results = qa_chain.invoke(search_query)

# Answer framed by two rules of asterisks.
print("*" * 79, results["result"], "*" * 79, sep="\n")

# Each retrieved passage under its own dashed rule.
for doc in results["source_documents"]:
    print("-" * 79, doc.page_content, sep="\n")

*******************************************************************************
Yes

*******************************************************************************
-------------------------------------------------------------------------------
Year Ended December 31, % Change from 2020 2021 Prior Year EMEA revenues $ $ 43% EMEA constant currency revenues 38 % APAC revenues 32550 42% APAC constant currency revenues 40% Other Americas revenues 14404 53% Other Americas constant currency revenues 52% United States revenues 85014 39% Hedging gains (losses) 149 Total revenues $ $ 41% Revenues, excluding hedging effect $ 182351 $ Exchange rate effect (3330) Total constant currency revenues $ 254158 39% EMEA revenue growth from 2020 to 2021 was favorably affected by foreign currency exchange rates, primarily due to the US dollar weakening relative to the Euro and British pound.
-------------------------------------------------------------------------------
Google Cloud&#39;s infrastructure an