![](load_qa_chain.png)

In [1]:
# Google Cloud project and region used to initialise the Vertex AI SDK below.
# The trailing "@param" comments look like Colab form-field annotations, so
# they are left exactly as written.
PROJECT_ID = "cloud-llm-preview1"  # @param {type:"string"}
LOCATION = "us-central1" # @param {type:"string"}

# NOTE(review): `langchain` and `aiplatform` are not referenced by name in the
# cells visible here - confirm whether these imports are still needed.
import langchain
from google.cloud import aiplatform
import vertexai
# Point the SDK at the project/region above before any model client is created.
vertexai.init(project=PROJECT_ID, location=LOCATION)

from genai import VertexLLM, VertexChat, VertexMultiTurnChat, VertexEmbeddings

# Requests-per-minute value handed to the embeddings client at the end of this cell.
REQUESTS_PER_MINUTE = 100

# Generation settings for the text model, gathered in one mapping so they are
# easy to scan and adjust.
llm_settings = dict(
    model_name='text-bison@001',
    max_output_tokens=256,
    temperature=0.1,
    top_p=0.8,
    top_k=40,
    verbose=True,
)
llm = VertexLLM(**llm_settings)

# Chat clients: one built with default arguments, and a multi-turn client
# created with max_output_tokens=1024.
chat = VertexChat()
mchat = VertexMultiTurnChat(max_output_tokens=1024)

# Embeddings client; later cells pass it to the vector store to index the chunks.
embedding = VertexEmbeddings(requests_per_minute=REQUESTS_PER_MINUTE)

In [2]:
# Bare expression: display the embeddings client's repr to confirm its configuration.
embedding

VertexEmbeddings(model_name='textembedding-gecko@001', model=<class 'vertexai.language_models._language_models.TextEmbeddingModel'>, requests_per_minute=100)

In [5]:
# Ingest the source PDF
from langchain.document_loaders import PyPDFLoader

# Alphabet (GOOG) 10-K annual report; originally described here as 92 pages.
# NOTE(review): the object path repeats the file name, and the answers recorded
# further down quote a page footer "84/154" from a filing covering 2016-2018 -
# confirm this URL points at the intended report.
url = (
    "https://storage.googleapis.com/vtxdemos-datasets-public/"
    "20230203_alphabet_10K.pdf/20230203_alphabet_10K.pdf"
)
loader = PyPDFLoader(url)
documents = loader.load()

In [6]:
# Break the loaded documents into smaller chunks before embedding them
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
docs = text_splitter.split_documents(documents)

# Report how many chunks the splitter produced.
print(f"# of documents = {len(docs)}")

# of documents = 451


In [7]:
# Build a local Chroma vector store that serves as the search index.
# This step may take a while because the embedding API is rate limited.
from langchain.vectorstores import Chroma

db = Chroma.from_documents(documents=docs, embedding=embedding)

Waiting
.................................................................................................................................................................................................................................

In [8]:
# Wrap the index in a retriever: similarity search with k=2 passed as a search argument.
retriever_options = {
    "search_type": "similarity",
    "search_kwargs": {"k": 2},
}
retriever = db.as_retriever(**retriever_options)

In [9]:
# Build the question-answering chain
from langchain.chains import RetrievalQA

# The LLM (Vertex PaLM Text API) synthesises an answer from what the retriever
# finds in the search index. The "stuff" chain type is selected, and the source
# documents are returned alongside the answer.
chain_options = dict(
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)
qa = RetrievalQA.from_chain_type(llm=llm, **chain_options)

# First question: run it through the chain and print the full result mapping.
query = "What was Alphabet's net income in 2022?"
qa_inputs = {"query": query}
result = qa(qa_inputs)
print(result)

Waiting
{'query': "What was Alphabet's net income in 2022?", 'result': "The question is asking for Alphabet's net income in 2022. However, the document only provides information for the years 2016, 2017, and 2018. Therefore, the answer to the question is not available.", 'source_documents': [Document(page_content='6/27/23, 8:19 AM Document\nhttps://www .sec.gov/Ar chives/edgar/data/1652044/000165204419000004/goog10-kq42018.htm 84/154Table of Contents Alphabet Inc.\nAlphabet Inc.\nCONSOLIDATED STATEMENTS OF INCOME\n(In millions, except per share amounts)\n Year Ended December 31,\n 2016  2017  2018\nRevenues $ 90,272 $ 110,855 $ 136,819\nCosts and expenses:    \nCost of revenues 35,138 45,583 59,549\nResearch and development 13,948 16,625 21,419\nSales and marketing 10,485 12,893 16,333\nGeneral and administrative 6,985 6,872 8,126\nEuropean Commission fines 0 2,736 5,071\nTotal costs and expenses 66,556 84,709 110,498\nIncome from operations 23,716 26,146 26,321\nOther income (expense)

![](alphabet_income.png)

In [10]:
# Second question, answered with the `qa` chain built in an earlier cell.
query = "How much office space reduction took place in 2023?"
qa_inputs = {"query": query}
result = qa(qa_inputs)
print(result)

Waiting
{'query': 'How much office space reduction took place in 2023?', 'result': 'The question is not answerable because the context does not provide information about office space reduction in 2023.', 'source_documents': [Document(page_content='centers, as well as, servers to provide capacity for the growth of our businesses. Generally, our investment in office\nfacilities is driven by workforce needs; and our investment in data centers is driven by our compute and storage\nrequirements and has a lead time of up to three years. Further, the decrease was partially offset by an increase in\npayments for acquisitions and a decrease in maturities and sales of marketable securities.\nNet cash used in investing activities increased slightly from 2016 to 2017 primarily due to an increase in purchases of\nmarketable securities and an increase in purchases of property and equipment, partially offset by an increase in the\nmaturities and sales of marketable securities, a decrease in cash coll

![](red.png)