![load_qa_chain](load_qa_chain.png)

In [1]:
# Google Cloud settings, exposed as Colab form fields via the `@param` markers.
PROJECT_ID = "cloud-llm-preview1"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

import langchain
from google.cloud import aiplatform
import vertexai

# Initialise the Vertex AI SDK with the project and region chosen above.
vertexai.init(
    project=PROJECT_ID,
    location=LOCATION,
)

# NOTE(review): `genai` is not defined or installed anywhere in view --
# presumably a local helper module shipped next to this notebook; confirm it
# is importable before running.
from genai import VertexLLM, VertexChat, VertexMultiTurnChat, VertexEmbeddings

# Request budget handed to the embeddings client below.
REQUESTS_PER_MINUTE = 100

# Text model used later by the question-answering chain.
llm = VertexLLM(
    model_name="text-bison@001",
    max_output_tokens=256,
    temperature=0.1,
    top_p=0.8,
    top_k=40,
    verbose=True,
)

# Chat wrappers: one with default settings, one multi-turn with a larger
# output-token limit.
chat = VertexChat()

mchat = VertexMultiTurnChat(
    max_output_tokens=1024,
)

# Embeddings client used to vectorise document chunks for the index.
embedding = VertexEmbeddings(
    requests_per_minute=REQUESTS_PER_MINUTE,
)

In [2]:
# Display the embeddings client's repr to check which model and
# requests-per-minute setting it was created with.
embedding

VertexEmbeddings(model_name='textembedding-gecko@001', model=<class 'vertexai.language_models._language_models.TextEmbeddingModel'>, requests_per_minute=100)

In [3]:
# Ingest PDF files
# The loader is given a URL rather than a local path; the `source` metadata
# printed by the query cells is a /tmp path, so the PDF appears to be
# downloaded to a temporary file before parsing.
from langchain.document_loaders import PyPDFLoader

# Load GOOG's 10K annual report (92 pages).
url = "https://abc.xyz/investor/static/pdf/20230203_alphabet_10K.pdf"
loader = PyPDFLoader(url)
# Each returned Document carries `source` and `page` metadata (visible in the
# query outputs); presumably one Document per PDF page -- TODO confirm.
documents = loader.load()

In [4]:
# Break the loaded documents into smaller chunks so that each one can be
# embedded and retrieved on its own.
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,  # consecutive chunks share no text
)
docs = text_splitter.split_documents(documents)

# Report how many chunks the split produced.
print(f"# of documents = {len(docs)}")

# of documents = 384


In [5]:
# Embed every chunk and index it in a local Chroma vector store.
# This can take a while because the embedding API is rate limited.
# The run log reports "without persistence", so the index lives only for the
# current session and is rebuilt on every run.
from langchain.vectorstores import Chroma

db = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
)

Using embedded DuckDB without persistence: data will be transient


Waiting
..............................................................................................................................................................................................

In [6]:
# Wrap the vector store in a retriever that performs similarity search.
# `k` is presumably the number of chunks returned per query -- the query
# outputs list two source documents, which is consistent with k=2.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
)

In [7]:
# Build the question-answering chain.
from langchain.chains import RetrievalQA

# The chain pulls chunks from the retriever and has the LLM (Vertex PaLM Text
# API) synthesise an answer from them. `return_source_documents=True` makes
# the retrieved chunks part of the result so the answer can be checked.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

# Ask a question; the result is a dict with the keys 'query', 'result' and
# 'source_documents'.
query = "What was Alphabet's net income in 2022?"
result = qa({"query": query})
print(result)

Waiting
{'query': "What was Alphabet's net income in 2022?", 'result': "Alphabet's net income in 2022 was $59,972.", 'source_documents': [Document(page_content='Alphabet Inc.\nCONSOLIDATED STATEMENTS OF INCOME\n(in millions, except per share amounts)\n Year Ended December 31,\n 2020 2021 2022\nRevenues $ 182,527 $ 257,637 $ 282,836 \nCosts and expenses:\nCost of revenues  84,732  110,939  126,203 \nResearch and development  27,573  31,562  39,500 \nSales and marketing  17,946  22,912  26,567 \nGeneral and administrative  11,052  13,510  15,724 \nTotal costs and expenses  141,303  178,923  207,994 \nIncome from operations  41,224  78,714  74,842 \nOther income (expense), net  6,858  12,020  (3,514) \nIncome before income taxes  48,082  90,734  71,328 \nProvision for income taxes  7,813  14,701  11,356 \nNet income $ 40,269 $ 76,033 $ 59,972 \nBasic net income per share of Class A, Class B, and Class C stock $ 2.96 $ 5.69 $ 4.59 \nDiluted net income per share of Class A, Class B, and Cla

![alphabet_income](alphabet_income.png)

In [8]:
# Ask a second question through the same `qa` chain. This rebinds `query` and
# `result`, so the previous cell's values are no longer available afterwards.
query = "How much office space reduction took place in 2023?"
result = qa({"query": query})
# Prints the whole result dict: the question, the generated answer, and the
# retrieved source documents.
print(result)

Waiting
{'query': 'How much office space reduction took place in 2023?', 'result': 'The office space reduction in 2023 was approximately $0.5 billion.', 'source_documents': [Document(page_content='incur employee severance and related charges of $1.9 billion  to $2.3 billion , the majority of which will be recognized in \nthe first quarter of 2023.\nIn addition, we are taking actions to optimize our global office space. As a result we expect to incur exit costs \nrelating to office space reductions of approximately $0.5 billion  in the first quarter of 2023.  We may incur additional \ncharges in the future as we further evaluate our real estate needs. Table of Contents Alphabet Inc.\n83', metadata={'source': '/tmp/tmpqnhzq8ue', 'page': 83}), Document(page_content='Note 11  of the Notes to Consolidated Financial Statements included in Item 8 of this Annual Report on Form \n10-K for additional information.\n•Operating cash flow was $91.5 billion  for the year ended December 31, 2022.\n•Ca

![](red.png)