In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Question Answering with Large Documents using LlamaIndex on Google Cloud

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/language/use-cases/document-qa/question_answering_documents_langchain.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Run in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/language/use-cases/document-qa/question_answering_documents_langchain.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/blob/main/language/use-cases/document-qa/question_answering_documents_langchain.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
</table>


| | |
|-|-|
|Author(s) | [Mona Mona](https://github.com/mona19) |

### Objective

In this tutorial, you learn how to use LlamaIndex with Vertex AI Vector Search and Gemini to answer questions over a large document. You will:

- Deploy Vertex AI vector search index
- Load document for question answering
- Chunk and embed documents using Vertex AI embeddings into Vector Search index.
- Use Gemini model to ask questions to the index
- Evaluate the faithfulness of the response
- Clean up: delete the Vertex AI Vector Search index and index endpoint

### Costs

This tutorial uses billable components of Google Cloud:

- Vertex AI (Vector Search, text embeddings, and Gemini)
- Cloud Storage


Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing),
and use the [Pricing Calculator](https://cloud.google.com/products/calculator/)
to generate a cost estimate based on your projected usage.

## Getting Started

### Install LlamaIndex, its Vertex AI integrations, and their dependencies

Install the following packages required to execute this notebook.

In [None]:
! pip install llama-index llama-index-vector-stores-vertexaivectorsearch llama-index-llms-vertex


Collecting llama-index-vector-stores-vertexaivectorsearch
  Downloading llama_index_vector_stores_vertexaivectorsearch-0.0.1-py3-none-any.whl (9.9 kB)
Collecting llama-index-llms-vertex
  Downloading llama_index_llms_vertex-0.1.8-py3-none-any.whl (7.3 kB)
Collecting google-cloud-storage<3.0.0,>=2.16.0 (from llama-index-vector-stores-vertexaivectorsearch)
  Downloading google_cloud_storage-2.16.0-py2.py3-none-any.whl (125 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.6/125.6 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting llama-index-embeddings-vertex<0.2.0,>=0.1.0 (from llama-index-vector-stores-vertexaivectorsearch)
  Downloading llama_index_embeddings_vertex-0.1.0-py3-none-any.whl (4.0 kB)
Collecting pyarrow<16.0.0,>=15.0.2 (from llama-index-llms-vertex)
  Downloading pyarrow-15.0.2-cp310-cp310-manylinux_2_28_x86_64.whl (38.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.3/38.3 MB[0m [31m25.3 MB/s[0m eta [36m0:00:0

### Colab only: Run the following cell to restart the kernel

***Colab only***: Run the following cell to restart the kernel, or use the button to restart the kernel. For Vertex AI Workbench, you can restart the kernel using the button at the top.

In [None]:
# Restart the kernel once the installs finish, so the new packages
# become importable in this environment.
import IPython

app = IPython.Application.instance()
app.kernel.do_shutdown(restart=True)

### Authenticating your notebook environment

- If you are using **Colab** to run this notebook, run the cell below and continue.
- If you are using **Vertex AI Workbench**, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env).

In [None]:
import sys

# The Colab runtime pre-loads the `google.colab` package; use its presence
# to decide whether the interactive Colab login is needed.
running_in_colab = "google.colab" in sys.modules

if running_in_colab:
    from google.colab import auth

    auth.authenticate_user()

- If you are running this notebook in a local development environment:
  - Install the [Google Cloud SDK](https://cloud.google.com/sdk).
  - Obtain authentication credentials. Create local credentials by running the following command and following the oauth2 flow (read more about the command [here](https://cloud.google.com/sdk/gcloud/reference/beta/auth/application-default/login)):

    ```bash
    gcloud auth application-default login
    ```

### Import libraries

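Set your project ID, region, and Cloud Storage bucket, then initialize the Vertex AI SDK. Run this cell in every environment (Colab, Vertex AI Workbench, or local): the constants defined here are used throughout the rest of the notebook.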

In [None]:
import vertexai
from google.cloud import aiplatform

# Project and Storage Constants
PROJECT_ID = "<>"  # @param {type:"string"}
REGION = "us-central1"  # @param {type:"string"}
GCS_BUCKET_NAME = "your bucket name"  # @param {type:"string"}
# Derived from GCS_BUCKET_NAME so the bucket name only has to be edited once.
GCS_BUCKET_URI = f"gs://{GCS_BUCKET_NAME}"

# The number of dimensions for the textembedding-gecko@003 is 768
# If other embedder is used, the dimensions would probably need to change.
VS_DIMENSIONS = 768

# Vertex AI Vector Search Index configuration
# parameter description here
# https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.MatchingEngineIndex#google_cloud_aiplatform_MatchingEngineIndex_create_tree_ah_index
VS_INDEX_NAME = "llamaindex-doc-index"  # @param {type:"string"}
VS_INDEX_ENDPOINT_NAME = "llamaindex-doc-endpoint"  # @param {type:"string"}

# Point the SDK at the project and region used by every later cell.
aiplatform.init(project=PROJECT_ID, location=REGION)

In [None]:
# Create a bucket.
! gsutil mb -l REGION−pREGION−pREGION -p PROJECT_ID $GCS_BUCKET_URI

Creating gs://faa_pdfs/...
BadRequestException: 400 The specified location constraint is not valid.


In [None]:
# NOTE: This operation can take up to 30 seconds.

# Resource names of any existing indexes with the configured display name.
index_names = [
    existing.resource_name
    for existing in aiplatform.MatchingEngineIndex.list(
        filter=f"display_name={VS_INDEX_NAME}"
    )
]

if index_names:
    # Reuse the first matching index instead of creating a duplicate.
    vs_index = aiplatform.MatchingEngineIndex(index_name=index_names[0])
    print(
        f"Vector Search index {vs_index.display_name} exists with resource name {vs_index.resource_name}"
    )
else:
    print(f"Creating Vector Search index {VS_INDEX_NAME} ...")
    vs_index = aiplatform.MatchingEngineIndex.create_tree_ah_index(
        display_name=VS_INDEX_NAME,
        dimensions=VS_DIMENSIONS,
        approximate_neighbors_count=100,
        distance_measure_type="DOT_PRODUCT_DISTANCE",
        shard_size="SHARD_SIZE_SMALL",
        index_update_method="STREAM_UPDATE",  # allowed values BATCH_UPDATE , STREAM_UPDATE
    )
    print(
        f"Vector Search index {vs_index.display_name} created with resource name {vs_index.resource_name}"
    )

Vector Search index llamaindex-doc-index exists with resource name projects/474775107710/locations/us-central1/indexes/2289645003913297920



**Create an Endpoint**

To use the index, you need to create an index endpoint. It works as a server instance accepting query requests for your index. An endpoint can be a public endpoint or a private endpoint.

Let's create a public endpoint.

In [None]:
# Resource names of any existing index endpoints with the configured display name.
endpoint_names = [
    existing.resource_name
    for existing in aiplatform.MatchingEngineIndexEndpoint.list(
        filter=f"display_name={VS_INDEX_ENDPOINT_NAME}"
    )
]

if endpoint_names:
    # Reuse the first matching endpoint instead of creating a duplicate.
    vs_endpoint = aiplatform.MatchingEngineIndexEndpoint(
        index_endpoint_name=endpoint_names[0]
    )
    print(
        f"Vector Search index endpoint {vs_endpoint.display_name} exists with resource name {vs_endpoint.resource_name}"
    )
else:
    print(
        f"Creating Vector Search index endpoint {VS_INDEX_ENDPOINT_NAME} ..."
    )
    # A public endpoint is requested via public_endpoint_enabled=True.
    vs_endpoint = aiplatform.MatchingEngineIndexEndpoint.create(
        public_endpoint_enabled=True,
        display_name=VS_INDEX_ENDPOINT_NAME,
    )
    print(
        f"Vector Search index endpoint {vs_endpoint.display_name} created with resource name {vs_endpoint.resource_name}"
    )

Vector Search index endpoint llamaindex-doc-endpoint exists with resource name projects/474775107710/locations/us-central1/indexEndpoints/2278948954798292992


**Deploy Index to the Endpoint**

With the index endpoint, deploy the index by specifying a unique deployed index ID.

NOTE: This operation can take up to 30 minutes.

In [None]:
# Check whether the index is already deployed anywhere: collect one
# (index endpoint, deployed index id) pair per existing deployment.
index_endpoints = [
    (deployment.index_endpoint, deployment.deployed_index_id)
    for deployment in vs_index.deployed_indexes
]

if index_endpoints:
    # Already deployed: attach to the endpoint of the first existing deployment.
    vs_deployed_index = aiplatform.MatchingEngineIndexEndpoint(
        index_endpoint_name=index_endpoints[0][0]
    )
    print(
        f"Vector Search index {vs_index.display_name} is already deployed at endpoint {vs_deployed_index.display_name}"
    )
else:
    print(
        f"Deploying Vector Search index {vs_index.display_name} at endpoint {vs_endpoint.display_name} ..."
    )
    vs_deployed_index = vs_endpoint.deploy_index(
        index=vs_index,
        deployed_index_id="new_deployed_index_id",
        display_name=VS_INDEX_NAME,
        machine_type="e2-standard-16",
        min_replica_count=1,
        max_replica_count=1,
    )
    print(
        f"Vector Search index {vs_index.display_name} is deployed at endpoint {vs_deployed_index.display_name}"
    )

Vector Search index llamaindex-doc-index is already deployed at endpoint llamaindex-doc-endpoint


In [None]:
# import modules needed
from llama_index.core import (
    StorageContext,
    Settings,
    VectorStoreIndex,
    SimpleDirectoryReader,
)
from llama_index.core.schema import TextNode
from llama_index.core.vector_stores.types import (
    MetadataFilters,
    MetadataFilter,
    FilterOperator,
)
from llama_index.llms.vertex import Vertex
from llama_index.embeddings.vertex import VertexTextEmbedding
from llama_index.vector_stores.vertexaivectorsearch import VertexAIVectorStore

**Parse, Index and Query PDFs using Vertex AI Vector Search and Gemini Pro**

In [None]:
! mkdir -p ./data/arxiv/
! wget 'https://arxiv.org/pdf/1706.03762.pdf' -O ./data/arxiv/test.pdf

--2024-06-07 14:47:47--  https://arxiv.org/pdf/1706.03762.pdf
Resolving arxiv.org (arxiv.org)... 151.101.3.42, 151.101.195.42, 151.101.67.42, ...
Connecting to arxiv.org (arxiv.org)|151.101.3.42|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: http://arxiv.org/pdf/1706.03762 [following]
--2024-06-07 14:47:47--  http://arxiv.org/pdf/1706.03762
Connecting to arxiv.org (arxiv.org)|151.101.3.42|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2215244 (2.1M) [application/pdf]
Saving to: ‘./data/arxiv/test.pdf’


2024-06-07 14:47:47 (26.1 MB/s) - ‘./data/arxiv/test.pdf’ saved [2215244/2215244]



In [None]:
# Read every file in the download folder into LlamaIndex documents.
documents = SimpleDirectoryReader(input_dir="./data/arxiv/").load_data()
print(f"# of documents = {len(documents)}")

# of documents = 15


For reference, the sample PDF used above was downloaded with the following commands:

```bash
mkdir -p ./data/arxiv/
wget 'https://arxiv.org/pdf/1706.03762.pdf' -O ./data/arxiv/test.pdf
```

### Import models and initialize the Vector Store

Set up the vector store, then load the pre-trained text embedding model and the text generation model.

In [None]:
# Vector store backed by the Vector Search index and endpoint created above.
vector_store = VertexAIVectorStore(
    project_id=PROJECT_ID,
    region=REGION,
    gcs_bucket_name=GCS_BUCKET_NAME,
    index_id=vs_index.resource_name,
    endpoint_id=vs_endpoint.resource_name,
)

# Storage context that routes LlamaIndex reads and writes to that vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Embedding model; its output size must match VS_DIMENSIONS.
embed_model = VertexTextEmbedding(
    project=PROJECT_ID,
    location=REGION,
    model_name="textembedding-gecko@003",
)

# Gemini LLM used later for answering questions and for evaluation.
vertex_gemini = Vertex(additional_kwargs={}, temperature=0, model="gemini-pro")



In [None]:
# Build the index from the loaded documents, using the Vertex embedding
# model and the Vector Search-backed storage context.
index = VectorStoreIndex.from_documents(
    documents,
    embed_model=embed_model,
    storage_context=storage_context,
)


INFO:google.cloud.aiplatform.matching_engine.matching_engine_index:Upserting datapoints MatchingEngineIndex index: projects/474775107710/locations/us-central1/indexes/2289645003913297920
INFO:google.cloud.aiplatform.matching_engine.matching_engine_index:MatchingEngineIndex index Upserted datapoints. Resource name: projects/474775107710/locations/us-central1/indexes/2289645003913297920


In [None]:
# Pass the Gemini LLM explicitly: with no `llm` argument LlamaIndex falls back
# to its global default LLM, which is not the Vertex model configured in this notebook.
query_engine = index.as_query_engine(llm=vertex_gemini)

**Set up Query engine with Gemini**

In [None]:
# Answer with Gemini, retrieving the 3 most similar chunks per query.
llm = vertex_gemini
query_engine = index.as_query_engine(similarity_top_k=3, llm=llm)

In [None]:
# Ask a question, then print the answer followed by the retrieved source chunks.
response = query_engine.query(
    "who are the authors of paper Attention is All you need?"
)

divider = "-" * 80

print("Response:")
print(divider)
print(response.response)
print(divider)
print("Source Documents:")
print(divider)
for source_node in response.source_nodes:
    metadata = source_node.metadata
    print(f"Sample Text: {source_node.text[:50]}")
    print(f"Relevance score: {source_node.get_score():.3f}")
    print(f"File Name: {metadata.get('file_name')}")
    print(f"Page #: {metadata.get('page_label')}")
    print(f"File Path: {metadata.get('file_path')}")
    print(divider)

Response:
--------------------------------------------------------------------------------
The authors of the paper "Attention Is All You Need" are:

* Ashish Vaswani
* Noam Shazeer
* Niki Parmar
* Jakob Uszkoreit
* Llion Jones
* Aidan N. Gomez
* Łukasz Kaiser
* Illia Polosukhin
--------------------------------------------------------------------------------
Source Documents:
--------------------------------------------------------------------------------
Sample Text: Provided proper attribution is provided, Google he
Relevance score: 0.719
File Name: test.pdf
Page #: 1
File Path: /content/data/arxiv/test.pdf
--------------------------------------------------------------------------------
Sample Text: length nis smaller than the representation dimensi
Relevance score: 0.687
File Name: test.pdf
Page #: 7
File Path: /content/data/arxiv/test.pdf
--------------------------------------------------------------------------------


**Response Evaluation**

Does the response match the retrieved context? Does it also match the query? Does it match the reference answer or guidelines? Here's a simple example that evaluates a single response for Faithfulness, i.e. whether the response is aligned to the context, such as being free from hallucinations:

In [None]:
from llama_index.core.evaluation import FaithfulnessEvaluator

In [None]:
# define evaluator
# `llm` is the Gemini model assigned in the query-engine cell; the evaluator is
# constructed with it so the same model is used for the faithfulness check.
evaluator = FaithfulnessEvaluator(llm=llm)

In [None]:
# query index
!pip install nest_asyncio




The response contains both the response and the source from which the response was generated; the evaluator compares them and determines if the response is faithful to the source.

In [None]:
import nest_asyncio

# Patch asyncio before running the evaluator from inside the notebook.
nest_asyncio.apply()

# Re-run the query, then check the answer against its retrieved sources.
response = query_engine.query(
    "who are the authors of paper Attention is All you need?"
)
eval_result = evaluator.evaluate_response(response=response)
print(eval_result.passing)

True



**Clean Up**

Please delete Vertex AI Vector Search Index and Index Endpoint after running your experiments to avoid incurring additional charges. Please note that you will be charged as long as the endpoint is running.

⚠️ NOTE: Enabling `CLEANUP_RESOURCES` flag deletes Vector Search Index, Index Endpoint and Cloud Storage bucket. Please run it with caution.

In [61]:
# Gate for the cleanup cells below. While True, running them undeploys and deletes
# the index endpoint and deletes the index. Set to False to keep the resources.
CLEANUP_RESOURCES = True

Undeploy indexes and Delete index endpoint

In [62]:
# Runs only when cleanup is enabled.
if CLEANUP_RESOURCES:
    print(
        f"Undeploying all indexes and deleting the index endpoint {vs_endpoint.display_name}"
    )
    # Order matters here: undeploy every index first, then delete the endpoint.
    vs_endpoint.undeploy_all()
    vs_endpoint.delete()

Undeploying all indexes and deleting the index endpoint llamaindex-doc-endpoint


INFO:google.cloud.aiplatform.matching_engine.matching_engine_index_endpoint:Undeploying MatchingEngineIndexEndpoint index_endpoint: projects/474775107710/locations/us-central1/indexEndpoints/2278948954798292992
INFO:google.cloud.aiplatform.matching_engine.matching_engine_index_endpoint:Undeploy MatchingEngineIndexEndpoint index_endpoint backing LRO: projects/474775107710/locations/us-central1/indexEndpoints/2278948954798292992/operations/7024100600912543744
INFO:google.cloud.aiplatform.matching_engine.matching_engine_index_endpoint:MatchingEngineIndexEndpoint index_endpoint undeployed. Resource name: projects/474775107710/locations/us-central1/indexEndpoints/2278948954798292992
INFO:google.cloud.aiplatform.base:Deleting MatchingEngineIndexEndpoint : projects/474775107710/locations/us-central1/indexEndpoints/2278948954798292992
INFO:google.cloud.aiplatform.base:Delete MatchingEngineIndexEndpoint  backing LRO: projects/474775107710/locations/us-central1/indexEndpoints/2278948954798292992

In [63]:
# Runs only when cleanup is enabled; placed after the cell that undeploys the index.
if CLEANUP_RESOURCES:
    print(f"Deleting the index {vs_index.display_name}")
    vs_index.delete()

INFO:google.cloud.aiplatform.base:Deleting MatchingEngineIndex : projects/474775107710/locations/us-central1/indexes/2289645003913297920


Deleting the index llamaindex-doc-index


INFO:google.cloud.aiplatform.base:Delete MatchingEngineIndex  backing LRO: projects/474775107710/locations/us-central1/indexes/2289645003913297920/operations/2752928935560871936
INFO:google.cloud.aiplatform.base:MatchingEngineIndex deleted. . Resource name: projects/474775107710/locations/us-central1/indexes/2289645003913297920


In [64]:
if CLEANUP_RESOURCES and "GCS_BUCKET_NAME" in globals():
    print(f"Deleting contents from the Cloud Storage bucket {GCS_BUCKET_NAME}")

    shell_output = ! gsutil du -ash gs://GCS_BUCKET_NAME    print(shell_output)    print(        f"Size of the bucket {GCS_BUCKET_NAME} before deleting = {' '.join(shell_output[0].split()[:2])}"    )    # uncomment below line to delete contents of the bucket    # ! gsutil -m rm -r gs://GCS_BUCKET_NAME    print(shell_output)    print(        f"Size of the bucket {GCS_BUCKET_NAME} before deleting = {' '.join(shell_output[0].split()[:2])}"    )    # uncomment below line to delete contents of the bucket    # ! gsutil -m rm -r gs://GCS_BUCKET_NAME
    print(shell_output)
    print(
        f"Size of the bucket {GCS_BUCKET_NAME} before deleting = {' '.join(shell_output[0].split()[:2])}"
    )

    # uncomment below line to delete contents of the bucket
    # ! gsutil -m rm -r gs://GCS_BUCKET_NAME

Deleting contents from the Cloud Storage bucket faa_pdfs
["/bin/bash: -c: line 1: syntax error near unexpected token `('", '/bin/bash: -c: line 1: ` gsutil du -ash gs://GCS_BUCKET_NAME\xa0\xa0\xa0\xa0print(shell_output)\xa0\xa0\xa0\xa0print(\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0f"Size\xa0of\xa0the\xa0bucket\xa0{GCS_BUCKET_NAME}\xa0before\xa0deleting\xa0=\xa0{\'\xa0\'.join(shell_output[0].split()[:2])}"\xa0\xa0\xa0\xa0)\xa0\xa0\xa0\xa0#\xa0uncomment\xa0below\xa0line\xa0to\xa0delete\xa0contents\xa0of\xa0the\xa0bucket\xa0\xa0\xa0\xa0#\xa0!\xa0gsutil\xa0-m\xa0rm\xa0-r\xa0gs://GCS_BUCKET_NAME\xa0\xa0\xa0\xa0print(shell_output)\xa0\xa0\xa0\xa0print(\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0f"Size\xa0of\xa0the\xa0bucket\xa0{GCS_BUCKET_NAME}\xa0before\xa0deleting\xa0=\xa0{\'\xa0\'.join(shell_output[0].split()[:2])}"\xa0\xa0\xa0\xa0)\xa0\xa0\xa0\xa0#\xa0uncomment\xa0below\xa0line\xa0to\xa0delete\xa0contents\xa0of\xa0the\xa0bucket\xa0\xa0\xa0\xa0#\xa0!\xa0gsutil\xa0-m\xa0rm\xa0-r\xa0gs://GCS_BUCKET_NAME\'']
Siz