# Using Vertex AI Search for Retrieval-Augmented Generation (RAG)

In [1]:
#instalando discovery engine api
! pip install --upgrade google-cloud-discoveryengine -q --user

[0m

In [2]:
# Restart kernel after installs so that your environment can access the new packages
import IPython
# NOTE(review): `time` is imported but not used anywhere in this cell.
import time

# Grab the running IPython application and ask its kernel to shut down;
# the True argument requests a restart (the cell output reports 'restart': True).
app = IPython.Application.instance()
app.kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

In [2]:
# Import the discoveryengine_v1alpha library from the google.cloud package.
# This library is used to interact with the Google Cloud Discovery Engine.
from google.cloud import discoveryengine_v1alpha as discoveryengine

# Import the ClientOptions class from google.api_core.client_options.
# This class is used to configure client options for the Google Cloud API.
from google.api_core.client_options import ClientOptions

# Read the active project ID from the gcloud configuration instead of hard-coding it.
# The `!` shell capture returns the command output as a list of lines, so the
# first line is taken and stripped of surrounding whitespace.
PROJECT_ID = !gcloud config get-value project
PROJECT_ID = PROJECT_ID[0].strip()

print(f"PROJECT_ID: {PROJECT_ID}")

# LOCATION is the location of the Google Cloud resources; "global" is used here.
# The helper functions below switch to a regional endpoint for any other value.
LOCATION = "global"


PROJECT_ID: qwiklabs-gcp-01-8d18bf5191a5


In [3]:
def create_data_store(
    project_id: str, location: str, data_store_name: str, data_store_id: str
):
    """Create a Discovery Engine data store in the project's default collection.

    Args:
        project_id: Google Cloud project that will own the data store.
        location: Location of the data store. Any value other than "global"
            selects the "<location>-discoveryengine.googleapis.com" endpoint.
        data_store_name: Display name given to the data store.
        data_store_id: Resource ID given to the data store.

    Returns:
        None. The function waits up to 90 seconds for the creation to finish;
        if it is still running after that, a notice is printed and the
        function returns while the operation continues on the server.

    Raises:
        Any error reported by the API other than the wait timing out.
    """
    # Local import: only needed to recognise the timeout raised by operation.result().
    import concurrent.futures

    # Create a client (regional endpoint only when the location is not "global")
    client_options = (
        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
        if location != "global"
        else None
    )
    client = discoveryengine.DataStoreServiceClient(client_options=client_options)

    # Initialize the request arguments
    data_store = discoveryengine.DataStore(
        display_name=data_store_name,
        industry_vertical="GENERIC",
        content_config="CONTENT_REQUIRED",
    )

    request = discoveryengine.CreateDataStoreRequest(
        parent=discoveryengine.DataStoreServiceClient.collection_path(
            project_id, location, "default_collection"
        ),
        data_store=data_store,
        data_store_id=data_store_id,
    )

    # Make the request; this starts a long-running operation
    operation = client.create_data_store(request=request)

    # Only a timeout is tolerated here: the data store can take longer than the
    # wait budget to be provisioned. Any other failure is a real error and must
    # surface instead of being reported as "long-running operation".
    try:
        operation.result(timeout=90)
    except concurrent.futures.TimeoutError:
        print("long-running operation")


In [4]:
# The data store name may only contain lowercase letters, digits and hyphens
DATASTORE_NAME = "alphabet-contracts"
DATASTORE_ID = f"{DATASTORE_NAME}-id"

# Call create_data_store to create the data store with the parameters defined above
create_data_store(PROJECT_ID, LOCATION, DATASTORE_NAME, DATASTORE_ID)


I0000 00:00:1725840951.399357     781 config.cc:230] gRPC experiments enabled: call_status_override_on_cancellation, event_engine_dns, event_engine_listener, http2_stats_fix, monitoring_experiment, pick_first_new, trace_record_callops, work_serializer_clears_time_cache


In [5]:
def import_documents(
    project_id: str,
    location: str,
    data_store_id: str,
    gcs_uri: str,
):
    """Import every object under a Cloud Storage prefix into a data store.

    Args:
        project_id: Google Cloud project that owns the data store.
        location: Location of the data store. Any value other than "global"
            selects the "<location>-discoveryengine.googleapis.com" endpoint.
        data_store_id: ID of the data store that receives the documents.
        gcs_uri: Cloud Storage prefix (for example "gs://bucket/folder"); a
            trailing slash is accepted and ignored.

    Returns:
        The name of the import long-running operation.
    """
    # Create a client (regional endpoint only when the location is not "global")
    client_options = (
        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
        if location != "global"
        else None
    )
    client = discoveryengine.DocumentServiceClient(client_options=client_options)

    # The full resource name of the search engine branch.
    # e.g. projects/{project}/locations/{location}/dataStores/{data_store_id}/branches/{branch}
    parent = client.branch_path(
        project=project_id,
        location=location,
        data_store=data_store_id,
        branch="default_branch",
    )

    # Strip trailing slashes so "gs://bucket/dir/" does not become "gs://bucket/dir//*"
    source_documents = [f"{gcs_uri.rstrip('/')}/*"]

    request = discoveryengine.ImportDocumentsRequest(
        parent=parent,
        gcs_source=discoveryengine.GcsSource(
            input_uris=source_documents, data_schema="content"
        ),
        # Options: `FULL`, `INCREMENTAL`
        reconciliation_mode=discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL,
    )

    # Make the request and block (no timeout) until the import has finished
    operation = client.import_documents(request=request)
    operation.result()

    # Hand back the operation name so the caller can look the import up later
    return operation.operation.name


In [6]:

# Cloud Storage folder whose contents are imported into the data store
source_documents_gs_uri = (
    "gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs"
)

# Import everything under that folder; the call returns the import operation's name
import_documents(PROJECT_ID, LOCATION, DATASTORE_ID, source_documents_gs_uri)

'projects/954928940146/locations/global/collections/default_collection/dataStores/alphabet-contracts-id/branches/0/operations/import-documents-10085668403139396693'

In [7]:
def create_engine(
    project_id: str, location: str, data_store_name: str, data_store_id: str
):
    """Create a search engine backed by an existing data store.

    Args:
        project_id: Google Cloud project that will own the engine.
        location: Location of the engine. Any value other than "global"
            selects the "<location>-discoveryengine.googleapis.com" endpoint.
        data_store_name: Used both as the engine's display name and as its ID.
        data_store_id: ID of the data store the engine searches over.

    Returns:
        None. The function waits up to 90 seconds for the creation to finish;
        if it is still running after that, a notice is printed and the
        function returns while the operation continues on the server.

    Raises:
        Any error reported by the API other than the wait timing out.
    """
    # Local import: only needed to recognise the timeout raised by operation.result().
    import concurrent.futures

    # Create a client (regional endpoint only when the location is not "global")
    client_options = (
        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
        if location != "global"
        else None
    )
    client = discoveryengine.EngineServiceClient(client_options=client_options)

    # Initialize the request arguments
    config = discoveryengine.Engine.SearchEngineConfig(
        search_tier="SEARCH_TIER_ENTERPRISE",  # Potentially required for LLM features
        search_add_ons=["SEARCH_ADD_ON_LLM"],
    )

    engine = discoveryengine.Engine(
        display_name=data_store_name,
        solution_type="SOLUTION_TYPE_SEARCH",
        industry_vertical="GENERIC",
        data_store_ids=[data_store_id],
        search_engine_config=config,
    )

    request = discoveryengine.CreateEngineRequest(
        parent=discoveryengine.DataStoreServiceClient.collection_path(
            project_id, location, "default_collection"
        ),
        engine=engine,
        # The display name doubles as the engine ID
        engine_id=engine.display_name,
    )

    # Make the request; this starts a long-running operation
    operation = client.create_engine(request=request)

    # Treat a slow creation the same way create_data_store does: a timeout is
    # reported and tolerated, while any other failure propagates to the caller.
    try:
        operation.result(timeout=90)
    except concurrent.futures.TimeoutError:
        print("long-running operation")


In [8]:
# Call the function that creates the search engine
create_engine(PROJECT_ID, LOCATION, DATASTORE_NAME, DATASTORE_ID)

In [None]:
# Import List from typing
from typing import List

# Runs a search with snippets and a summary against the data store's default serving config
def search_sample(
    project_id: str,
    location: str,
    data_store_id: str,
    search_query: str,
    page_size: int = 10,
    summary_result_count: int = 5,
) -> "discoveryengine.services.search_service.pagers.SearchPager":
    """Search a data store and request snippets plus a generated summary.

    Args:
        project_id: Google Cloud project that owns the data store.
        location: Location of the data store. Any value other than "global"
            selects the "<location>-discoveryengine.googleapis.com" endpoint.
        data_store_id: ID of the data store to search.
        search_query: Free-text query.
        page_size: Value sent as the request's `page_size` (default 10).
        summary_result_count: Value sent as the summary spec's
            `summary_result_count` (default 5).

    Returns:
        The object returned by `SearchServiceClient.search` (a pager over the
        search responses, not a list); callers can read the summary through
        `.summary.summary_text`.
    """
    # For more information, see:
    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
    client_options = (
        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
        if location != "global"
        else None
    )

    # Create a client
    client = discoveryengine.SearchServiceClient(client_options=client_options)

    # The full resource name of the search serving config
    # e.g. projects/{project_id}/locations/{location}/dataStores/{data_store_id}/servingConfigs/{serving_config_id}
    serving_config = client.serving_config_path(
        project=project_id,
        location=location,
        data_store=data_store_id,
        serving_config="default_config",
    )

    # Optional: configuration options for the search
    # See the `ContentSearchSpec` reference for all supported fields:
    # https://cloud.google.com/python/docs/reference/discoveryengine/latest/google.cloud.discoveryengine_v1.types.SearchRequest.ContentSearchSpec
    content_search_spec = discoveryengine.SearchRequest.ContentSearchSpec(
        # For information about snippets, see:
        # https://cloud.google.com/generative-ai-app-builder/docs/snippets
        snippet_spec=discoveryengine.SearchRequest.ContentSearchSpec.SnippetSpec(
            return_snippet=True
        ),
        # For information about search summaries, see:
        # https://cloud.google.com/generative-ai-app-builder/docs/get-search-summaries
        summary_spec=discoveryengine.SearchRequest.ContentSearchSpec.SummarySpec(
            summary_result_count=summary_result_count,
            include_citations=True,
            ignore_adversarial_query=True,
            ignore_non_summary_seeking_query=True,
        ),
    )

    # See the `SearchRequest` reference for all supported fields:
    # https://cloud.google.com/python/docs/reference/discoveryengine/latest/google.cloud.discoveryengine_v1.types.SearchRequest
    request = discoveryengine.SearchRequest(
        serving_config=serving_config,
        query=search_query,
        page_size=page_size,
        content_search_spec=content_search_spec,
        query_expansion_spec=discoveryengine.SearchRequest.QueryExpansionSpec(
            condition=discoveryengine.SearchRequest.QueryExpansionSpec.Condition.AUTO,
        ),
        spell_correction_spec=discoveryengine.SearchRequest.SpellCorrectionSpec(
            mode=discoveryengine.SearchRequest.SpellCorrectionSpec.Mode.AUTO
        ),
    )

    response = client.search(request)
    return response


In [None]:
# Natural-language question sent to the data store
query = "When google start your operation?"

# Run the search and print only the generated summary text
print(search_sample(PROJECT_ID, LOCATION, DATASTORE_ID, query).summary.summary_text)