Link: [**Create a search data store**](https://cloud.google.com/generative-ai-app-builder/docs/create-data-store-es)

In [None]:
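# Uncomment on first run to install the client libraries.
# %pip (rather than !pip) installs into the environment of the running kernel.
# %pip install --upgrade google-cloud-aiplatform
# %pip install --upgrade google-cloud-discoveryengine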

Collecting google-cloud-discoveryengine
  Downloading google_cloud_discoveryengine-0.13.6-py3-none-any.whl.metadata (5.3 kB)
Downloading google_cloud_discoveryengine-0.13.6-py3-none-any.whl (2.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.9/2.9 MB 31.6 MB/s eta 0:00:00
Installing collected packages: google-cloud-discoveryengine
Successfully installed google-cloud-discoveryengine-0.13.6


# Create a Datastore

In [None]:
import os
from google.api_core.client_options import ClientOptions
from google.cloud import discoveryengine

def create_data_store_sample(
    project_id: str,
    location: str,
    data_store_id: str,
    datastore_name: str = "My Data Store Chat",
) -> str:
    """Create a Discovery Engine data store and wait for the operation to finish.

    The data store is created under `default_collection` with the GENERIC
    industry vertical, the SOLUTION_TYPE_CHAT solution type and
    CONTENT_REQUIRED content config. The response and the operation metadata
    are printed once the long-running operation completes.

    Args:
        project_id: Google Cloud project ID that will own the data store.
        location: Data store location. "global" uses the client library's
            default endpoint; any other value selects
            `{location}-discoveryengine.googleapis.com`.
        data_store_id: ID to assign to the new data store.
        datastore_name: Display name of the data store. Optional so that
            callers passing only the three identifiers keep working.

    Returns:
        The name of the long-running create operation.
    """
    #  For more information, refer to:
    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
    client_options = (
        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
        if location != "global"
        else None
    )

    # Create a client
    client = discoveryengine.DataStoreServiceClient(client_options=client_options)

    # The full resource name of the collection
    # e.g. projects/{project}/locations/{location}/collections/default_collection
    parent = client.collection_path(
        project=project_id,
        location=location,
        collection="default_collection",
    )

    data_store = discoveryengine.DataStore(
        display_name=datastore_name,
        # Options: GENERIC, MEDIA, HEALTHCARE_FHIR
        industry_vertical=discoveryengine.IndustryVertical.GENERIC,
        # Options: SOLUTION_TYPE_RECOMMENDATION, SOLUTION_TYPE_SEARCH, SOLUTION_TYPE_CHAT, SOLUTION_TYPE_GENERATIVE_CHAT
        solution_types=[discoveryengine.SolutionType.SOLUTION_TYPE_CHAT],
        # TODO(developer): Update content_config based on data store type.
        # Options: NO_CONTENT, CONTENT_REQUIRED, PUBLIC_WEBSITE
        content_config=discoveryengine.DataStore.ContentConfig.CONTENT_REQUIRED,
    )

    request = discoveryengine.CreateDataStoreRequest(
        parent=parent,
        data_store_id=data_store_id,
        data_store=data_store,
        # Optional: For Advanced Site Search Only
        # create_advanced_site_search=True,
    )

    # Make the request
    operation = client.create_data_store(request=request)

    print(f"Waiting for operation to complete: {operation.operation.name}")
    # Block until the long-running operation has finished.
    response = operation.result()

    # After the operation is complete,
    # get information from operation metadata
    metadata = discoveryengine.CreateDataStoreMetadata(operation.metadata)

    # Handle the response
    print(response)
    print(metadata)

    return operation.operation.name

In [None]:
# Project that owns the data store. Leave the placeholder in place to fall back
# to the GOOGLE_CLOUD_PROJECT environment variable.
project_id = "[your-project-id]"  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
if not project_id or project_id == "[your-project-id]":
    # Default to "" instead of wrapping the lookup in str(): str(None) is the
    # literal string "None", which would be used as if it were a project ID.
    project_id = os.environ.get("GOOGLE_CLOUD_PROJECT", "")
if not project_id:
    print(
        "WARNING: project_id is empty. Set it in this cell or define the "
        "GOOGLE_CLOUD_PROJECT environment variable before running the cells below."
    )

location = "global" #@param {type: "string", placeholder: "us-central1", isTemplate: true}

# Data store ID only (not a full `projects/.../dataStores/...` resource name):
# the cells below pass this value as `data_store_id` / `data_store` and build
# the resource path themselves.
datastore_id = "datastorechat-eikon-devai" #@param

# Display name used when the data store is created.
datastore_name = "Chat Data Store" #@param {type: "string", placeholder: "Chat Data Store", isTemplate: true}

In [None]:
# Cloud Storage location of the documents to import into the data store.
gcs_bucket_name = "demo_bucket_agent_builder_0" #@param
gcs_path_name = "faq-document" #@param
# NOTE(review): in the visible cells this name is only used by the
# commented-out single-file URI below — confirm whether it is still needed.
gcs_pdf_file_name = "Build a Gemini-Powered YouTube Summarizer  _  Google Codelabs.pdf" #@param

# Single-file alternative (note: it does not include gcs_path_name):
# gcs_uri = f"gs://{gcs_bucket_name}/{gcs_pdf_file_name}"
# Wildcard URI for the PDF files under gs://<bucket>/<path>/.
gcs_uri = f"gs://{gcs_bucket_name}/{gcs_path_name}/*.pdf"



---



In [None]:
# Create the data store. `datastore_name` is a declared parameter of
# create_data_store_sample, so pass the display name from the configuration
# cell explicitly instead of omitting it.
create_data_store_sample(
    project_id=project_id,
    location=location,
    data_store_id=datastore_id,
    datastore_name=datastore_name,
)

Waiting for operation to complete: projects/158103152291/locations/global/collections/default_collection/operations/create-data-store-15599008082268783659
name: "projects/158103152291/locations/global/collections/default_collection/dataStores/datastorechat-eikon-devai"
display_name: "My Data Store Chat"
industry_vertical: GENERIC
solution_types: SOLUTION_TYPE_CHAT
default_schema_id: "default_schema"
content_config: CONTENT_REQUIRED
document_processing_config {
  name: "projects/158103152291/locations/global/collections/default_collection/dataStores/datastorechat-eikon-devai/documentProcessingConfig"
  default_parsing_config {
    digital_parsing_config {
    }
  }
}
serving_config_data_store {
}




'projects/158103152291/locations/global/collections/default_collection/operations/create-data-store-15599008082268783659'

# Import documents

In [None]:
# Script form of the document import. It reads `project_id`, `location`,
# `datastore_id` and `gcs_uri` from the notebook namespace; to override them
# here, uncomment and fill in the lines below.
# project_id = "YOUR_PROJECT_ID"
# location = "YOUR_LOCATION" # Values: "global"
# datastore_id = "YOUR_DATA_STORE_ID"
# gcs_uri = "YOUR_GCS_PATH"
#
# Accepted shapes for gcs_uri:
# - Unstructured documents
#   - `gs://bucket/directory/file.pdf`
#   - `gs://bucket/directory/*.pdf`
# - Unstructured documents with JSONL Metadata
#   - `gs://bucket/directory/file.json`
# - Unstructured documents with CSV Metadata
#   - `gs://bucket/directory/file.csv`

# Only non-global locations need an explicit regional endpoint. See:
# https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
if location == "global":
    client_options = None
else:
    client_options = ClientOptions(
        api_endpoint=f"{location}-discoveryengine.googleapis.com"
    )

# Client for the document service
client = discoveryengine.DocumentServiceClient(client_options=client_options)

# Resource name of the branch that receives the documents
parent = client.branch_path(
    project=project_id, location=location, data_store=datastore_id, branch="default_branch"
)

# data_schema options:
# - `content` - Unstructured documents (PDF, HTML, DOC, TXT, PPTX)
# - `custom` - Unstructured documents with custom JSONL metadata
# - `document` - Structured documents in the discoveryengine.Document format.
# - `csv` - Unstructured documents with CSV metadata
# reconciliation_mode options: `FULL`, `INCREMENTAL`
request = discoveryengine.ImportDocumentsRequest(
    parent=parent,
    # input_uris accepts more than one URI
    gcs_source=discoveryengine.GcsSource(input_uris=[gcs_uri], data_schema="content"),
    reconciliation_mode=discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL,
)

# Start the import
operation = client.import_documents(request=request)

print(f"Waiting for operation to complete: {operation.operation.name}")
# Block until the long-running operation has finished
response = operation.result()

# With the operation finished, read its metadata
metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)

# Show the outcome
print(response)
print(metadata)

In [None]:
def import_documents_gcs(
    project_id: str, location: str, datastore_id: str, gcs_uri: str
) -> str:
    """Import documents from Cloud Storage into a data store's default branch.

    Builds an incremental `ImportDocumentsRequest` with the `content` data
    schema, waits for the long-running operation to finish, and prints both
    the response and the operation metadata.

    Args:
        project_id: Google Cloud project ID.
        location: Data store location. "global" uses the client library's
            default endpoint; any other value targets
            `{location}-discoveryengine.googleapis.com`.
        datastore_id: ID of the data store that receives the documents.
        gcs_uri: `gs://` URI of the documents (a single file or a wildcard
            pattern); sent as the only entry of `input_uris`.

    Returns:
        str: The name of the import operation.
    """
    # Only non-global locations need an explicit regional endpoint.
    if location == "global":
        endpoint_options = None
    else:
        endpoint_options = ClientOptions(
            api_endpoint=f"{location}-discoveryengine.googleapis.com"
        )

    document_client = discoveryengine.DocumentServiceClient(
        client_options=endpoint_options
    )

    # Resource name of the branch the documents are imported into.
    branch_name = document_client.branch_path(
        project=project_id,
        location=location,
        data_store=datastore_id,
        branch="default_branch",
    )

    # data_schema options:
    # - `content` - Unstructured documents (PDF, HTML, DOC, TXT, PPTX)
    # - `custom` - Unstructured documents with custom JSONL metadata
    # - `document` - Structured documents in the discoveryengine.Document format.
    # - `csv` - Unstructured documents with CSV metadata
    source = discoveryengine.GcsSource(input_uris=[gcs_uri], data_schema="content")

    # reconciliation_mode options: `FULL`, `INCREMENTAL`
    mode = discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL

    import_request = discoveryengine.ImportDocumentsRequest(
        parent=branch_name,
        gcs_source=source,
        reconciliation_mode=mode,
    )

    # Start the import and block until it has finished.
    import_operation = document_client.import_documents(request=import_request)
    print(f"Waiting for operation to complete: {import_operation.operation.name}")
    import_response = import_operation.result()

    # With the operation finished, read its metadata.
    import_metadata = discoveryengine.ImportDocumentsMetadata(import_operation.metadata)

    print(import_response)
    print(import_metadata)

    return import_operation.operation.name

In [None]:
# Run the import with the values from the configuration cells.
import_documents_gcs(
    project_id=project_id,
    location=location,
    datastore_id=datastore_id,
    gcs_uri=gcs_uri,
)

Waiting for operation to complete: projects/158103152291/locations/global/collections/default_collection/dataStores/datastorechat-eikon-devai/branches/0/operations/import-documents-10011971609147938399
error_config {
  gcs_prefix: "gs://158103152291_asia_southeast2_import_content/errors10011971609147939828"
}

create_time {
  seconds: 1740388533
  nanos: 671942000
}
update_time {
  seconds: 1740388866
  nanos: 831877000
}
success_count: 1
total_count: 1



'projects/158103152291/locations/global/collections/default_collection/dataStores/datastorechat-eikon-devai/branches/0/operations/import-documents-10011971609147938399'