## 0. Install Libraries

In [5]:
pip install --user google-cloud-discoveryengine

Collecting google-cloud-discoveryengine
  Obtaining dependency information for google-cloud-discoveryengine from https://files.pythonhosted.org/packages/93/38/a33ee674ad7d5045caed2c5836506d2fe3c323ba3735d8391a1d2c9d87e5/google_cloud_discoveryengine-0.11.1-py3-none-any.whl.metadata
  Using cached google_cloud_discoveryengine-0.11.1-py3-none-any.whl.metadata (5.1 kB)
Using cached google_cloud_discoveryengine-0.11.1-py3-none-any.whl (523 kB)
Installing collected packages: google-cloud-discoveryengine
Successfully installed google-cloud-discoveryengine-0.11.1
Note: you may need to restart the kernel to use updated packages.


Make sure to restart Kernel after installation and before importing libraries

## 1. Create an App and a Data Store

Right now this step is done manually. 

Follow steps: 
https://cloud.google.com/generative-ai-app-builder/docs/create-data-store-es#cloud-storage

In [54]:
%%bash 

export PROJECT_ID=$(gcloud config get-value project)
export DATA_STORE_ID="alphabet-investor-search"
export DISPLAY_NAME="alphabet-investor-search-store"

curl -X POST \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "Content-Type: application/json" \
-H "X-Goog-User-Project: ${PROJECT_ID}" \
"https://discoveryengine.googleapis.com/v1alpha/projects/${PROJECT_ID}/locations/global/collections/default_collection/dataStores?dataStoreId=${DATA_STORE_ID}" \
-d '{
  "displayName": "${DISPLAY_NAME}",
  "industryVertical": "GENERIC",
  "solutionTypes": ["SOLUTION_TYPE_SEARCH"],
  "contentConfig": "CONTENT_REQUIRED",
  "searchTier": "STANDARD",
  "searchAddOns": ["LLM"]
}'

{
  "name": "projects/185246287903/locations/global/collections/default_collection/operations/create-data-store-6602823812730522192",
  "done": true
}


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   358    0   151  100   207     26     36  0:00:05  0:00:05 --:--:--    34


## 2. Import Documents

In [34]:
!gsutil ls gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/

gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/20040630_google_10Q.pdf
gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/20040930_google_10Q.pdf
gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/2004Q3_earnings.pdf
gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/2004Q4_earnings_google.pdf
gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/2004_google_annual_report.pdf
gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/20050331_google_10Q.pdf
gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/20050630_google_10Q.pdf
gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/20050930_google_10Q.pdf
gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/20051231_10-K.pdf
gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/2005Q1_earnings_google.pdf
gs://cloud-samples-data/gen-app-builder/search/alphabet-in

In [36]:
from typing import Optional

from google.api_core.client_options import ClientOptions
from google.cloud import discoveryengine

# TODO(developer): Uncomment these variables before running the sample.
project_id = ! (gcloud config get-value project) 
project_id = project_id[0]
location = "global" # Values: "global"
data_store_id = "alphabet-investor-search-s_1696276494152"

# Must specify either `gcs_uri` or (`bigquery_dataset` and `bigquery_table`)
# Format: `gs://bucket/directory/object.json` or `gs://bucket/directory/*.json`
gcs_uri = "gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/*.pdf"
# bigquery_dataset = "YOUR_BIGQUERY_DATASET"
# bigquery_table = "YOUR_BIGQUERY_TABLE"


def import_documents_sample(
    project_id: str,
    location: str,
    data_store_id: str,
    gcs_uri: Optional[str] = None,
    bigquery_dataset: Optional[str] = None,
    bigquery_table: Optional[str] = None,
) -> str:
    #  For more information, refer to:
    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
    client_options = (
        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
        if location != "global"
        else None
    )

    # Create a client
    client = discoveryengine.DocumentServiceClient(client_options=client_options)

    # The full resource name of the search engine branch.
    # e.g. projects/{project}/locations/{location}/dataStores/{data_store_id}/branches/{branch}
    parent = client.branch_path(
        project=project_id,
        location=location,
        data_store=data_store_id,
        branch="default_branch",
    )
    
    if gcs_uri:
        request = discoveryengine.ImportDocumentsRequest(
            parent=parent,
            gcs_source=discoveryengine.GcsSource(
                input_uris=[gcs_uri], data_schema="custom"
            ),
            # Options: `FULL`, `INCREMENTAL`
            reconciliation_mode=discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL,
        )
    else:
        request = discoveryengine.ImportDocumentsRequest(
            parent=parent,
            bigquery_source=discoveryengine.BigQuerySource(
                project_id=project_id,
                dataset_id=bigquery_dataset,
                table_id=bigquery_table,
                data_schema="custom",
            ),
            # Options: `FULL`, `INCREMENTAL`
            reconciliation_mode=discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL,
        )

    # Make the request
    operation = client.import_documents(request=request)

    print(f"Waiting for operation to complete: {operation.operation.name}")
    response = operation.result()

    # Once the operation is complete,
    # get information from operation metadata
    metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)

    # Handle the response
    print(response)
    print(metadata)

    return operation.operation.name

import_documents_sample(
    project_id,
    location,
    data_store_id,
    gcs_uri = gcs_uri,
)

InvalidArgument: 400 The request contained 173 files which exceeds the maximum number of files allowed (100).

In [14]:
!gcloud auth list

                  Credentialed Accounts
ACTIVE  ACCOUNT
*       185246287903-compute@developer.gserviceaccount.com

To set the active account, run:
    $ gcloud config set account `ACCOUNT`



## 2. Create a Data Store

In [None]:
curl -X POST \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "Content-Type: application/json" \
-H "X-Goog-User-Project: PROJECT_ID" \
"https://discoveryengine.googleapis.com/v1alpha/projects/PROJECT_ID/locations/global/collections/default_collection/engines?engineId=DATA_STORE_ID" \
-d '{
  "displayName": "DISPLAY_NAME",
  "dataStoreIds": ["DATA_STORE_ID"],
  "solutionType": ["SOLUTION_TYPE_SEARCH"]
}'