-
Notifications
You must be signed in to change notification settings - Fork 2
/
create.py
55 lines (41 loc) · 2.18 KB
/
create.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from src.index.create_data_store import create_data_store
from src.index.create_app import create_doc_search_app
from src.index.ingest_data import ingest_documents
from src.config.logging import logger
from src.config.setup import config
def validate_configuration(config):
    """
    Validate the essential configuration parameters.

    Parameters:
        config: Configuration object; only its `BUCKET` attribute is read.

    Raises:
        AssertionError: If `config.BUCKET` is empty or otherwise falsy.
    """
    # Raise explicitly rather than via an `assert` statement: asserts are
    # removed when Python runs with -O, which would silently skip this check.
    # AssertionError is kept so existing callers see the same exception type.
    if not config.BUCKET:
        raise AssertionError("Bucket name must be set in the configuration.")
def create_index_for_document_search(data_store_display_name, data_store_id, gcs_input_uri):
    """
    Orchestrate the creation of an index for document search.

    The process involves:
    1. Creating a data store with a given display name and identifier.
    2. Ingesting documents from a specified Google Cloud Storage URI into the data store.
    3. Creating a document search application for the same display name and identifier.

    Parameters:
        data_store_display_name (str): The display name for the data store.
        data_store_id (str): The unique identifier for the data store.
        gcs_input_uri (str): The Google Cloud Storage URI where the input documents are stored.

    Raises:
        AssertionError: If any of the input parameters are empty.
        Exception: Any error raised during data store creation, document
            ingestion, or app creation is logged and then re-raised unchanged.
    """
    # Enforce the documented precondition before any step runs, so an empty
    # argument cannot leave the pipeline half-finished (e.g. a data store
    # created but nothing ingested). Raised explicitly instead of with an
    # `assert` statement so the check survives `python -O`.
    required_arguments = {
        "data_store_display_name": data_store_display_name,
        "data_store_id": data_store_id,
        "gcs_input_uri": gcs_input_uri,
    }
    for argument_name, argument_value in required_arguments.items():
        if not argument_value:
            raise AssertionError(f"{argument_name} must not be empty.")

    logger.info("Starting the index creation process for document search.")
    try:
        logger.info("Creating data store.")
        create_data_store(data_store_display_name, data_store_id)

        logger.info("Ingesting documents.")
        ingest_documents(gcs_input_uri, data_store_id)

        logger.info("Creating document search application.")
        create_doc_search_app(data_store_display_name, data_store_id)

        logger.info("Index creation process completed successfully.")
    except Exception as e:
        # Record which run failed, then let the original exception propagate
        # so the caller still sees the real error type and traceback.
        logger.error(f"An error occurred during the index creation process: {e}")
        raise
if __name__ == '__main__':
    # Script entry point: check the configuration first, then build the
    # index for the quarterly reports stored under the configured bucket.
    validate_configuration(config)

    # The data store uses the same string for its display name and its id.
    store_name = 'quarterly-reports'

    create_index_for_document_search(
        data_store_display_name=store_name,
        data_store_id=store_name,
        gcs_input_uri=f'gs://{config.BUCKET}/raw_docs/metadata.json',
    )